1e1f7c9dddfa88c40e03364af34436215243e9d6
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase reported by the progress indicator.  The value is used as an index
 * into task_position_string[] in print_status_check(); TASK_NOTHING has no
 * string there and makes that function return without printing anything.
 */
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
/*
 * State handed to the progress callbacks as their opaque argument;
 * print_status_check() reads 'tp' and 'info' from it.
 */
52 struct task_ctx {
53         int progress_enabled;   /* NOTE(review): not read in this part of the file; presumably the user-requested progress switch */
54         enum task_position tp;  /* phase whose label is printed */
55
56         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
57 };
58
/*
 * File-scope state of the checker.  Nothing in this part of the file
 * updates these variables, so only their initial values are documented.
 * NOTE(review): the u64 counters are presumably accumulated while the trees
 * are scanned - confirm against the code that writes them.
 */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
/* Both lists start out empty. */
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
/* Behaviour switches, all off by default. */
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
/* Progress-reporting context, zero-initialized. */
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation to run.  parse_check_mode() maps "orig"/"original"
 * to CHECK_MODE_ORIGINAL, "lowmem" to CHECK_MODE_LOWMEM and every other
 * string to CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL, not a value of its own.
 */
77 enum btrfs_check_mode {
78         CHECK_MODE_ORIGINAL,
79         CHECK_MODE_LOWMEM,
80         CHECK_MODE_UNKNOWN,
81         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
82 };
83
/* Mode in effect; starts out as the default (original) mode. */
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common header embedded as member 'node' in both data_backref and
 * tree_backref, and linked into an rb-tree through its own 'node'.
 * compare_extent_backref() orders entries by is_data, then full_backref,
 * then by the type-specific comparison.
 */
86 struct extent_backref {
87         struct rb_node node;
88         unsigned int is_data:1;          /* 1: part of a data_backref, 0: part of a tree_backref */
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;     /* compared before the type-specific keys */
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
96 {
97         return rb_entry(node, struct extent_backref, node);
98 }
99
/*
 * Back reference of a data extent.  to_data_backref() recovers this
 * structure from its embedded 'node'; compare_data_backref() defines the
 * sort order.
 */
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;     /* parent and root share storage and are compared as one key */
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;        /* only compared when both entries have found_ref != 0 */
109         u64 bytes;
110         u64 ram_bytes;          /* not part of the comparison */
111         u32 num_refs;           /* not part of the comparison */
112         u32 found_ref;          /* a counter, distinct from the one-bit node.found_ref */
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
121 {
122         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124         struct data_backref *back1 = to_data_backref(ext1);
125         struct data_backref *back2 = to_data_backref(ext2);
126
127         WARN_ON(!ext1->is_data);
128         WARN_ON(!ext2->is_data);
129
130         /* parent and root are a union, so this covers both */
131         if (back1->parent > back2->parent)
132                 return 1;
133         if (back1->parent < back2->parent)
134                 return -1;
135
136         /* This is a full backref and the parents match. */
137         if (back1->node.full_backref)
138                 return 0;
139
140         if (back1->owner > back2->owner)
141                 return 1;
142         if (back1->owner < back2->owner)
143                 return -1;
144
145         if (back1->offset > back2->offset)
146                 return 1;
147         if (back1->offset < back2->offset)
148                 return -1;
149
150         if (back1->bytes > back2->bytes)
151                 return 1;
152         if (back1->bytes < back2->bytes)
153                 return -1;
154
155         if (back1->found_ref && back2->found_ref) {
156                 if (back1->disk_bytenr > back2->disk_bytenr)
157                         return 1;
158                 if (back1->disk_bytenr < back2->disk_bytenr)
159                         return -1;
160
161                 if (back1->found_ref > back2->found_ref)
162                         return 1;
163                 if (back1->found_ref < back2->found_ref)
164                         return -1;
165         }
166
167         return 0;
168 }
169
170 /*
171  * Much like data_backref, with the undetermined members removed and
172  * linked through a list_head instead of an rb-tree node.
173  * During extent scan, it is stored in root->orphan_data_extent.
174  * During fs tree scan, it is then moved to inode_rec->orphan_extents.
175  */
176 struct orphan_data_extent {
177         struct list_head list;
178         u64 root;
179         u64 objectid;           /* printed as "inode" by print_orphan_data_extents() */
180         u64 offset;
181         u64 disk_bytenr;
182         u64 disk_len;
183 };
184
/*
 * Back reference of a tree block.  to_tree_backref() recovers this
 * structure from its embedded 'node'; compare_tree_backref() orders
 * entries by the parent/root value only.
 */
185 struct tree_backref {
186         struct extent_backref node;
187         union {
188                 u64 parent;     /* parent and root share storage and are compared as one key */
189                 u64 root;
190         };
191 };
192
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
194 {
195         return container_of(back, struct tree_backref, node);
196 }
197
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
199 {
200         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202         struct tree_backref *back1 = to_tree_backref(ext1);
203         struct tree_backref *back2 = to_tree_backref(ext2);
204
205         WARN_ON(ext1->is_data);
206         WARN_ON(ext2->is_data);
207
208         /* parent and root are a union, so this covers both */
209         if (back1->parent > back2->parent)
210                 return 1;
211         if (back1->parent < back2->parent)
212                 return -1;
213
214         return 0;
215 }
216
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
218 {
219         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
221
222         if (ext1->is_data > ext2->is_data)
223                 return 1;
224
225         if (ext1->is_data < ext2->is_data)
226                 return -1;
227
228         if (ext1->full_backref > ext2->full_backref)
229                 return 1;
230         if (ext1->full_backref < ext2->full_backref)
231                 return -1;
232
233         if (ext1->is_data)
234                 return compare_data_backref(node1, node2);
235         else
236                 return compare_tree_backref(node1, node2);
237 }
238
239 /*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so the value 2 fits next to 0 and 1.
 */
240 enum { FLAG_UNSET = 2 };
241
/*
 * Bookkeeping record for one extent.  It embeds a cache_extent, has both a
 * list head and an rb-tree root for its backrefs, and is itself linked
 * through 'list' (see to_extent_record()).
 * NOTE(review): the meaning of the individual counters and flags is not
 * established in this part of the file - confirm against their users.
 */
242 struct extent_record {
243         struct list_head backrefs;
244         struct list_head dups;
245         struct rb_root backref_tree;
246         struct list_head list;          /* link used by to_extent_record() */
247         struct cache_extent cache;
248         struct btrfs_disk_key parent_key;
249         u64 start;
250         u64 max_size;
251         u64 nr;
252         u64 refs;
253         u64 extent_item_refs;
254         u64 generation;
255         u64 parent_generation;
256         u64 info_objectid;
257         u32 num_duplicates;
258         u8 info_level;
259         unsigned int flag_block_full_backref:2; /* two bits so that FLAG_UNSET (2) fits */
260         unsigned int found_rec:1;
261         unsigned int content_checked:1;
262         unsigned int owner_ref_checked:1;
263         unsigned int is_root:1;
264         unsigned int metadata:1;
265         unsigned int bad_full_backref:1;
266         unsigned int crossing_stripes:1;
267         unsigned int wrong_chunk_type:1;
268 };
269
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
271 {
272         return container_of(entry, struct extent_record, list);
273 }
274
/*
 * One name-based reference to an inode, kept on inode_record::backrefs.
 * The name is stored inline after the fixed part: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
275 struct inode_backref {
276         struct list_head list;
277         unsigned int found_dir_item:1;
278         unsigned int found_dir_index:1;
279         unsigned int found_inode_ref:1;
280         unsigned int filetype:8;
281         int errors;             /* NOTE(review): presumably REF_ERR_* bits - confirm */
282         unsigned int ref_type;
283         u64 dir;
284         u64 index;
285         u16 namelen;            /* length of name[], excluding the one extra trailing byte */
286         char name[0];           /* variable-length tail; must stay the last member */
287 };
288
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
290 {
291         return list_entry(entry, struct inode_backref, list);
292 }
293
/*
 * List-linked description of a root item.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; field meanings are to be confirmed against the code that fills it.
 */
294 struct root_item_record {
295         struct list_head list;
296         u64 objectid;
297         u64 bytenr;
298         u64 last_snapshot;
299         u8 level;
300         u8 drop_level;
301         int level_size;
302         struct btrfs_key drop_key;
303 };
304
/*
 * Error bits for name/root references; print_ref_error() turns each set
 * bit into a human-readable fragment.
 */
305 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
306 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
307 #define REF_ERR_NO_INODE_REF            (1 << 2)
308 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
309 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
310 #define REF_ERR_DUP_INODE_REF           (1 << 5)
311 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
312 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
313 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
314 #define REF_ERR_NO_ROOT_REF             (1 << 9)
315 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
316 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
317 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
318
/*
 * One hole [start, start + len) in a file's extent layout, kept in an
 * rb-tree that compare_hole() orders by start and then by len.
 */
319 struct file_extent_hole {
320         struct rb_node node;
321         u64 start;
322         u64 len;
323 };
324
/*
 * Everything collected about one inode while checking.
 * get_inode_rec() creates records and clone_inode_rec() deep-copies them.
 * NOTE(review): which scan sets each found_* flag is not visible in this
 * part of the file.
 */
325 struct inode_record {
326         struct list_head backrefs;      /* list of struct inode_backref */
327         unsigned int checked:1;
328         unsigned int merging:1;
329         unsigned int found_inode_item:1;
330         unsigned int found_dir_item:1;
331         unsigned int found_file_extent:1;
332         unsigned int found_csum_item:1;
333         unsigned int some_csum_missing:1;
334         unsigned int nodatasum:1;
335         int errors;                     /* I_ERR_* bits, decoded by print_inode_error() */
336
337         u64 ino;
338         u32 nlink;
339         u32 imode;
340         u64 isize;
341         u64 nbytes;
342
343         u32 found_link;
344         u64 found_size;
345         u64 extent_start;               /* set to (u64)-1 for a fresh record */
346         u64 extent_end;
347         struct rb_root holes;           /* rb-tree of struct file_extent_hole */
348         struct list_head orphan_extents; /* list of struct orphan_data_extent */
349
350         u32 refs;                       /* a record with refs > 1 is cloned before modification */
351 };
352
/*
 * Error bits stored in inode_record::errors; print_inode_error() turns
 * each set bit into a human-readable fragment.
 */
353 #define I_ERR_NO_INODE_ITEM             (1 << 0)
354 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
355 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
356 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
357 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
358 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
359 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
360 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
361 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
362 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
363 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
364 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
365 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
366 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
367 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
368
/*
 * One reference to a root, kept on root_record::backrefs (see
 * to_root_backref()).  The name is stored inline after the fixed part.
 */
369 struct root_backref {
370         struct list_head list;
371         unsigned int found_dir_item:1;
372         unsigned int found_dir_index:1;
373         unsigned int found_back_ref:1;
374         unsigned int found_forward_ref:1;
375         unsigned int reachable:1;
376         int errors;             /* NOTE(review): presumably REF_ERR_* bits - confirm */
377         u64 ref_root;
378         u64 dir;
379         u64 index;
380         u16 namelen;
381         char name[0];           /* variable-length tail; must stay the last member */
382 };
383
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
385 {
386         return list_entry(entry, struct root_backref, list);
387 }
388
/*
 * Per-root bookkeeping: a list head for backrefs plus an embedded
 * cache_extent.
 */
389 struct root_record {
390         struct list_head backrefs;
391         struct cache_extent cache;
392         unsigned int found_root_item:1;
393         u64 objectid;
394         u32 found_ref;
395 };
396
/*
 * Cache-tree entry carrying an opaque pointer.  get_inode_rec() keeps a
 * struct inode_record pointer in 'data'.
 */
397 struct ptr_node {
398         struct cache_extent cache;
399         void *data;
400 };
401
/*
 * Cache-tree entry holding its own root and inode caches.
 * NOTE(review): presumably one per shared tree block, referenced from
 * walk_control::nodes - confirm against the tree-walking code.
 */
402 struct shared_node {
403         struct cache_extent cache;
404         struct cache_tree root_cache;
405         struct cache_tree inode_cache;
406         struct inode_record *current;
407         u32 refs;
408 };
409
/* A start/size pair describing one block; no user is visible in this part of the file. */
410 struct block_info {
411         u64 start;
412         u32 size;
413 };
414
/*
 * State of a tree walk: a cache tree of shared nodes and one shared_node
 * slot per possible tree level (BTRFS_MAX_LEVEL entries).
 */
415 struct walk_control {
416         struct cache_tree shared;
417         struct shared_node *nodes[BTRFS_MAX_LEVEL];
418         int active_node;
419         int root_level;
420 };
421
/* List-linked (key, root id) pair; no user is visible in this part of the file. */
422 struct bad_item {
423         struct btrfs_key key;
424         u64 root_id;
425         struct list_head list;
426 };
427
/* List-linked extent candidate; no user is visible in this part of the file. */
428 struct extent_entry {
429         u64 bytenr;
430         u64 bytes;
431         int count;
432         int broken;
433         struct list_head list;
434 };
435
/* Cache-tree entry describing the top block of one root. */
436 struct root_item_info {
437         /* level of the root */
438         u8 level;
439         /* number of nodes at this level, must be 1 for a root */
440         int node_count;
441         u64 bytenr;
442         u64 gen;
443         struct cache_extent cache_extent;
444 };
445
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  They are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit so that several of them can be OR-ed into
 * one int and still be tested individually.  CROSSING_STRIPE_BOUNDARY used
 * to share bit 4 with REFERENCER_MISMATCH; it now uses the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
462
463 static void *print_status_check(void *p)
464 {
465         struct task_ctx *priv = p;
466         const char work_indicator[] = { '.', 'o', 'O', 'o' };
467         uint32_t count = 0;
468         static char *task_position_string[] = {
469                 "checking extents",
470                 "checking free space cache",
471                 "checking fs roots",
472         };
473
474         task_period_start(priv->info, 1000 /* 1s */);
475
476         if (priv->tp == TASK_NOTHING)
477                 return NULL;
478
479         while (1) {
480                 printf("%s [%c]\r", task_position_string[priv->tp],
481                                 work_indicator[count % 4]);
482                 count++;
483                 fflush(stdout);
484                 task_period_wait(priv->info);
485         }
486         return NULL;
487 }
488
/*
 * Progress completion hook: terminate the status line with a newline and
 * flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
496
497 static enum btrfs_check_mode parse_check_mode(const char *str)
498 {
499         if (strcmp(str, "lowmem") == 0)
500                 return CHECK_MODE_LOWMEM;
501         if (strcmp(str, "orig") == 0)
502                 return CHECK_MODE_ORIGINAL;
503         if (strcmp(str, "original") == 0)
504                 return CHECK_MODE_ORIGINAL;
505
506         return CHECK_MODE_UNKNOWN;
507 }
508
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
511 {
512         struct file_extent_hole *hole;
513
514         if (RB_EMPTY_ROOT(holes))
515                 return (u64)-1;
516
517         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
518         return hole->start;
519 }
520
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
522 {
523         struct file_extent_hole *hole1;
524         struct file_extent_hole *hole2;
525
526         hole1 = rb_entry(node1, struct file_extent_hole, node);
527         hole2 = rb_entry(node2, struct file_extent_hole, node);
528
529         if (hole1->start > hole2->start)
530                 return -1;
531         if (hole1->start < hole2->start)
532                 return 1;
533         /* Now hole1->start == hole2->start */
534         if (hole1->len >= hole2->len)
535                 /*
536                  * Hole 1 will be merge center
537                  * Same hole will be merged later
538                  */
539                 return -1;
540         /* Hole 2 will be merge center */
541         return 1;
542 }
543
544 /*
545  * Add a hole to the record
546  *
547  * This will do hole merge for copy_file_extent_holes(),
548  * which will ensure there won't be continuous holes.
549  */
550 static int add_file_extent_hole(struct rb_root *holes,
551                                 u64 start, u64 len)
552 {
553         struct file_extent_hole *hole;
554         struct file_extent_hole *prev = NULL;
555         struct file_extent_hole *next = NULL;
556
557         hole = malloc(sizeof(*hole));
558         if (!hole)
559                 return -ENOMEM;
560         hole->start = start;
561         hole->len = len;
562         /* Since compare will not return 0, no -EEXIST will happen */
563         rb_insert(holes, &hole->node, compare_hole);
564
565         /* simple merge with previous hole */
566         if (rb_prev(&hole->node))
567                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
568                                 node);
569         if (prev && prev->start + prev->len >= hole->start) {
570                 hole->len = hole->start + hole->len - prev->start;
571                 hole->start = prev->start;
572                 rb_erase(&prev->node, holes);
573                 free(prev);
574                 prev = NULL;
575         }
576
577         /* iterate merge with next holes */
578         while (1) {
579                 if (!rb_next(&hole->node))
580                         break;
581                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
582                                         node);
583                 if (hole->start + hole->len >= next->start) {
584                         if (hole->start + hole->len <= next->start + next->len)
585                                 hole->len = next->start + next->len -
586                                             hole->start;
587                         rb_erase(&next->node, holes);
588                         free(next);
589                         next = NULL;
590                 } else
591                         break;
592         }
593         return 0;
594 }
595
596 static int compare_hole_range(struct rb_node *node, void *data)
597 {
598         struct file_extent_hole *hole;
599         u64 start;
600
601         hole = (struct file_extent_hole *)data;
602         start = hole->start;
603
604         hole = rb_entry(node, struct file_extent_hole, node);
605         if (start < hole->start)
606                 return -1;
607         if (start >= hole->start && start < hole->start + hole->len)
608                 return 0;
609         return 1;
610 }
611
612 /*
613  * Delete a hole in the record
614  *
615  * This will do the hole split and is much restrict than add.
616  */
617 static int del_file_extent_hole(struct rb_root *holes,
618                                 u64 start, u64 len)
619 {
620         struct file_extent_hole *hole;
621         struct file_extent_hole tmp;
622         u64 prev_start = 0;
623         u64 prev_len = 0;
624         u64 next_start = 0;
625         u64 next_len = 0;
626         struct rb_node *node;
627         int have_prev = 0;
628         int have_next = 0;
629         int ret = 0;
630
631         tmp.start = start;
632         tmp.len = len;
633         node = rb_search(holes, &tmp, compare_hole_range, NULL);
634         if (!node)
635                 return -EEXIST;
636         hole = rb_entry(node, struct file_extent_hole, node);
637         if (start + len > hole->start + hole->len)
638                 return -EEXIST;
639
640         /*
641          * Now there will be no overlap, delete the hole and re-add the
642          * split(s) if they exists.
643          */
644         if (start > hole->start) {
645                 prev_start = hole->start;
646                 prev_len = start - hole->start;
647                 have_prev = 1;
648         }
649         if (hole->start + hole->len > start + len) {
650                 next_start = start + len;
651                 next_len = hole->start + hole->len - start - len;
652                 have_next = 1;
653         }
654         rb_erase(node, holes);
655         free(hole);
656         if (have_prev) {
657                 ret = add_file_extent_hole(holes, prev_start, prev_len);
658                 if (ret < 0)
659                         return ret;
660         }
661         if (have_next) {
662                 ret = add_file_extent_hole(holes, next_start, next_len);
663                 if (ret < 0)
664                         return ret;
665         }
666         return 0;
667 }
668
669 static int copy_file_extent_holes(struct rb_root *dst,
670                                   struct rb_root *src)
671 {
672         struct file_extent_hole *hole;
673         struct rb_node *node;
674         int ret = 0;
675
676         node = rb_first(src);
677         while (node) {
678                 hole = rb_entry(node, struct file_extent_hole, node);
679                 ret = add_file_extent_hole(dst, hole->start, hole->len);
680                 if (ret)
681                         break;
682                 node = rb_next(node);
683         }
684         return ret;
685 }
686
687 static void free_file_extent_holes(struct rb_root *holes)
688 {
689         struct rb_node *node;
690         struct file_extent_hole *hole;
691
692         node = rb_first(holes);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 rb_erase(node, holes);
696                 free(hole);
697                 node = rb_first(holes);
698         }
699 }
700
/* Forward declaration; the definition is not in this part of the file. */
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
702
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704                                  struct btrfs_root *root)
705 {
706         if (root->last_trans != trans->transid) {
707                 root->track_dirty = 1;
708                 root->last_trans = trans->transid;
709                 root->commit_root = root->node;
710                 extent_buffer_get(root->node);
711         }
712 }
713
714 static u8 imode_to_type(u32 imode)
715 {
716 #define S_SHIFT 12
717         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
719                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
720                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
721                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
722                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
723                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
724                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
725         };
726
727         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
728 #undef S_SHIFT
729 }
730
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
732 {
733         struct device_record *rec1;
734         struct device_record *rec2;
735
736         rec1 = rb_entry(node1, struct device_record, node);
737         rec2 = rb_entry(node2, struct device_record, node);
738         if (rec1->devid > rec2->devid)
739                 return -1;
740         else if (rec1->devid < rec2->devid)
741                 return 1;
742         else
743                 return 0;
744 }
745
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
747 {
748         struct inode_record *rec;
749         struct inode_backref *backref;
750         struct inode_backref *orig;
751         struct inode_backref *tmp;
752         struct orphan_data_extent *src_orphan;
753         struct orphan_data_extent *dst_orphan;
754         size_t size;
755         int ret;
756
757         rec = malloc(sizeof(*rec));
758         if (!rec)
759                 return ERR_PTR(-ENOMEM);
760         memcpy(rec, orig_rec, sizeof(*rec));
761         rec->refs = 1;
762         INIT_LIST_HEAD(&rec->backrefs);
763         INIT_LIST_HEAD(&rec->orphan_extents);
764         rec->holes = RB_ROOT;
765
766         list_for_each_entry(orig, &orig_rec->backrefs, list) {
767                 size = sizeof(*orig) + orig->namelen + 1;
768                 backref = malloc(size);
769                 if (!backref) {
770                         ret = -ENOMEM;
771                         goto cleanup;
772                 }
773                 memcpy(backref, orig, size);
774                 list_add_tail(&backref->list, &rec->backrefs);
775         }
776         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777                 dst_orphan = malloc(sizeof(*dst_orphan));
778                 if (!dst_orphan) {
779                         ret = -ENOMEM;
780                         goto cleanup;
781                 }
782                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
784         }
785         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
786         BUG_ON(ret < 0);
787
788         return rec;
789
790 cleanup:
791         if (!list_empty(&rec->backrefs))
792                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793                         list_del(&orig->list);
794                         free(orig);
795                 }
796
797         if (!list_empty(&rec->orphan_extents))
798                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799                         list_del(&orig->list);
800                         free(orig);
801                 }
802
803         free(rec);
804
805         return ERR_PTR(ret);
806 }
807
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
809                                       u64 objectid)
810 {
811         struct orphan_data_extent *orphan;
812
813         if (list_empty(orphan_extents))
814                 return;
815         printf("The following data extent is lost in tree %llu:\n",
816                objectid);
817         list_for_each_entry(orphan, orphan_extents, list) {
818                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
820                        orphan->disk_len);
821         }
822 }
823
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
825 {
826         u64 root_objectid = root->root_key.objectid;
827         int errors = rec->errors;
828
829         if (!errors)
830                 return;
831         /* reloc root errors, we print its corresponding fs root objectid*/
832         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833                 root_objectid = root->root_key.offset;
834                 fprintf(stderr, "reloc");
835         }
836         fprintf(stderr, "root %llu inode %llu errors %x",
837                 (unsigned long long) root_objectid,
838                 (unsigned long long) rec->ino, rec->errors);
839
840         if (errors & I_ERR_NO_INODE_ITEM)
841                 fprintf(stderr, ", no inode item");
842         if (errors & I_ERR_NO_ORPHAN_ITEM)
843                 fprintf(stderr, ", no orphan item");
844         if (errors & I_ERR_DUP_INODE_ITEM)
845                 fprintf(stderr, ", dup inode item");
846         if (errors & I_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & I_ERR_ODD_DIR_ITEM)
849                 fprintf(stderr, ", odd dir item");
850         if (errors & I_ERR_ODD_FILE_EXTENT)
851                 fprintf(stderr, ", odd file extent");
852         if (errors & I_ERR_BAD_FILE_EXTENT)
853                 fprintf(stderr, ", bad file extent");
854         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855                 fprintf(stderr, ", file extent overlap");
856         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857                 fprintf(stderr, ", file extent discount");
858         if (errors & I_ERR_DIR_ISIZE_WRONG)
859                 fprintf(stderr, ", dir isize wrong");
860         if (errors & I_ERR_FILE_NBYTES_WRONG)
861                 fprintf(stderr, ", nbytes wrong");
862         if (errors & I_ERR_ODD_CSUM_ITEM)
863                 fprintf(stderr, ", odd csum item");
864         if (errors & I_ERR_SOME_CSUM_MISSING)
865                 fprintf(stderr, ", some csum missing");
866         if (errors & I_ERR_LINK_COUNT_WRONG)
867                 fprintf(stderr, ", link count wrong");
868         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869                 fprintf(stderr, ", orphan file extent");
870         fprintf(stderr, "\n");
871         /* Print the orphan extents if needed */
872         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
874
875         /* Print the holes if needed */
876         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877                 struct file_extent_hole *hole;
878                 struct rb_node *node;
879                 int found = 0;
880
881                 node = rb_first(&rec->holes);
882                 fprintf(stderr, "Found file extent holes:\n");
883                 while (node) {
884                         found = 1;
885                         hole = rb_entry(node, struct file_extent_hole, node);
886                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
887                                 hole->start, hole->len);
888                         node = rb_next(node);
889                 }
890                 if (!found)
891                         fprintf(stderr, "\tstart: 0, len: %llu\n",
892                                 round_up(rec->isize, root->sectorsize));
893         }
894 }
895
896 static void print_ref_error(int errors)
897 {
898         if (errors & REF_ERR_NO_DIR_ITEM)
899                 fprintf(stderr, ", no dir item");
900         if (errors & REF_ERR_NO_DIR_INDEX)
901                 fprintf(stderr, ", no dir index");
902         if (errors & REF_ERR_NO_INODE_REF)
903                 fprintf(stderr, ", no inode ref");
904         if (errors & REF_ERR_DUP_DIR_ITEM)
905                 fprintf(stderr, ", dup dir item");
906         if (errors & REF_ERR_DUP_DIR_INDEX)
907                 fprintf(stderr, ", dup dir index");
908         if (errors & REF_ERR_DUP_INODE_REF)
909                 fprintf(stderr, ", dup inode ref");
910         if (errors & REF_ERR_INDEX_UNMATCH)
911                 fprintf(stderr, ", index mismatch");
912         if (errors & REF_ERR_FILETYPE_UNMATCH)
913                 fprintf(stderr, ", filetype mismatch");
914         if (errors & REF_ERR_NAME_TOO_LONG)
915                 fprintf(stderr, ", name too long");
916         if (errors & REF_ERR_NO_ROOT_REF)
917                 fprintf(stderr, ", no root ref");
918         if (errors & REF_ERR_NO_ROOT_BACKREF)
919                 fprintf(stderr, ", no root backref");
920         if (errors & REF_ERR_DUP_ROOT_REF)
921                 fprintf(stderr, ", dup root ref");
922         if (errors & REF_ERR_DUP_ROOT_BACKREF)
923                 fprintf(stderr, ", dup root backref");
924         fprintf(stderr, "\n");
925 }
926
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
928                                           u64 ino, int mod)
929 {
930         struct ptr_node *node;
931         struct cache_extent *cache;
932         struct inode_record *rec = NULL;
933         int ret;
934
935         cache = lookup_cache_extent(inode_cache, ino, 1);
936         if (cache) {
937                 node = container_of(cache, struct ptr_node, cache);
938                 rec = node->data;
939                 if (mod && rec->refs > 1) {
940                         node->data = clone_inode_rec(rec);
941                         if (IS_ERR(node->data))
942                                 return node->data;
943                         rec->refs--;
944                         rec = node->data;
945                 }
946         } else if (mod) {
947                 rec = calloc(1, sizeof(*rec));
948                 if (!rec)
949                         return ERR_PTR(-ENOMEM);
950                 rec->ino = ino;
951                 rec->extent_start = (u64)-1;
952                 rec->refs = 1;
953                 INIT_LIST_HEAD(&rec->backrefs);
954                 INIT_LIST_HEAD(&rec->orphan_extents);
955                 rec->holes = RB_ROOT;
956
957                 node = malloc(sizeof(*node));
958                 if (!node) {
959                         free(rec);
960                         return ERR_PTR(-ENOMEM);
961                 }
962                 node->cache.start = ino;
963                 node->cache.size = 1;
964                 node->data = rec;
965
966                 if (ino == BTRFS_FREE_INO_OBJECTID)
967                         rec->found_link = 1;
968
969                 ret = insert_cache_extent(inode_cache, &node->cache);
970                 if (ret)
971                         return ERR_PTR(-EEXIST);
972         }
973         return rec;
974 }
975
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
977 {
978         struct orphan_data_extent *orphan;
979
980         while (!list_empty(orphan_extents)) {
981                 orphan = list_entry(orphan_extents->next,
982                                     struct orphan_data_extent, list);
983                 list_del(&orphan->list);
984                 free(orphan);
985         }
986 }
987
988 static void free_inode_rec(struct inode_record *rec)
989 {
990         struct inode_backref *backref;
991
992         if (--rec->refs > 0)
993                 return;
994
995         while (!list_empty(&rec->backrefs)) {
996                 backref = to_inode_backref(rec->backrefs.next);
997                 list_del(&backref->list);
998                 free(backref);
999         }
1000         free_orphan_data_extents(&rec->orphan_extents);
1001         free_file_extent_holes(&rec->holes);
1002         free(rec);
1003 }
1004
1005 static int can_free_inode_rec(struct inode_record *rec)
1006 {
1007         if (!rec->errors && rec->checked && rec->found_inode_item &&
1008             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1009                 return 1;
1010         return 0;
1011 }
1012
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014                                  struct inode_record *rec)
1015 {
1016         struct cache_extent *cache;
1017         struct inode_backref *tmp, *backref;
1018         struct ptr_node *node;
1019         unsigned char filetype;
1020
1021         if (!rec->found_inode_item)
1022                 return;
1023
1024         filetype = imode_to_type(rec->imode);
1025         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026                 if (backref->found_dir_item && backref->found_dir_index) {
1027                         if (backref->filetype != filetype)
1028                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029                         if (!backref->errors && backref->found_inode_ref &&
1030                             rec->nlink == rec->found_link) {
1031                                 list_del(&backref->list);
1032                                 free(backref);
1033                         }
1034                 }
1035         }
1036
1037         if (!rec->checked || rec->merging)
1038                 return;
1039
1040         if (S_ISDIR(rec->imode)) {
1041                 if (rec->found_size != rec->isize)
1042                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043                 if (rec->found_file_extent)
1044                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046                 if (rec->found_dir_item)
1047                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1048                 if (rec->found_size != rec->nbytes)
1049                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050                 if (rec->nlink > 0 && !no_holes &&
1051                     (rec->extent_end < rec->isize ||
1052                      first_extent_gap(&rec->holes) < rec->isize))
1053                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1054         }
1055
1056         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057                 if (rec->found_csum_item && rec->nodatasum)
1058                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059                 if (rec->some_csum_missing && !rec->nodatasum)
1060                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1061         }
1062
1063         BUG_ON(rec->refs != 1);
1064         if (can_free_inode_rec(rec)) {
1065                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066                 node = container_of(cache, struct ptr_node, cache);
1067                 BUG_ON(node->data != rec);
1068                 remove_cache_extent(inode_cache, &node->cache);
1069                 free(node);
1070                 free_inode_rec(rec);
1071         }
1072 }
1073
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1075 {
1076         struct btrfs_path path;
1077         struct btrfs_key key;
1078         int ret;
1079
1080         key.objectid = BTRFS_ORPHAN_OBJECTID;
1081         key.type = BTRFS_ORPHAN_ITEM_KEY;
1082         key.offset = ino;
1083
1084         btrfs_init_path(&path);
1085         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086         btrfs_release_path(&path);
1087         if (ret > 0)
1088                 ret = -ENOENT;
1089         return ret;
1090 }
1091
1092 static int process_inode_item(struct extent_buffer *eb,
1093                               int slot, struct btrfs_key *key,
1094                               struct shared_node *active_node)
1095 {
1096         struct inode_record *rec;
1097         struct btrfs_inode_item *item;
1098
1099         rec = active_node->current;
1100         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101         if (rec->found_inode_item) {
1102                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1103                 return 1;
1104         }
1105         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106         rec->nlink = btrfs_inode_nlink(eb, item);
1107         rec->isize = btrfs_inode_size(eb, item);
1108         rec->nbytes = btrfs_inode_nbytes(eb, item);
1109         rec->imode = btrfs_inode_mode(eb, item);
1110         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1111                 rec->nodatasum = 1;
1112         rec->found_inode_item = 1;
1113         if (rec->nlink == 0)
1114                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115         maybe_free_inode_rec(&active_node->inode_cache, rec);
1116         return 0;
1117 }
1118
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1120                                                 const char *name,
1121                                                 int namelen, u64 dir)
1122 {
1123         struct inode_backref *backref;
1124
1125         list_for_each_entry(backref, &rec->backrefs, list) {
1126                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1127                         break;
1128                 if (backref->dir != dir || backref->namelen != namelen)
1129                         continue;
1130                 if (memcmp(name, backref->name, namelen))
1131                         continue;
1132                 return backref;
1133         }
1134
1135         backref = malloc(sizeof(*backref) + namelen + 1);
1136         if (!backref)
1137                 return NULL;
1138         memset(backref, 0, sizeof(*backref));
1139         backref->dir = dir;
1140         backref->namelen = namelen;
1141         memcpy(backref->name, name, namelen);
1142         backref->name[namelen] = '\0';
1143         list_add_tail(&backref->list, &rec->backrefs);
1144         return backref;
1145 }
1146
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148                              u64 ino, u64 dir, u64 index,
1149                              const char *name, int namelen,
1150                              int filetype, int itemtype, int errors)
1151 {
1152         struct inode_record *rec;
1153         struct inode_backref *backref;
1154
1155         rec = get_inode_rec(inode_cache, ino, 1);
1156         BUG_ON(IS_ERR(rec));
1157         backref = get_inode_backref(rec, name, namelen, dir);
1158         BUG_ON(!backref);
1159         if (errors)
1160                 backref->errors |= errors;
1161         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162                 if (backref->found_dir_index)
1163                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164                 if (backref->found_inode_ref && backref->index != index)
1165                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1166                 if (backref->found_dir_item && backref->filetype != filetype)
1167                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1168
1169                 backref->index = index;
1170                 backref->filetype = filetype;
1171                 backref->found_dir_index = 1;
1172         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1173                 rec->found_link++;
1174                 if (backref->found_dir_item)
1175                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176                 if (backref->found_dir_index && backref->filetype != filetype)
1177                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1178
1179                 backref->filetype = filetype;
1180                 backref->found_dir_item = 1;
1181         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183                 if (backref->found_inode_ref)
1184                         backref->errors |= REF_ERR_DUP_INODE_REF;
1185                 if (backref->found_dir_index && backref->index != index)
1186                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1187                 else
1188                         backref->index = index;
1189
1190                 backref->ref_type = itemtype;
1191                 backref->found_inode_ref = 1;
1192         } else {
1193                 BUG_ON(1);
1194         }
1195
1196         maybe_free_inode_rec(inode_cache, rec);
1197         return 0;
1198 }
1199
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201                             struct cache_tree *dst_cache)
1202 {
1203         struct inode_backref *backref;
1204         u32 dir_count = 0;
1205         int ret = 0;
1206
1207         dst->merging = 1;
1208         list_for_each_entry(backref, &src->backrefs, list) {
1209                 if (backref->found_dir_index) {
1210                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1211                                         backref->index, backref->name,
1212                                         backref->namelen, backref->filetype,
1213                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1214                 }
1215                 if (backref->found_dir_item) {
1216                         dir_count++;
1217                         add_inode_backref(dst_cache, dst->ino,
1218                                         backref->dir, 0, backref->name,
1219                                         backref->namelen, backref->filetype,
1220                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1221                 }
1222                 if (backref->found_inode_ref) {
1223                         add_inode_backref(dst_cache, dst->ino,
1224                                         backref->dir, backref->index,
1225                                         backref->name, backref->namelen, 0,
1226                                         backref->ref_type, backref->errors);
1227                 }
1228         }
1229
1230         if (src->found_dir_item)
1231                 dst->found_dir_item = 1;
1232         if (src->found_file_extent)
1233                 dst->found_file_extent = 1;
1234         if (src->found_csum_item)
1235                 dst->found_csum_item = 1;
1236         if (src->some_csum_missing)
1237                 dst->some_csum_missing = 1;
1238         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1240                 if (ret < 0)
1241                         return ret;
1242         }
1243
1244         BUG_ON(src->found_link < dir_count);
1245         dst->found_link += src->found_link - dir_count;
1246         dst->found_size += src->found_size;
1247         if (src->extent_start != (u64)-1) {
1248                 if (dst->extent_start == (u64)-1) {
1249                         dst->extent_start = src->extent_start;
1250                         dst->extent_end = src->extent_end;
1251                 } else {
1252                         if (dst->extent_end > src->extent_start)
1253                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254                         else if (dst->extent_end < src->extent_start) {
1255                                 ret = add_file_extent_hole(&dst->holes,
1256                                         dst->extent_end,
1257                                         src->extent_start - dst->extent_end);
1258                         }
1259                         if (dst->extent_end < src->extent_end)
1260                                 dst->extent_end = src->extent_end;
1261                 }
1262         }
1263
1264         dst->errors |= src->errors;
1265         if (src->found_inode_item) {
1266                 if (!dst->found_inode_item) {
1267                         dst->nlink = src->nlink;
1268                         dst->isize = src->isize;
1269                         dst->nbytes = src->nbytes;
1270                         dst->imode = src->imode;
1271                         dst->nodatasum = src->nodatasum;
1272                         dst->found_inode_item = 1;
1273                 } else {
1274                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1275                 }
1276         }
1277         dst->merging = 0;
1278
1279         return 0;
1280 }
1281
1282 static int splice_shared_node(struct shared_node *src_node,
1283                               struct shared_node *dst_node)
1284 {
1285         struct cache_extent *cache;
1286         struct ptr_node *node, *ins;
1287         struct cache_tree *src, *dst;
1288         struct inode_record *rec, *conflict;
1289         u64 current_ino = 0;
1290         int splice = 0;
1291         int ret;
1292
1293         if (--src_node->refs == 0)
1294                 splice = 1;
1295         if (src_node->current)
1296                 current_ino = src_node->current->ino;
1297
1298         src = &src_node->root_cache;
1299         dst = &dst_node->root_cache;
1300 again:
1301         cache = search_cache_extent(src, 0);
1302         while (cache) {
1303                 node = container_of(cache, struct ptr_node, cache);
1304                 rec = node->data;
1305                 cache = next_cache_extent(cache);
1306
1307                 if (splice) {
1308                         remove_cache_extent(src, &node->cache);
1309                         ins = node;
1310                 } else {
1311                         ins = malloc(sizeof(*ins));
1312                         BUG_ON(!ins);
1313                         ins->cache.start = node->cache.start;
1314                         ins->cache.size = node->cache.size;
1315                         ins->data = rec;
1316                         rec->refs++;
1317                 }
1318                 ret = insert_cache_extent(dst, &ins->cache);
1319                 if (ret == -EEXIST) {
1320                         conflict = get_inode_rec(dst, rec->ino, 1);
1321                         BUG_ON(IS_ERR(conflict));
1322                         merge_inode_recs(rec, conflict, dst);
1323                         if (rec->checked) {
1324                                 conflict->checked = 1;
1325                                 if (dst_node->current == conflict)
1326                                         dst_node->current = NULL;
1327                         }
1328                         maybe_free_inode_rec(dst, conflict);
1329                         free_inode_rec(rec);
1330                         free(ins);
1331                 } else {
1332                         BUG_ON(ret);
1333                 }
1334         }
1335
1336         if (src == &src_node->root_cache) {
1337                 src = &src_node->inode_cache;
1338                 dst = &dst_node->inode_cache;
1339                 goto again;
1340         }
1341
1342         if (current_ino > 0 && (!dst_node->current ||
1343             current_ino > dst_node->current->ino)) {
1344                 if (dst_node->current) {
1345                         dst_node->current->checked = 1;
1346                         maybe_free_inode_rec(dst, dst_node->current);
1347                 }
1348                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349                 BUG_ON(IS_ERR(dst_node->current));
1350         }
1351         return 0;
1352 }
1353
1354 static void free_inode_ptr(struct cache_extent *cache)
1355 {
1356         struct ptr_node *node;
1357         struct inode_record *rec;
1358
1359         node = container_of(cache, struct ptr_node, cache);
1360         rec = node->data;
1361         free_inode_rec(rec);
1362         free(node);
1363 }
1364
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1366
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1368                                             u64 bytenr)
1369 {
1370         struct cache_extent *cache;
1371         struct shared_node *node;
1372
1373         cache = lookup_cache_extent(shared, bytenr, 1);
1374         if (cache) {
1375                 node = container_of(cache, struct shared_node, cache);
1376                 return node;
1377         }
1378         return NULL;
1379 }
1380
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1382 {
1383         int ret;
1384         struct shared_node *node;
1385
1386         node = calloc(1, sizeof(*node));
1387         if (!node)
1388                 return -ENOMEM;
1389         node->cache.start = bytenr;
1390         node->cache.size = 1;
1391         cache_tree_init(&node->root_cache);
1392         cache_tree_init(&node->inode_cache);
1393         node->refs = refs;
1394
1395         ret = insert_cache_extent(shared, &node->cache);
1396
1397         return ret;
1398 }
1399
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401                              struct walk_control *wc, int level)
1402 {
1403         struct shared_node *node;
1404         struct shared_node *dest;
1405         int ret;
1406
1407         if (level == wc->active_node)
1408                 return 0;
1409
1410         BUG_ON(wc->active_node <= level);
1411         node = find_shared_node(&wc->shared, bytenr);
1412         if (!node) {
1413                 ret = add_shared_node(&wc->shared, bytenr, refs);
1414                 BUG_ON(ret);
1415                 node = find_shared_node(&wc->shared, bytenr);
1416                 wc->nodes[level] = node;
1417                 wc->active_node = level;
1418                 return 0;
1419         }
1420
1421         if (wc->root_level == wc->active_node &&
1422             btrfs_root_refs(&root->root_item) == 0) {
1423                 if (--node->refs == 0) {
1424                         free_inode_recs_tree(&node->root_cache);
1425                         free_inode_recs_tree(&node->inode_cache);
1426                         remove_cache_extent(&wc->shared, &node->cache);
1427                         free(node);
1428                 }
1429                 return 1;
1430         }
1431
1432         dest = wc->nodes[wc->active_node];
1433         splice_shared_node(node, dest);
1434         if (node->refs == 0) {
1435                 remove_cache_extent(&wc->shared, &node->cache);
1436                 free(node);
1437         }
1438         return 1;
1439 }
1440
1441 static int leave_shared_node(struct btrfs_root *root,
1442                              struct walk_control *wc, int level)
1443 {
1444         struct shared_node *node;
1445         struct shared_node *dest;
1446         int i;
1447
1448         if (level == wc->root_level)
1449                 return 0;
1450
1451         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1452                 if (wc->nodes[i])
1453                         break;
1454         }
1455         BUG_ON(i >= BTRFS_MAX_LEVEL);
1456
1457         node = wc->nodes[wc->active_node];
1458         wc->nodes[wc->active_node] = NULL;
1459         wc->active_node = i;
1460
1461         dest = wc->nodes[wc->active_node];
1462         if (wc->active_node < wc->root_level ||
1463             btrfs_root_refs(&root->root_item) > 0) {
1464                 BUG_ON(node->refs <= 1);
1465                 splice_shared_node(node, dest);
1466         } else {
1467                 BUG_ON(node->refs < 2);
1468                 node->refs--;
1469         }
1470         return 0;
1471 }
1472
1473 /*
1474  * Returns:
1475  * < 0 - on error
1476  * 1   - if the root with id child_root_id is a child of root parent_root_id
1477  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1478  *       has other root(s) as parent(s)
1479  * 2   - if the root child_root_id doesn't have any parent roots
1480  */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1482                          u64 child_root_id)
1483 {
1484         struct btrfs_path path;
1485         struct btrfs_key key;
1486         struct extent_buffer *leaf;
1487         int has_parent = 0;
1488         int ret;
1489
1490         btrfs_init_path(&path);
1491
1492         key.objectid = parent_root_id;
1493         key.type = BTRFS_ROOT_REF_KEY;
1494         key.offset = child_root_id;
1495         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1496                                 0, 0);
1497         if (ret < 0)
1498                 return ret;
1499         btrfs_release_path(&path);
1500         if (!ret)
1501                 return 1;
1502
1503         key.objectid = child_root_id;
1504         key.type = BTRFS_ROOT_BACKREF_KEY;
1505         key.offset = 0;
1506         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1507                                 0, 0);
1508         if (ret < 0)
1509                 goto out;
1510
1511         while (1) {
1512                 leaf = path.nodes[0];
1513                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1515                         if (ret)
1516                                 break;
1517                         leaf = path.nodes[0];
1518                 }
1519
1520                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521                 if (key.objectid != child_root_id ||
1522                     key.type != BTRFS_ROOT_BACKREF_KEY)
1523                         break;
1524
1525                 has_parent = 1;
1526
1527                 if (key.offset == parent_root_id) {
1528                         btrfs_release_path(&path);
1529                         return 1;
1530                 }
1531
1532                 path.slots[0]++;
1533         }
1534 out:
1535         btrfs_release_path(&path);
1536         if (ret < 0)
1537                 return ret;
1538         return has_parent ? 0 : 2;
1539 }
1540
1541 static int process_dir_item(struct btrfs_root *root,
1542                             struct extent_buffer *eb,
1543                             int slot, struct btrfs_key *key,
1544                             struct shared_node *active_node)
1545 {
1546         u32 total;
1547         u32 cur = 0;
1548         u32 len;
1549         u32 name_len;
1550         u32 data_len;
1551         int error;
1552         int nritems = 0;
1553         int filetype;
1554         struct btrfs_dir_item *di;
1555         struct inode_record *rec;
1556         struct cache_tree *root_cache;
1557         struct cache_tree *inode_cache;
1558         struct btrfs_key location;
1559         char namebuf[BTRFS_NAME_LEN];
1560
1561         root_cache = &active_node->root_cache;
1562         inode_cache = &active_node->inode_cache;
1563         rec = active_node->current;
1564         rec->found_dir_item = 1;
1565
1566         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567         total = btrfs_item_size_nr(eb, slot);
1568         while (cur < total) {
1569                 nritems++;
1570                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571                 name_len = btrfs_dir_name_len(eb, di);
1572                 data_len = btrfs_dir_data_len(eb, di);
1573                 filetype = btrfs_dir_type(eb, di);
1574
1575                 rec->found_size += name_len;
1576                 if (name_len <= BTRFS_NAME_LEN) {
1577                         len = name_len;
1578                         error = 0;
1579                 } else {
1580                         len = BTRFS_NAME_LEN;
1581                         error = REF_ERR_NAME_TOO_LONG;
1582                 }
1583                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1584
1585                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586                         add_inode_backref(inode_cache, location.objectid,
1587                                           key->objectid, key->offset, namebuf,
1588                                           len, filetype, key->type, error);
1589                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590                         add_inode_backref(root_cache, location.objectid,
1591                                           key->objectid, key->offset,
1592                                           namebuf, len, filetype,
1593                                           key->type, error);
1594                 } else {
1595                         fprintf(stderr, "invalid location in dir item %u\n",
1596                                 location.type);
1597                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598                                           key->objectid, key->offset, namebuf,
1599                                           len, filetype, key->type, error);
1600                 }
1601
1602                 len = sizeof(*di) + name_len + data_len;
1603                 di = (struct btrfs_dir_item *)((char *)di + len);
1604                 cur += len;
1605         }
1606         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1608
1609         return 0;
1610 }
1611
1612 static int process_inode_ref(struct extent_buffer *eb,
1613                              int slot, struct btrfs_key *key,
1614                              struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         int error;
1622         struct cache_tree *inode_cache;
1623         struct btrfs_inode_ref *ref;
1624         char namebuf[BTRFS_NAME_LEN];
1625
1626         inode_cache = &active_node->inode_cache;
1627
1628         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629         total = btrfs_item_size_nr(eb, slot);
1630         while (cur < total) {
1631                 name_len = btrfs_inode_ref_name_len(eb, ref);
1632                 index = btrfs_inode_ref_index(eb, ref);
1633                 if (name_len <= BTRFS_NAME_LEN) {
1634                         len = name_len;
1635                         error = 0;
1636                 } else {
1637                         len = BTRFS_NAME_LEN;
1638                         error = REF_ERR_NAME_TOO_LONG;
1639                 }
1640                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641                 add_inode_backref(inode_cache, key->objectid, key->offset,
1642                                   index, namebuf, len, 0, key->type, error);
1643
1644                 len = sizeof(*ref) + name_len;
1645                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1646                 cur += len;
1647         }
1648         return 0;
1649 }
1650
1651 static int process_inode_extref(struct extent_buffer *eb,
1652                                 int slot, struct btrfs_key *key,
1653                                 struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         u64 parent;
1661         int error;
1662         struct cache_tree *inode_cache;
1663         struct btrfs_inode_extref *extref;
1664         char namebuf[BTRFS_NAME_LEN];
1665
1666         inode_cache = &active_node->inode_cache;
1667
1668         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669         total = btrfs_item_size_nr(eb, slot);
1670         while (cur < total) {
1671                 name_len = btrfs_inode_extref_name_len(eb, extref);
1672                 index = btrfs_inode_extref_index(eb, extref);
1673                 parent = btrfs_inode_extref_parent(eb, extref);
1674                 if (name_len <= BTRFS_NAME_LEN) {
1675                         len = name_len;
1676                         error = 0;
1677                 } else {
1678                         len = BTRFS_NAME_LEN;
1679                         error = REF_ERR_NAME_TOO_LONG;
1680                 }
1681                 read_extent_buffer(eb, namebuf,
1682                                    (unsigned long)(extref + 1), len);
1683                 add_inode_backref(inode_cache, key->objectid, parent,
1684                                   index, namebuf, len, 0, key->type, error);
1685
1686                 len = sizeof(*extref) + name_len;
1687                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1688                 cur += len;
1689         }
1690         return 0;
1691
1692 }
1693
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695                             u64 len, u64 *found)
1696 {
1697         struct btrfs_key key;
1698         struct btrfs_path path;
1699         struct extent_buffer *leaf;
1700         int ret;
1701         size_t size;
1702         *found = 0;
1703         u64 csum_end;
1704         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1705
1706         btrfs_init_path(&path);
1707
1708         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1709         key.offset = start;
1710         key.type = BTRFS_EXTENT_CSUM_KEY;
1711
1712         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1713                                 &key, &path, 0, 0);
1714         if (ret < 0)
1715                 goto out;
1716         if (ret > 0 && path.slots[0] > 0) {
1717                 leaf = path.nodes[0];
1718                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720                     key.type == BTRFS_EXTENT_CSUM_KEY)
1721                         path.slots[0]--;
1722         }
1723
1724         while (len > 0) {
1725                 leaf = path.nodes[0];
1726                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1728                         if (ret > 0)
1729                                 break;
1730                         else if (ret < 0)
1731                                 goto out;
1732                         leaf = path.nodes[0];
1733                 }
1734
1735                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737                     key.type != BTRFS_EXTENT_CSUM_KEY)
1738                         break;
1739
1740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741                 if (key.offset >= start + len)
1742                         break;
1743
1744                 if (key.offset > start)
1745                         start = key.offset;
1746
1747                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749                 if (csum_end > start) {
1750                         size = min(csum_end - start, len);
1751                         len -= size;
1752                         start += size;
1753                         *found += size;
1754                 }
1755
1756                 path.slots[0]++;
1757         }
1758 out:
1759         btrfs_release_path(&path);
1760         if (ret < 0)
1761                 return ret;
1762         return 0;
1763 }
1764
1765 static int process_file_extent(struct btrfs_root *root,
1766                                 struct extent_buffer *eb,
1767                                 int slot, struct btrfs_key *key,
1768                                 struct shared_node *active_node)
1769 {
1770         struct inode_record *rec;
1771         struct btrfs_file_extent_item *fi;
1772         u64 num_bytes = 0;
1773         u64 disk_bytenr = 0;
1774         u64 extent_offset = 0;
1775         u64 mask = root->sectorsize - 1;
1776         int extent_type;
1777         int ret;
1778
1779         rec = active_node->current;
1780         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781         rec->found_file_extent = 1;
1782
1783         if (rec->extent_start == (u64)-1) {
1784                 rec->extent_start = key->offset;
1785                 rec->extent_end = key->offset;
1786         }
1787
1788         if (rec->extent_end > key->offset)
1789                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790         else if (rec->extent_end < key->offset) {
1791                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792                                            key->offset - rec->extent_end);
1793                 if (ret < 0)
1794                         return ret;
1795         }
1796
1797         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798         extent_type = btrfs_file_extent_type(eb, fi);
1799
1800         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1802                 if (num_bytes == 0)
1803                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804                 rec->found_size += num_bytes;
1805                 num_bytes = (num_bytes + mask) & ~mask;
1806         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810                 extent_offset = btrfs_file_extent_offset(eb, fi);
1811                 if (num_bytes == 0 || (num_bytes & mask))
1812                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813                 if (num_bytes + extent_offset >
1814                     btrfs_file_extent_ram_bytes(eb, fi))
1815                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817                     (btrfs_file_extent_compression(eb, fi) ||
1818                      btrfs_file_extent_encryption(eb, fi) ||
1819                      btrfs_file_extent_other_encoding(eb, fi)))
1820                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821                 if (disk_bytenr > 0)
1822                         rec->found_size += num_bytes;
1823         } else {
1824                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1825         }
1826         rec->extent_end = key->offset + num_bytes;
1827
1828         /*
1829          * The data reloc tree will copy full extents into its inode and then
1830          * copy the corresponding csums.  Because the extent it copied could be
1831          * a preallocated extent that hasn't been written to yet there may be no
1832          * csums to copy, ergo we won't have csums for our file extent.  This is
1833          * ok so just don't bother checking csums if the inode belongs to the
1834          * data reloc tree.
1835          */
1836         if (disk_bytenr > 0 &&
1837             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1838                 u64 found;
1839                 if (btrfs_file_extent_compression(eb, fi))
1840                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1841                 else
1842                         disk_bytenr += extent_offset;
1843
1844                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1845                 if (ret < 0)
1846                         return ret;
1847                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1848                         if (found > 0)
1849                                 rec->found_csum_item = 1;
1850                         if (found < num_bytes)
1851                                 rec->some_csum_missing = 1;
1852                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1853                         if (found > 0)
1854                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1855                 }
1856         }
1857         return 0;
1858 }
1859
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861                             struct walk_control *wc)
1862 {
1863         struct btrfs_key key;
1864         u32 nritems;
1865         int i;
1866         int ret = 0;
1867         struct cache_tree *inode_cache;
1868         struct shared_node *active_node;
1869
1870         if (wc->root_level == wc->active_node &&
1871             btrfs_root_refs(&root->root_item) == 0)
1872                 return 0;
1873
1874         active_node = wc->nodes[wc->active_node];
1875         inode_cache = &active_node->inode_cache;
1876         nritems = btrfs_header_nritems(eb);
1877         for (i = 0; i < nritems; i++) {
1878                 btrfs_item_key_to_cpu(eb, &key, i);
1879
1880                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1881                         continue;
1882                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1883                         continue;
1884
1885                 if (active_node->current == NULL ||
1886                     active_node->current->ino < key.objectid) {
1887                         if (active_node->current) {
1888                                 active_node->current->checked = 1;
1889                                 maybe_free_inode_rec(inode_cache,
1890                                                      active_node->current);
1891                         }
1892                         active_node->current = get_inode_rec(inode_cache,
1893                                                              key.objectid, 1);
1894                         BUG_ON(IS_ERR(active_node->current));
1895                 }
1896                 switch (key.type) {
1897                 case BTRFS_DIR_ITEM_KEY:
1898                 case BTRFS_DIR_INDEX_KEY:
1899                         ret = process_dir_item(root, eb, i, &key, active_node);
1900                         break;
1901                 case BTRFS_INODE_REF_KEY:
1902                         ret = process_inode_ref(eb, i, &key, active_node);
1903                         break;
1904                 case BTRFS_INODE_EXTREF_KEY:
1905                         ret = process_inode_extref(eb, i, &key, active_node);
1906                         break;
1907                 case BTRFS_INODE_ITEM_KEY:
1908                         ret = process_inode_item(eb, i, &key, active_node);
1909                         break;
1910                 case BTRFS_EXTENT_DATA_KEY:
1911                         ret = process_file_extent(root, eb, i, &key,
1912                                                   active_node);
1913                         break;
1914                 default:
1915                         break;
1916                 };
1917         }
1918         return ret;
1919 }
1920
1921 static void reada_walk_down(struct btrfs_root *root,
1922                             struct extent_buffer *node, int slot)
1923 {
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         u32 nritems;
1927         u32 blocksize;
1928         int i;
1929         int level;
1930
1931         level = btrfs_header_level(node);
1932         if (level != 1)
1933                 return;
1934
1935         nritems = btrfs_header_nritems(node);
1936         blocksize = root->nodesize;
1937         for (i = slot; i < nritems; i++) {
1938                 bytenr = btrfs_node_blockptr(node, i);
1939                 ptr_gen = btrfs_node_ptr_generation(node, i);
1940                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1941         }
1942 }
1943
1944 /*
1945  * Check the child node/leaf by the following condition:
1946  * 1. the first item key of the node/leaf should be the same with the one
1947  *    in parent.
1948  * 2. block in parent node should match the child node/leaf.
1949  * 3. generation of parent node and child's header should be consistent.
1950  *
1951  * Or the child node/leaf pointed by the key in parent is not valid.
1952  *
1953  * We hope to check leaf owner too, but since subvol may share leaves,
1954  * which makes leaf owner check not so strong, key check should be
1955  * sufficient enough for that case.
1956  */
1957 static int check_child_node(struct btrfs_root *root,
1958                             struct extent_buffer *parent, int slot,
1959                             struct extent_buffer *child)
1960 {
1961         struct btrfs_key parent_key;
1962         struct btrfs_key child_key;
1963         int ret = 0;
1964
1965         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966         if (btrfs_header_level(child) == 0)
1967                 btrfs_item_key_to_cpu(child, &child_key, 0);
1968         else
1969                 btrfs_node_key_to_cpu(child, &child_key, 0);
1970
1971         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1972                 ret = -EINVAL;
1973                 fprintf(stderr,
1974                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975                         parent_key.objectid, parent_key.type, parent_key.offset,
1976                         child_key.objectid, child_key.type, child_key.offset);
1977         }
1978         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1979                 ret = -EINVAL;
1980                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981                         btrfs_node_blockptr(parent, slot),
1982                         btrfs_header_bytenr(child));
1983         }
1984         if (btrfs_node_ptr_generation(parent, slot) !=
1985             btrfs_header_generation(child)) {
1986                 ret = -EINVAL;
1987                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988                         btrfs_header_generation(child),
1989                         btrfs_node_ptr_generation(parent, slot));
1990         }
1991         return ret;
1992 }
1993
/*
 * Per-level cache of the last extent reference lookup done while walking
 * down a tree, so the same block does not need to be looked up twice.
 */
struct node_refs {
        /* start bytenr of the tree block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count that lookup returned for the block above */
        u64 refs[BTRFS_MAX_LEVEL];
};
1998
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000                           struct walk_control *wc, int *level,
2001                           struct node_refs *nrefs)
2002 {
2003         enum btrfs_tree_block_status status;
2004         u64 bytenr;
2005         u64 ptr_gen;
2006         struct extent_buffer *next;
2007         struct extent_buffer *cur;
2008         u32 blocksize;
2009         int ret, err = 0;
2010         u64 refs;
2011
2012         WARN_ON(*level < 0);
2013         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014
2015         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016                 refs = nrefs->refs[*level];
2017                 ret = 0;
2018         } else {
2019                 ret = btrfs_lookup_extent_info(NULL, root,
2020                                        path->nodes[*level]->start,
2021                                        *level, 1, &refs, NULL);
2022                 if (ret < 0) {
2023                         err = ret;
2024                         goto out;
2025                 }
2026                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027                 nrefs->refs[*level] = refs;
2028         }
2029
2030         if (refs > 1) {
2031                 ret = enter_shared_node(root, path->nodes[*level]->start,
2032                                         refs, wc, *level);
2033                 if (ret > 0) {
2034                         err = ret;
2035                         goto out;
2036                 }
2037         }
2038
2039         while (*level >= 0) {
2040                 WARN_ON(*level < 0);
2041                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042                 cur = path->nodes[*level];
2043
2044                 if (btrfs_header_level(cur) != *level)
2045                         WARN_ON(1);
2046
2047                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2048                         break;
2049                 if (*level == 0) {
2050                         ret = process_one_leaf(root, cur, wc);
2051                         if (ret < 0)
2052                                 err = ret;
2053                         break;
2054                 }
2055                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057                 blocksize = root->nodesize;
2058
2059                 if (bytenr == nrefs->bytenr[*level - 1]) {
2060                         refs = nrefs->refs[*level - 1];
2061                 } else {
2062                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063                                         *level - 1, 1, &refs, NULL);
2064                         if (ret < 0) {
2065                                 refs = 0;
2066                         } else {
2067                                 nrefs->bytenr[*level - 1] = bytenr;
2068                                 nrefs->refs[*level - 1] = refs;
2069                         }
2070                 }
2071
2072                 if (refs > 1) {
2073                         ret = enter_shared_node(root, bytenr, refs,
2074                                                 wc, *level - 1);
2075                         if (ret > 0) {
2076                                 path->slots[*level]++;
2077                                 continue;
2078                         }
2079                 }
2080
2081                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083                         free_extent_buffer(next);
2084                         reada_walk_down(root, cur, path->slots[*level]);
2085                         next = read_tree_block(root, bytenr, blocksize,
2086                                                ptr_gen);
2087                         if (!extent_buffer_uptodate(next)) {
2088                                 struct btrfs_key node_key;
2089
2090                                 btrfs_node_key_to_cpu(path->nodes[*level],
2091                                                       &node_key,
2092                                                       path->slots[*level]);
2093                                 btrfs_add_corrupt_extent_record(root->fs_info,
2094                                                 &node_key,
2095                                                 path->nodes[*level]->start,
2096                                                 root->nodesize, *level);
2097                                 err = -EIO;
2098                                 goto out;
2099                         }
2100                 }
2101
2102                 ret = check_child_node(root, cur, path->slots[*level], next);
2103                 if (ret) {
2104                         err = ret;
2105                         goto out;
2106                 }
2107
2108                 if (btrfs_is_leaf(next))
2109                         status = btrfs_check_leaf(root, NULL, next);
2110                 else
2111                         status = btrfs_check_node(root, NULL, next);
2112                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113                         free_extent_buffer(next);
2114                         err = -EIO;
2115                         goto out;
2116                 }
2117
2118                 *level = *level - 1;
2119                 free_extent_buffer(path->nodes[*level]);
2120                 path->nodes[*level] = next;
2121                 path->slots[*level] = 0;
2122         }
2123 out:
2124         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2125         return err;
2126 }
2127
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129                         struct walk_control *wc, int *level)
2130 {
2131         int i;
2132         struct extent_buffer *leaf;
2133
2134         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135                 leaf = path->nodes[i];
2136                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2137                         path->slots[i]++;
2138                         *level = i;
2139                         return 0;
2140                 } else {
2141                         free_extent_buffer(path->nodes[*level]);
2142                         path->nodes[*level] = NULL;
2143                         BUG_ON(*level > wc->active_node);
2144                         if (*level == wc->active_node)
2145                                 leave_shared_node(root, wc, *level);
2146                         *level = i + 1;
2147                 }
2148         }
2149         return 1;
2150 }
2151
2152 static int check_root_dir(struct inode_record *rec)
2153 {
2154         struct inode_backref *backref;
2155         int ret = -1;
2156
2157         if (!rec->found_inode_item || rec->errors)
2158                 goto out;
2159         if (rec->nlink != 1 || rec->found_link != 0)
2160                 goto out;
2161         if (list_empty(&rec->backrefs))
2162                 goto out;
2163         backref = to_inode_backref(rec->backrefs.next);
2164         if (!backref->found_inode_ref)
2165                 goto out;
2166         if (backref->index != 0 || backref->namelen != 2 ||
2167             memcmp(backref->name, "..", 2))
2168                 goto out;
2169         if (backref->found_dir_index || backref->found_dir_item)
2170                 goto out;
2171         ret = 0;
2172 out:
2173         return ret;
2174 }
2175
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177                               struct btrfs_root *root, struct btrfs_path *path,
2178                               struct inode_record *rec)
2179 {
2180         struct btrfs_inode_item *ei;
2181         struct btrfs_key key;
2182         int ret;
2183
2184         key.objectid = rec->ino;
2185         key.type = BTRFS_INODE_ITEM_KEY;
2186         key.offset = (u64)-1;
2187
2188         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2189         if (ret < 0)
2190                 goto out;
2191         if (ret) {
2192                 if (!path->slots[0]) {
2193                         ret = -ENOENT;
2194                         goto out;
2195                 }
2196                 path->slots[0]--;
2197                 ret = 0;
2198         }
2199         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200         if (key.objectid != rec->ino) {
2201                 ret = -ENOENT;
2202                 goto out;
2203         }
2204
2205         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206                             struct btrfs_inode_item);
2207         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208         btrfs_mark_buffer_dirty(path->nodes[0]);
2209         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211                root->root_key.objectid);
2212 out:
2213         btrfs_release_path(path);
2214         return ret;
2215 }
2216
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218                                     struct btrfs_root *root,
2219                                     struct btrfs_path *path,
2220                                     struct inode_record *rec)
2221 {
2222         int ret;
2223
2224         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225         btrfs_release_path(path);
2226         if (!ret)
2227                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2228         return ret;
2229 }
2230
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         struct btrfs_inode_item *ei;
2237         struct btrfs_key key;
2238         int ret = 0;
2239
2240         key.objectid = rec->ino;
2241         key.type = BTRFS_INODE_ITEM_KEY;
2242         key.offset = 0;
2243
2244         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2245         if (ret) {
2246                 if (ret > 0)
2247                         ret = -ENOENT;
2248                 goto out;
2249         }
2250
2251         /* Since ret == 0, no need to check anything */
2252         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253                             struct btrfs_inode_item);
2254         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255         btrfs_mark_buffer_dirty(path->nodes[0]);
2256         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257         printf("reset nbytes for ino %llu root %llu\n",
2258                rec->ino, root->root_key.objectid);
2259 out:
2260         btrfs_release_path(path);
2261         return ret;
2262 }
2263
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265                                  struct cache_tree *inode_cache,
2266                                  struct inode_record *rec,
2267                                  struct inode_backref *backref)
2268 {
2269         struct btrfs_path *path;
2270         struct btrfs_trans_handle *trans;
2271         struct btrfs_dir_item *dir_item;
2272         struct extent_buffer *leaf;
2273         struct btrfs_key key;
2274         struct btrfs_disk_key disk_key;
2275         struct inode_record *dir_rec;
2276         unsigned long name_ptr;
2277         u32 data_size = sizeof(*dir_item) + backref->namelen;
2278         int ret;
2279
2280         path = btrfs_alloc_path();
2281         if (!path)
2282                 return -ENOMEM;
2283
2284         trans = btrfs_start_transaction(root, 1);
2285         if (IS_ERR(trans)) {
2286                 btrfs_free_path(path);
2287                 return PTR_ERR(trans);
2288         }
2289
2290         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291                 (unsigned long long)rec->ino);
2292         key.objectid = backref->dir;
2293         key.type = BTRFS_DIR_INDEX_KEY;
2294         key.offset = backref->index;
2295
2296         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2297         BUG_ON(ret);
2298
2299         leaf = path->nodes[0];
2300         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2301
2302         disk_key.objectid = cpu_to_le64(rec->ino);
2303         disk_key.type = BTRFS_INODE_ITEM_KEY;
2304         disk_key.offset = 0;
2305
2306         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308         btrfs_set_dir_data_len(leaf, dir_item, 0);
2309         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310         name_ptr = (unsigned long)(dir_item + 1);
2311         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312         btrfs_mark_buffer_dirty(leaf);
2313         btrfs_free_path(path);
2314         btrfs_commit_transaction(trans, root);
2315
2316         backref->found_dir_index = 1;
2317         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318         BUG_ON(IS_ERR(dir_rec));
2319         if (!dir_rec)
2320                 return 0;
2321         dir_rec->found_size += backref->namelen;
2322         if (dir_rec->found_size == dir_rec->isize &&
2323             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325         if (dir_rec->found_size != dir_rec->isize)
2326                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2327
2328         return 0;
2329 }
2330
2331 static int delete_dir_index(struct btrfs_root *root,
2332                             struct cache_tree *inode_cache,
2333                             struct inode_record *rec,
2334                             struct inode_backref *backref)
2335 {
2336         struct btrfs_trans_handle *trans;
2337         struct btrfs_dir_item *di;
2338         struct btrfs_path *path;
2339         int ret = 0;
2340
2341         path = btrfs_alloc_path();
2342         if (!path)
2343                 return -ENOMEM;
2344
2345         trans = btrfs_start_transaction(root, 1);
2346         if (IS_ERR(trans)) {
2347                 btrfs_free_path(path);
2348                 return PTR_ERR(trans);
2349         }
2350
2351
2352         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353                 (unsigned long long)backref->dir,
2354                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355                 (unsigned long long)root->objectid);
2356
2357         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358                                     backref->name, backref->namelen,
2359                                     backref->index, -1);
2360         if (IS_ERR(di)) {
2361                 ret = PTR_ERR(di);
2362                 btrfs_free_path(path);
2363                 btrfs_commit_transaction(trans, root);
2364                 if (ret == -ENOENT)
2365                         return 0;
2366                 return ret;
2367         }
2368
2369         if (!di)
2370                 ret = btrfs_del_item(trans, root, path);
2371         else
2372                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2373         BUG_ON(ret);
2374         btrfs_free_path(path);
2375         btrfs_commit_transaction(trans, root);
2376         return ret;
2377 }
2378
2379 static int create_inode_item(struct btrfs_root *root,
2380                              struct inode_record *rec,
2381                              struct inode_backref *backref, int root_dir)
2382 {
2383         struct btrfs_trans_handle *trans;
2384         struct btrfs_inode_item inode_item;
2385         time_t now = time(NULL);
2386         int ret;
2387
2388         trans = btrfs_start_transaction(root, 1);
2389         if (IS_ERR(trans)) {
2390                 ret = PTR_ERR(trans);
2391                 return ret;
2392         }
2393
2394         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395                 "be incomplete, please check permissions and content after "
2396                 "the fsck completes.\n", (unsigned long long)root->objectid,
2397                 (unsigned long long)rec->ino);
2398
2399         memset(&inode_item, 0, sizeof(inode_item));
2400         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2401         if (root_dir)
2402                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2403         else
2404                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406         if (rec->found_dir_item) {
2407                 if (rec->found_file_extent)
2408                         fprintf(stderr, "root %llu inode %llu has both a dir "
2409                                 "item and extents, unsure if it is a dir or a "
2410                                 "regular file so setting it as a directory\n",
2411                                 (unsigned long long)root->objectid,
2412                                 (unsigned long long)rec->ino);
2413                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415         } else if (!rec->found_dir_item) {
2416                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2418         }
2419         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2427
2428         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2429         BUG_ON(ret);
2430         btrfs_commit_transaction(trans, root);
2431         return 0;
2432 }
2433
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435                                  struct inode_record *rec,
2436                                  struct cache_tree *inode_cache,
2437                                  int delete)
2438 {
2439         struct inode_backref *tmp, *backref;
2440         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2441         int ret = 0;
2442         int repaired = 0;
2443
2444         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445                 if (!delete && rec->ino == root_dirid) {
2446                         if (!rec->found_inode_item) {
2447                                 ret = create_inode_item(root, rec, backref, 1);
2448                                 if (ret)
2449                                         break;
2450                                 repaired++;
2451                         }
2452                 }
2453
2454                 /* Index 0 for root dir's are special, don't mess with it */
2455                 if (rec->ino == root_dirid && backref->index == 0)
2456                         continue;
2457
2458                 if (delete &&
2459                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2460                      (backref->found_dir_index && backref->found_inode_ref &&
2461                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462                         ret = delete_dir_index(root, inode_cache, rec, backref);
2463                         if (ret)
2464                                 break;
2465                         repaired++;
2466                         list_del(&backref->list);
2467                         free(backref);
2468                 }
2469
2470                 if (!delete && !backref->found_dir_index &&
2471                     backref->found_dir_item && backref->found_inode_ref) {
2472                         ret = add_missing_dir_index(root, inode_cache, rec,
2473                                                     backref);
2474                         if (ret)
2475                                 break;
2476                         repaired++;
2477                         if (backref->found_dir_item &&
2478                             backref->found_dir_index &&
2479                             backref->found_dir_index) {
2480                                 if (!backref->errors &&
2481                                     backref->found_inode_ref) {
2482                                         list_del(&backref->list);
2483                                         free(backref);
2484                                 }
2485                         }
2486                 }
2487
2488                 if (!delete && (!backref->found_dir_index &&
2489                                 !backref->found_dir_item &&
2490                                 backref->found_inode_ref)) {
2491                         struct btrfs_trans_handle *trans;
2492                         struct btrfs_key location;
2493
2494                         ret = check_dir_conflict(root, backref->name,
2495                                                  backref->namelen,
2496                                                  backref->dir,
2497                                                  backref->index);
2498                         if (ret) {
2499                                 /*
2500                                  * let nlink fixing routine to handle it,
2501                                  * which can do it better.
2502                                  */
2503                                 ret = 0;
2504                                 break;
2505                         }
2506                         location.objectid = rec->ino;
2507                         location.type = BTRFS_INODE_ITEM_KEY;
2508                         location.offset = 0;
2509
2510                         trans = btrfs_start_transaction(root, 1);
2511                         if (IS_ERR(trans)) {
2512                                 ret = PTR_ERR(trans);
2513                                 break;
2514                         }
2515                         fprintf(stderr, "adding missing dir index/item pair "
2516                                 "for inode %llu\n",
2517                                 (unsigned long long)rec->ino);
2518                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2519                                                     backref->namelen,
2520                                                     backref->dir, &location,
2521                                                     imode_to_type(rec->imode),
2522                                                     backref->index);
2523                         BUG_ON(ret);
2524                         btrfs_commit_transaction(trans, root);
2525                         repaired++;
2526                 }
2527
2528                 if (!delete && (backref->found_inode_ref &&
2529                                 backref->found_dir_index &&
2530                                 backref->found_dir_item &&
2531                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532                                 !rec->found_inode_item)) {
2533                         ret = create_inode_item(root, rec, backref, 0);
2534                         if (ret)
2535                                 break;
2536                         repaired++;
2537                 }
2538
2539         }
2540         return ret ? ret : repaired;
2541 }
2542
2543 /*
2544  * To determine the file type for nlink/inode_item repair
2545  *
2546  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547  * Return -ENOENT if file type is not found.
2548  */
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2550 {
2551         struct inode_backref *backref;
2552
2553         /* For inode item recovered case */
2554         if (rec->found_inode_item) {
2555                 *type = imode_to_type(rec->imode);
2556                 return 0;
2557         }
2558
2559         list_for_each_entry(backref, &rec->backrefs, list) {
2560                 if (backref->found_dir_index || backref->found_dir_item) {
2561                         *type = backref->filetype;
2562                         return 0;
2563                 }
2564         }
2565         return -ENOENT;
2566 }
2567
2568 /*
2569  * To determine the file name for nlink repair
2570  *
2571  * Return 0 if file name is found, set name and namelen.
2572  * Return -ENOENT if file name is not found.
2573  */
2574 static int find_file_name(struct inode_record *rec,
2575                           char *name, int *namelen)
2576 {
2577         struct inode_backref *backref;
2578
2579         list_for_each_entry(backref, &rec->backrefs, list) {
2580                 if (backref->found_dir_index || backref->found_dir_item ||
2581                     backref->found_inode_ref) {
2582                         memcpy(name, backref->name, backref->namelen);
2583                         *namelen = backref->namelen;
2584                         return 0;
2585                 }
2586         }
2587         return -ENOENT;
2588 }
2589
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592                        struct btrfs_root *root,
2593                        struct btrfs_path *path,
2594                        struct inode_record *rec)
2595 {
2596         struct inode_backref *backref;
2597         struct inode_backref *tmp;
2598         struct btrfs_key key;
2599         struct btrfs_inode_item *inode_item;
2600         int ret = 0;
2601
2602         /* We don't believe this either, reset it and iterate backref */
2603         rec->found_link = 0;
2604
2605         /* Remove all backref including the valid ones */
2606         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608                                    backref->index, backref->name,
2609                                    backref->namelen, 0);
2610                 if (ret < 0)
2611                         goto out;
2612
2613                 /* remove invalid backref, so it won't be added back */
2614                 if (!(backref->found_dir_index &&
2615                       backref->found_dir_item &&
2616                       backref->found_inode_ref)) {
2617                         list_del(&backref->list);
2618                         free(backref);
2619                 } else {
2620                         rec->found_link++;
2621                 }
2622         }
2623
2624         /* Set nlink to 0 */
2625         key.objectid = rec->ino;
2626         key.type = BTRFS_INODE_ITEM_KEY;
2627         key.offset = 0;
2628         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2629         if (ret < 0)
2630                 goto out;
2631         if (ret > 0) {
2632                 ret = -ENOENT;
2633                 goto out;
2634         }
2635         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636                                     struct btrfs_inode_item);
2637         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638         btrfs_mark_buffer_dirty(path->nodes[0]);
2639         btrfs_release_path(path);
2640
2641         /*
2642          * Add back valid inode_ref/dir_item/dir_index,
2643          * add_link() will handle the nlink inc, so new nlink must be correct
2644          */
2645         list_for_each_entry(backref, &rec->backrefs, list) {
2646                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647                                      backref->name, backref->namelen,
2648                                      backref->filetype, &backref->index, 1);
2649                 if (ret < 0)
2650                         goto out;
2651         }
2652 out:
2653         btrfs_release_path(path);
2654         return ret;
2655 }
2656
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658                                struct btrfs_root *root,
2659                                struct btrfs_path *path,
2660                                struct inode_record *rec)
2661 {
2662         char *dir_name = "lost+found";
2663         char namebuf[BTRFS_NAME_LEN] = {0};
2664         u64 lost_found_ino;
2665         u32 mode = 0700;
2666         u8 type = 0;
2667         int namelen = 0;
2668         int name_recovered = 0;
2669         int type_recovered = 0;
2670         int ret = 0;
2671
2672         /*
2673          * Get file name and type first before these invalid inode ref
2674          * are deleted by remove_all_invalid_backref()
2675          */
2676         name_recovered = !find_file_name(rec, namebuf, &namelen);
2677         type_recovered = !find_file_type(rec, &type);
2678
2679         if (!name_recovered) {
2680                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681                        rec->ino, rec->ino);
2682                 namelen = count_digits(rec->ino);
2683                 sprintf(namebuf, "%llu", rec->ino);
2684                 name_recovered = 1;
2685         }
2686         if (!type_recovered) {
2687                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2688                        rec->ino);
2689                 type = BTRFS_FT_REG_FILE;
2690                 type_recovered = 1;
2691         }
2692
2693         ret = reset_nlink(trans, root, path, rec);
2694         if (ret < 0) {
2695                 fprintf(stderr,
2696                         "Failed to reset nlink for inode %llu: %s\n",
2697                         rec->ino, strerror(-ret));
2698                 goto out;
2699         }
2700
2701         if (rec->found_link == 0) {
2702                 lost_found_ino = root->highest_inode;
2703                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2704                         ret = -EOVERFLOW;
2705                         goto out;
2706                 }
2707                 lost_found_ino++;
2708                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2710                                   mode);
2711                 if (ret < 0) {
2712                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713                                 dir_name, strerror(-ret));
2714                         goto out;
2715                 }
2716                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717                                      namebuf, namelen, type, NULL, 1);
2718                 /*
2719                  * Add ".INO" suffix several times to handle case where
2720                  * "FILENAME.INO" is already taken by another file.
2721                  */
2722                 while (ret == -EEXIST) {
2723                         /*
2724                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2725                          */
2726                         if (namelen + count_digits(rec->ino) + 1 >
2727                             BTRFS_NAME_LEN) {
2728                                 ret = -EFBIG;
2729                                 goto out;
2730                         }
2731                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2732                                  ".%llu", rec->ino);
2733                         namelen += count_digits(rec->ino) + 1;
2734                         ret = btrfs_add_link(trans, root, rec->ino,
2735                                              lost_found_ino, namebuf,
2736                                              namelen, type, NULL, 1);
2737                 }
2738                 if (ret < 0) {
2739                         fprintf(stderr,
2740                                 "Failed to link the inode %llu to %s dir: %s\n",
2741                                 rec->ino, dir_name, strerror(-ret));
2742                         goto out;
2743                 }
2744                 /*
2745                  * Just increase the found_link, don't actually add the
2746                  * backref. This will make things easier and this inode
2747                  * record will be freed after the repair is done.
2748                  * So fsck will not report problem about this inode.
2749                  */
2750                 rec->found_link++;
2751                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752                        namelen, namebuf, dir_name);
2753         }
2754         printf("Fixed the nlink of inode %llu\n", rec->ino);
2755 out:
2756         /*
2757          * Clear the flag anyway, or we will loop forever for the same inode
2758          * as it will not be removed from the bad inode list and the dead loop
2759          * happens.
2760          */
2761         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762         btrfs_release_path(path);
2763         return ret;
2764 }
2765
2766 /*
2767  * Check if there is any normal(reg or prealloc) file extent for given
2768  * ino.
2769  * This is used to determine the file type when neither its dir_index/item or
2770  * inode_item exists.
2771  *
2772  * This will *NOT* report error, if any error happens, just consider it does
2773  * not have any normal file extent.
2774  */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2776 {
2777         struct btrfs_path *path;
2778         struct btrfs_key key;
2779         struct btrfs_key found_key;
2780         struct btrfs_file_extent_item *fi;
2781         u8 type;
2782         int ret = 0;
2783
2784         path = btrfs_alloc_path();
2785         if (!path)
2786                 goto out;
2787         key.objectid = ino;
2788         key.type = BTRFS_EXTENT_DATA_KEY;
2789         key.offset = 0;
2790
2791         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2792         if (ret < 0) {
2793                 ret = 0;
2794                 goto out;
2795         }
2796         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797                 ret = btrfs_next_leaf(root, path);
2798                 if (ret) {
2799                         ret = 0;
2800                         goto out;
2801                 }
2802         }
2803         while (1) {
2804                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2805                                       path->slots[0]);
2806                 if (found_key.objectid != ino ||
2807                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2808                         break;
2809                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810                                     struct btrfs_file_extent_item);
2811                 type = btrfs_file_extent_type(path->nodes[0], fi);
2812                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2813                         ret = 1;
2814                         goto out;
2815                 }
2816         }
2817 out:
2818         btrfs_free_path(path);
2819         return ret;
2820 }
2821
2822 static u32 btrfs_type_to_imode(u8 type)
2823 {
2824         static u32 imode_by_btrfs_type[] = {
2825                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2826                 [BTRFS_FT_DIR]          = S_IFDIR,
2827                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2828                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2829                 [BTRFS_FT_FIFO]         = S_IFIFO,
2830                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2831                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2832         };
2833
2834         return imode_by_btrfs_type[(type)];
2835 }
2836
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838                                 struct btrfs_root *root,
2839                                 struct btrfs_path *path,
2840                                 struct inode_record *rec)
2841 {
2842         u8 filetype;
2843         u32 mode = 0700;
2844         int type_recovered = 0;
2845         int ret = 0;
2846
2847         printf("Trying to rebuild inode:%llu\n", rec->ino);
2848
2849         type_recovered = !find_file_type(rec, &filetype);
2850
2851         /*
2852          * Try to determine inode type if type not found.
2853          *
2854          * For found regular file extent, it must be FILE.
2855          * For found dir_item/index, it must be DIR.
2856          *
2857          * For undetermined one, use FILE as fallback.
2858          *
2859          * TODO:
2860          * 1. If found backref(inode_index/item is already handled) to it,
2861          *    it must be DIR.
2862          *    Need new inode-inode ref structure to allow search for that.
2863          */
2864         if (!type_recovered) {
2865                 if (rec->found_file_extent &&
2866                     find_normal_file_extent(root, rec->ino)) {
2867                         type_recovered = 1;
2868                         filetype = BTRFS_FT_REG_FILE;
2869                 } else if (rec->found_dir_item) {
2870                         type_recovered = 1;
2871                         filetype = BTRFS_FT_DIR;
2872                 } else if (!list_empty(&rec->orphan_extents)) {
2873                         type_recovered = 1;
2874                         filetype = BTRFS_FT_REG_FILE;
2875                 } else{
2876                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2877                                rec->ino);
2878                         type_recovered = 1;
2879                         filetype = BTRFS_FT_REG_FILE;
2880                 }
2881         }
2882
2883         ret = btrfs_new_inode(trans, root, rec->ino,
2884                               mode | btrfs_type_to_imode(filetype));
2885         if (ret < 0)
2886                 goto out;
2887
2888         /*
2889          * Here inode rebuild is done, we only rebuild the inode item,
2890          * don't repair the nlink(like move to lost+found).
2891          * That is the job of nlink repair.
2892          *
2893          * We just fill the record and return
2894          */
2895         rec->found_dir_item = 1;
2896         rec->imode = mode | btrfs_type_to_imode(filetype);
2897         rec->nlink = 0;
2898         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899         /* Ensure the inode_nlinks repair function will be called */
2900         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2901 out:
2902         return ret;
2903 }
2904
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906                                       struct btrfs_root *root,
2907                                       struct btrfs_path *path,
2908                                       struct inode_record *rec)
2909 {
2910         struct orphan_data_extent *orphan;
2911         struct orphan_data_extent *tmp;
2912         int ret = 0;
2913
2914         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2915                 /*
2916                  * Check for conflicting file extents
2917                  *
2918                  * Here we don't know whether the extents is compressed or not,
2919                  * so we can only assume it not compressed nor data offset,
2920                  * and use its disk_len as extent length.
2921                  */
2922                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923                                        orphan->offset, orphan->disk_len, 0);
2924                 btrfs_release_path(path);
2925                 if (ret < 0)
2926                         goto out;
2927                 if (!ret) {
2928                         fprintf(stderr,
2929                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930                                 orphan->disk_bytenr, orphan->disk_len);
2931                         ret = btrfs_free_extent(trans,
2932                                         root->fs_info->extent_root,
2933                                         orphan->disk_bytenr, orphan->disk_len,
2934                                         0, root->objectid, orphan->objectid,
2935                                         orphan->offset);
2936                         if (ret < 0)
2937                                 goto out;
2938                 }
2939                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940                                 orphan->offset, orphan->disk_bytenr,
2941                                 orphan->disk_len, orphan->disk_len);
2942                 if (ret < 0)
2943                         goto out;
2944
2945                 /* Update file size info */
2946                 rec->found_size += orphan->disk_len;
2947                 if (rec->found_size == rec->nbytes)
2948                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2949
2950                 /* Update the file extent hole info too */
2951                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2952                                            orphan->disk_len);
2953                 if (ret < 0)
2954                         goto out;
2955                 if (RB_EMPTY_ROOT(&rec->holes))
2956                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2957
2958                 list_del(&orphan->list);
2959                 free(orphan);
2960         }
2961         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2962 out:
2963         return ret;
2964 }
2965
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967                                         struct btrfs_root *root,
2968                                         struct btrfs_path *path,
2969                                         struct inode_record *rec)
2970 {
2971         struct rb_node *node;
2972         struct file_extent_hole *hole;
2973         int found = 0;
2974         int ret = 0;
2975
2976         node = rb_first(&rec->holes);
2977
2978         while (node) {
2979                 found = 1;
2980                 hole = rb_entry(node, struct file_extent_hole, node);
2981                 ret = btrfs_punch_hole(trans, root, rec->ino,
2982                                        hole->start, hole->len);
2983                 if (ret < 0)
2984                         goto out;
2985                 ret = del_file_extent_hole(&rec->holes, hole->start,
2986                                            hole->len);
2987                 if (ret < 0)
2988                         goto out;
2989                 if (RB_EMPTY_ROOT(&rec->holes))
2990                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991                 node = rb_first(&rec->holes);
2992         }
2993         /* special case for a file losing all its file extent */
2994         if (!found) {
2995                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996                                        round_up(rec->isize, root->sectorsize));
2997                 if (ret < 0)
2998                         goto out;
2999         }
3000         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001                rec->ino, root->objectid);
3002 out:
3003         return ret;
3004 }
3005
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3007 {
3008         struct btrfs_trans_handle *trans;
3009         struct btrfs_path *path;
3010         int ret = 0;
3011
3012         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013                              I_ERR_NO_ORPHAN_ITEM |
3014                              I_ERR_LINK_COUNT_WRONG |
3015                              I_ERR_NO_INODE_ITEM |
3016                              I_ERR_FILE_EXTENT_ORPHAN |
3017                              I_ERR_FILE_EXTENT_DISCOUNT|
3018                              I_ERR_FILE_NBYTES_WRONG)))
3019                 return rec->errors;
3020
3021         path = btrfs_alloc_path();
3022         if (!path)
3023                 return -ENOMEM;
3024
3025         /*
3026          * For nlink repair, it may create a dir and add link, so
3027          * 2 for parent(256)'s dir_index and dir_item
3028          * 2 for lost+found dir's inode_item and inode_ref
3029          * 1 for the new inode_ref of the file
3030          * 2 for lost+found dir's dir_index and dir_item for the file
3031          */
3032         trans = btrfs_start_transaction(root, 7);
3033         if (IS_ERR(trans)) {
3034                 btrfs_free_path(path);
3035                 return PTR_ERR(trans);
3036         }
3037
3038         if (rec->errors & I_ERR_NO_INODE_ITEM)
3039                 ret = repair_inode_no_item(trans, root, path, rec);
3040         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043                 ret = repair_inode_discount_extent(trans, root, path, rec);
3044         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045                 ret = repair_inode_isize(trans, root, path, rec);
3046         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047                 ret = repair_inode_orphan_item(trans, root, path, rec);
3048         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049                 ret = repair_inode_nlinks(trans, root, path, rec);
3050         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051                 ret = repair_inode_nbytes(trans, root, path, rec);
3052         btrfs_commit_transaction(trans, root);
3053         btrfs_free_path(path);
3054         return ret;
3055 }
3056
3057 static int check_inode_recs(struct btrfs_root *root,
3058                             struct cache_tree *inode_cache)
3059 {
3060         struct cache_extent *cache;
3061         struct ptr_node *node;
3062         struct inode_record *rec;
3063         struct inode_backref *backref;
3064         int stage = 0;
3065         int ret = 0;
3066         int err = 0;
3067         u64 error = 0;
3068         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3069
3070         if (btrfs_root_refs(&root->root_item) == 0) {
3071                 if (!cache_tree_empty(inode_cache))
3072                         fprintf(stderr, "warning line %d\n", __LINE__);
3073                 return 0;
3074         }
3075
3076         /*
3077          * We need to record the highest inode number for later 'lost+found'
3078          * dir creation.
3079          * We must select an ino not used/referred by any existing inode, or
3080          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081          * this may cause 'lost+found' dir has wrong nlinks.
3082          */
3083         cache = last_cache_extent(inode_cache);
3084         if (cache) {
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 if (rec->ino > root->highest_inode)
3088                         root->highest_inode = rec->ino;
3089         }
3090
3091         /*
3092          * We need to repair backrefs first because we could change some of the
3093          * errors in the inode recs.
3094          *
3095          * We also need to go through and delete invalid backrefs first and then
3096          * add the correct ones second.  We do this because we may get EEXIST
3097          * when adding back the correct index because we hadn't yet deleted the
3098          * invalid index.
3099          *
3100          * For example, if we were missing a dir index then the directories
3101          * isize would be wrong, so if we fixed the isize to what we thought it
3102          * would be and then fixed the backref we'd still have a invalid fs, so
3103          * we need to add back the dir index and then check to see if the isize
3104          * is still wrong.
3105          */
3106         while (stage < 3) {
3107                 stage++;
3108                 if (stage == 3 && !err)
3109                         break;
3110
3111                 cache = search_cache_extent(inode_cache, 0);
3112                 while (repair && cache) {
3113                         node = container_of(cache, struct ptr_node, cache);
3114                         rec = node->data;
3115                         cache = next_cache_extent(cache);
3116
3117                         /* Need to free everything up and rescan */
3118                         if (stage == 3) {
3119                                 remove_cache_extent(inode_cache, &node->cache);
3120                                 free(node);
3121                                 free_inode_rec(rec);
3122                                 continue;
3123                         }
3124
3125                         if (list_empty(&rec->backrefs))
3126                                 continue;
3127
3128                         ret = repair_inode_backrefs(root, rec, inode_cache,
3129                                                     stage == 1);
3130                         if (ret < 0) {
3131                                 err = ret;
3132                                 stage = 2;
3133                                 break;
3134                         } if (ret > 0) {
3135                                 err = -EAGAIN;
3136                         }
3137                 }
3138         }
3139         if (err)
3140                 return err;
3141
3142         rec = get_inode_rec(inode_cache, root_dirid, 0);
3143         BUG_ON(IS_ERR(rec));
3144         if (rec) {
3145                 ret = check_root_dir(rec);
3146                 if (ret) {
3147                         fprintf(stderr, "root %llu root dir %llu error\n",
3148                                 (unsigned long long)root->root_key.objectid,
3149                                 (unsigned long long)root_dirid);
3150                         print_inode_error(root, rec);
3151                         error++;
3152                 }
3153         } else {
3154                 if (repair) {
3155                         struct btrfs_trans_handle *trans;
3156
3157                         trans = btrfs_start_transaction(root, 1);
3158                         if (IS_ERR(trans)) {
3159                                 err = PTR_ERR(trans);
3160                                 return err;
3161                         }
3162
3163                         fprintf(stderr,
3164                                 "root %llu missing its root dir, recreating\n",
3165                                 (unsigned long long)root->objectid);
3166
3167                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3168                         BUG_ON(ret);
3169
3170                         btrfs_commit_transaction(trans, root);
3171                         return -EAGAIN;
3172                 }
3173
3174                 fprintf(stderr, "root %llu root dir %llu not found\n",
3175                         (unsigned long long)root->root_key.objectid,
3176                         (unsigned long long)root_dirid);
3177         }
3178
3179         while (1) {
3180                 cache = search_cache_extent(inode_cache, 0);
3181                 if (!cache)
3182                         break;
3183                 node = container_of(cache, struct ptr_node, cache);
3184                 rec = node->data;
3185                 remove_cache_extent(inode_cache, &node->cache);
3186                 free(node);
3187                 if (rec->ino == root_dirid ||
3188                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189                         free_inode_rec(rec);
3190                         continue;
3191                 }
3192
3193                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194                         ret = check_orphan_item(root, rec->ino);
3195                         if (ret == 0)
3196                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197                         if (can_free_inode_rec(rec)) {
3198                                 free_inode_rec(rec);
3199                                 continue;
3200                         }
3201                 }
3202
3203                 if (!rec->found_inode_item)
3204                         rec->errors |= I_ERR_NO_INODE_ITEM;
3205                 if (rec->found_link != rec->nlink)
3206                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3207                 if (repair) {
3208                         ret = try_repair_inode(root, rec);
3209                         if (ret == 0 && can_free_inode_rec(rec)) {
3210                                 free_inode_rec(rec);
3211                                 continue;
3212                         }
3213                         ret = 0;
3214                 }
3215
3216                 if (!(repair && ret == 0))
3217                         error++;
3218                 print_inode_error(root, rec);
3219                 list_for_each_entry(backref, &rec->backrefs, list) {
3220                         if (!backref->found_dir_item)
3221                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222                         if (!backref->found_dir_index)
3223                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224                         if (!backref->found_inode_ref)
3225                                 backref->errors |= REF_ERR_NO_INODE_REF;
3226                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227                                 " namelen %u name %s filetype %d errors %x",
3228                                 (unsigned long long)backref->dir,
3229                                 (unsigned long long)backref->index,
3230                                 backref->namelen, backref->name,
3231                                 backref->filetype, backref->errors);
3232                         print_ref_error(backref->errors);
3233                 }
3234                 free_inode_rec(rec);
3235         }
3236         return (error > 0) ? -1 : 0;
3237 }
3238
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3240                                         u64 objectid)
3241 {
3242         struct cache_extent *cache;
3243         struct root_record *rec = NULL;
3244         int ret;
3245
3246         cache = lookup_cache_extent(root_cache, objectid, 1);
3247         if (cache) {
3248                 rec = container_of(cache, struct root_record, cache);
3249         } else {
3250                 rec = calloc(1, sizeof(*rec));
3251                 if (!rec)
3252                         return ERR_PTR(-ENOMEM);
3253                 rec->objectid = objectid;
3254                 INIT_LIST_HEAD(&rec->backrefs);
3255                 rec->cache.start = objectid;
3256                 rec->cache.size = 1;
3257
3258                 ret = insert_cache_extent(root_cache, &rec->cache);
3259                 if (ret)
3260                         return ERR_PTR(-EEXIST);
3261         }
3262         return rec;
3263 }
3264
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266                                              u64 ref_root, u64 dir, u64 index,
3267                                              const char *name, int namelen)
3268 {
3269         struct root_backref *backref;
3270
3271         list_for_each_entry(backref, &rec->backrefs, list) {
3272                 if (backref->ref_root != ref_root || backref->dir != dir ||
3273                     backref->namelen != namelen)
3274                         continue;
3275                 if (memcmp(name, backref->name, namelen))
3276                         continue;
3277                 return backref;
3278         }
3279
3280         backref = calloc(1, sizeof(*backref) + namelen + 1);
3281         if (!backref)
3282                 return NULL;
3283         backref->ref_root = ref_root;
3284         backref->dir = dir;
3285         backref->index = index;
3286         backref->namelen = namelen;
3287         memcpy(backref->name, name, namelen);
3288         backref->name[namelen] = '\0';
3289         list_add_tail(&backref->list, &rec->backrefs);
3290         return backref;
3291 }
3292
3293 static void free_root_record(struct cache_extent *cache)
3294 {
3295         struct root_record *rec;
3296         struct root_backref *backref;
3297
3298         rec = container_of(cache, struct root_record, cache);
3299         while (!list_empty(&rec->backrefs)) {
3300                 backref = to_root_backref(rec->backrefs.next);
3301                 list_del(&backref->list);
3302                 free(backref);
3303         }
3304
3305         kfree(rec);
3306 }
3307
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3309
3310 static int add_root_backref(struct cache_tree *root_cache,
3311                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3312                             const char *name, int namelen,
3313                             int item_type, int errors)
3314 {
3315         struct root_record *rec;
3316         struct root_backref *backref;
3317
3318         rec = get_root_rec(root_cache, root_id);
3319         BUG_ON(IS_ERR(rec));
3320         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3321         BUG_ON(!backref);
3322
3323         backref->errors |= errors;
3324
3325         if (item_type != BTRFS_DIR_ITEM_KEY) {
3326                 if (backref->found_dir_index || backref->found_back_ref ||
3327                     backref->found_forward_ref) {
3328                         if (backref->index != index)
3329                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3330                 } else {
3331                         backref->index = index;
3332                 }
3333         }
3334
3335         if (item_type == BTRFS_DIR_ITEM_KEY) {
3336                 if (backref->found_forward_ref)
3337                         rec->found_ref++;
3338                 backref->found_dir_item = 1;
3339         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340                 backref->found_dir_index = 1;
3341         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342                 if (backref->found_forward_ref)
3343                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3344                 else if (backref->found_dir_item)
3345                         rec->found_ref++;
3346                 backref->found_forward_ref = 1;
3347         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348                 if (backref->found_back_ref)
3349                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350                 backref->found_back_ref = 1;
3351         } else {
3352                 BUG_ON(1);
3353         }
3354
3355         if (backref->found_forward_ref && backref->found_dir_item)
3356                 backref->reachable = 1;
3357         return 0;
3358 }
3359
3360 static int merge_root_recs(struct btrfs_root *root,
3361                            struct cache_tree *src_cache,
3362                            struct cache_tree *dst_cache)
3363 {
3364         struct cache_extent *cache;
3365         struct ptr_node *node;
3366         struct inode_record *rec;
3367         struct inode_backref *backref;
3368         int ret = 0;
3369
3370         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371                 free_inode_recs_tree(src_cache);
3372                 return 0;
3373         }
3374
3375         while (1) {
3376                 cache = search_cache_extent(src_cache, 0);
3377                 if (!cache)
3378                         break;
3379                 node = container_of(cache, struct ptr_node, cache);
3380                 rec = node->data;
3381                 remove_cache_extent(src_cache, &node->cache);
3382                 free(node);
3383
3384                 ret = is_child_root(root, root->objectid, rec->ino);
3385                 if (ret < 0)
3386                         break;
3387                 else if (ret == 0)
3388                         goto skip;
3389
3390                 list_for_each_entry(backref, &rec->backrefs, list) {
3391                         BUG_ON(backref->found_inode_ref);
3392                         if (backref->found_dir_item)
3393                                 add_root_backref(dst_cache, rec->ino,
3394                                         root->root_key.objectid, backref->dir,
3395                                         backref->index, backref->name,
3396                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3397                                         backref->errors);
3398                         if (backref->found_dir_index)
3399                                 add_root_backref(dst_cache, rec->ino,
3400                                         root->root_key.objectid, backref->dir,
3401                                         backref->index, backref->name,
3402                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3403                                         backref->errors);
3404                 }
3405 skip:
3406                 free_inode_rec(rec);
3407         }
3408         if (ret < 0)
3409                 return ret;
3410         return 0;
3411 }
3412
3413 static int check_root_refs(struct btrfs_root *root,
3414                            struct cache_tree *root_cache)
3415 {
3416         struct root_record *rec;
3417         struct root_record *ref_root;
3418         struct root_backref *backref;
3419         struct cache_extent *cache;
3420         int loop = 1;
3421         int ret;
3422         int error;
3423         int errors = 0;
3424
3425         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426         BUG_ON(IS_ERR(rec));
3427         rec->found_ref = 1;
3428
3429         /* fixme: this can not detect circular references */
3430         while (loop) {
3431                 loop = 0;
3432                 cache = search_cache_extent(root_cache, 0);
3433                 while (1) {
3434                         if (!cache)
3435                                 break;
3436                         rec = container_of(cache, struct root_record, cache);
3437                         cache = next_cache_extent(cache);
3438
3439                         if (rec->found_ref == 0)
3440                                 continue;
3441
3442                         list_for_each_entry(backref, &rec->backrefs, list) {
3443                                 if (!backref->reachable)
3444                                         continue;
3445
3446                                 ref_root = get_root_rec(root_cache,
3447                                                         backref->ref_root);
3448                                 BUG_ON(IS_ERR(ref_root));
3449                                 if (ref_root->found_ref > 0)
3450                                         continue;
3451
3452                                 backref->reachable = 0;
3453                                 rec->found_ref--;
3454                                 if (rec->found_ref == 0)
3455                                         loop = 1;
3456                         }
3457                 }
3458         }
3459
3460         cache = search_cache_extent(root_cache, 0);
3461         while (1) {
3462                 if (!cache)
3463                         break;
3464                 rec = container_of(cache, struct root_record, cache);
3465                 cache = next_cache_extent(cache);
3466
3467                 if (rec->found_ref == 0 &&
3468                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470                         ret = check_orphan_item(root->fs_info->tree_root,
3471                                                 rec->objectid);
3472                         if (ret == 0)
3473                                 continue;
3474
3475                         /*
3476                          * If we don't have a root item then we likely just have
3477                          * a dir item in a snapshot for this root but no actual
3478                          * ref key or anything so it's meaningless.
3479                          */
3480                         if (!rec->found_root_item)
3481                                 continue;
3482                         errors++;
3483                         fprintf(stderr, "fs tree %llu not referenced\n",
3484                                 (unsigned long long)rec->objectid);
3485                 }
3486
3487                 error = 0;
3488                 if (rec->found_ref > 0 && !rec->found_root_item)
3489                         error = 1;
3490                 list_for_each_entry(backref, &rec->backrefs, list) {
3491                         if (!backref->found_dir_item)
3492                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493                         if (!backref->found_dir_index)
3494                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495                         if (!backref->found_back_ref)
3496                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497                         if (!backref->found_forward_ref)
3498                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3499                         if (backref->reachable && backref->errors)
3500                                 error = 1;
3501                 }
3502                 if (!error)
3503                         continue;
3504
3505                 errors++;
3506                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507                         (unsigned long long)rec->objectid, rec->found_ref,
3508                          rec->found_root_item ? "" : "not found");
3509
3510                 list_for_each_entry(backref, &rec->backrefs, list) {
3511                         if (!backref->reachable)
3512                                 continue;
3513                         if (!backref->errors && rec->found_root_item)
3514                                 continue;
3515                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516                                 " index %llu namelen %u name %s errors %x\n",
3517                                 (unsigned long long)backref->ref_root,
3518                                 (unsigned long long)backref->dir,
3519                                 (unsigned long long)backref->index,
3520                                 backref->namelen, backref->name,
3521                                 backref->errors);
3522                         print_ref_error(backref->errors);
3523                 }
3524         }
3525         return errors > 0 ? 1 : 0;
3526 }
3527
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529                             struct btrfs_key *key,
3530                             struct cache_tree *root_cache)
3531 {
3532         u64 dirid;
3533         u64 index;
3534         u32 len;
3535         u32 name_len;
3536         struct btrfs_root_ref *ref;
3537         char namebuf[BTRFS_NAME_LEN];
3538         int error;
3539
3540         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3541
3542         dirid = btrfs_root_ref_dirid(eb, ref);
3543         index = btrfs_root_ref_sequence(eb, ref);
3544         name_len = btrfs_root_ref_name_len(eb, ref);
3545
3546         if (name_len <= BTRFS_NAME_LEN) {
3547                 len = name_len;
3548                 error = 0;
3549         } else {
3550                 len = BTRFS_NAME_LEN;
3551                 error = REF_ERR_NAME_TOO_LONG;
3552         }
3553         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3554
3555         if (key->type == BTRFS_ROOT_REF_KEY) {
3556                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557                                  index, namebuf, len, key->type, error);
3558         } else {
3559                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560                                  index, namebuf, len, key->type, error);
3561         }
3562         return 0;
3563 }
3564
3565 static void free_corrupt_block(struct cache_extent *cache)
3566 {
3567         struct btrfs_corrupt_block *corrupt;
3568
3569         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3570         free(corrupt);
3571 }
3572
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3574
3575 /*
3576  * Repair the btree of the given root.
3577  *
3578  * The fix is to remove the node key in corrupt_blocks cache_tree.
3579  * and rebalance the tree.
3580  * After the fix, the btree should be writeable.
3581  */
3582 static int repair_btree(struct btrfs_root *root,
3583                         struct cache_tree *corrupt_blocks)
3584 {
3585         struct btrfs_trans_handle *trans;
3586         struct btrfs_path *path;
3587         struct btrfs_corrupt_block *corrupt;
3588         struct cache_extent *cache;
3589         struct btrfs_key key;
3590         u64 offset;
3591         int level;
3592         int ret = 0;
3593
3594         if (cache_tree_empty(corrupt_blocks))
3595                 return 0;
3596
3597         path = btrfs_alloc_path();
3598         if (!path)
3599                 return -ENOMEM;
3600
3601         trans = btrfs_start_transaction(root, 1);
3602         if (IS_ERR(trans)) {
3603                 ret = PTR_ERR(trans);
3604                 fprintf(stderr, "Error starting transaction: %s\n",
3605                         strerror(-ret));
3606                 goto out_free_path;
3607         }
3608         cache = first_cache_extent(corrupt_blocks);
3609         while (cache) {
3610                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3611                                        cache);
3612                 level = corrupt->level;
3613                 path->lowest_level = level;
3614                 key.objectid = corrupt->key.objectid;
3615                 key.type = corrupt->key.type;
3616                 key.offset = corrupt->key.offset;
3617
3618                 /*
3619                  * Here we don't want to do any tree balance, since it may
3620                  * cause a balance with corrupted brother leaf/node,
3621                  * so ins_len set to 0 here.
3622                  * Balance will be done after all corrupt node/leaf is deleted.
3623                  */
3624                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3625                 if (ret < 0)
3626                         goto out;
3627                 offset = btrfs_node_blockptr(path->nodes[level],
3628                                              path->slots[level]);
3629
3630                 /* Remove the ptr */
3631                 ret = btrfs_del_ptr(trans, root, path, level,
3632                                     path->slots[level]);
3633                 if (ret < 0)
3634                         goto out;
3635                 /*
3636                  * Remove the corresponding extent
3637                  * return value is not concerned.
3638                  */
3639                 btrfs_release_path(path);
3640                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641                                         0, root->root_key.objectid,
3642                                         level - 1, 0);
3643                 cache = next_cache_extent(cache);
3644         }
3645
3646         /* Balance the btree using btrfs_search_slot() */
3647         cache = first_cache_extent(corrupt_blocks);
3648         while (cache) {
3649                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3650                                        cache);
3651                 memcpy(&key, &corrupt->key, sizeof(key));
3652                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3653                 if (ret < 0)
3654                         goto out;
3655                 /* return will always >0 since it won't find the item */
3656                 ret = 0;
3657                 btrfs_release_path(path);
3658                 cache = next_cache_extent(cache);
3659         }
3660 out:
3661         btrfs_commit_transaction(trans, root);
3662 out_free_path:
3663         btrfs_free_path(path);
3664         return ret;
3665 }
3666
3667 static int check_fs_root(struct btrfs_root *root,
3668                          struct cache_tree *root_cache,
3669                          struct walk_control *wc)
3670 {
3671         int ret = 0;
3672         int err = 0;
3673         int wret;
3674         int level;
3675         struct btrfs_path path;
3676         struct shared_node root_node;
3677         struct root_record *rec;
3678         struct btrfs_root_item *root_item = &root->root_item;
3679         struct cache_tree corrupt_blocks;
3680         struct orphan_data_extent *orphan;
3681         struct orphan_data_extent *tmp;
3682         enum btrfs_tree_block_status status;
3683         struct node_refs nrefs;
3684
3685         /*
3686          * Reuse the corrupt_block cache tree to record corrupted tree block
3687          *
3688          * Unlike the usage in extent tree check, here we do it in a per
3689          * fs/subvol tree base.
3690          */
3691         cache_tree_init(&corrupt_blocks);
3692         root->fs_info->corrupt_blocks = &corrupt_blocks;
3693
3694         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695                 rec = get_root_rec(root_cache, root->root_key.objectid);
3696                 BUG_ON(IS_ERR(rec));
3697                 if (btrfs_root_refs(root_item) > 0)
3698                         rec->found_root_item = 1;
3699         }
3700
3701         btrfs_init_path(&path);
3702         memset(&root_node, 0, sizeof(root_node));
3703         cache_tree_init(&root_node.root_cache);
3704         cache_tree_init(&root_node.inode_cache);
3705         memset(&nrefs, 0, sizeof(nrefs));
3706
3707         /* Move the orphan extent record to corresponding inode_record */
3708         list_for_each_entry_safe(orphan, tmp,
3709                                  &root->orphan_data_extents, list) {
3710                 struct inode_record *inode;
3711
3712                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3713                                       1);
3714                 BUG_ON(IS_ERR(inode));
3715                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716                 list_move(&orphan->list, &inode->orphan_extents);
3717         }
3718
3719         level = btrfs_header_level(root->node);
3720         memset(wc->nodes, 0, sizeof(wc->nodes));
3721         wc->nodes[level] = &root_node;
3722         wc->active_node = level;
3723         wc->root_level = level;
3724
3725         /* We may not have checked the root block, lets do that now */
3726         if (btrfs_is_leaf(root->node))
3727                 status = btrfs_check_leaf(root, NULL, root->node);
3728         else
3729                 status = btrfs_check_node(root, NULL, root->node);
3730         if (status != BTRFS_TREE_BLOCK_CLEAN)
3731                 return -EIO;
3732
3733         if (btrfs_root_refs(root_item) > 0 ||
3734             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735                 path.nodes[level] = root->node;
3736                 extent_buffer_get(root->node);
3737                 path.slots[level] = 0;
3738         } else {
3739                 struct btrfs_key key;
3740                 struct btrfs_disk_key found_key;
3741
3742                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743                 level = root_item->drop_level;
3744                 path.lowest_level = level;
3745                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3746                 if (wret < 0)
3747                         goto skip_walking;
3748                 btrfs_node_key(path.nodes[level], &found_key,
3749                                 path.slots[level]);
3750                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3751                                         sizeof(found_key)));
3752         }
3753
3754         while (1) {
3755                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3756                 if (wret < 0)
3757                         ret = wret;
3758                 if (wret != 0)
3759                         break;
3760
3761                 wret = walk_up_tree(root, &path, wc, &level);
3762                 if (wret < 0)
3763                         ret = wret;
3764                 if (wret != 0)
3765                         break;
3766         }
3767 skip_walking:
3768         btrfs_release_path(&path);
3769
3770         if (!cache_tree_empty(&corrupt_blocks)) {
3771                 struct cache_extent *cache;
3772                 struct btrfs_corrupt_block *corrupt;
3773
3774                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3775                        root->root_key.objectid);
3776                 cache = first_cache_extent(&corrupt_blocks);
3777                 while (cache) {
3778                         corrupt = container_of(cache,
3779                                                struct btrfs_corrupt_block,
3780                                                cache);
3781                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3782                                cache->start, corrupt->level,
3783                                corrupt->key.objectid, corrupt->key.type,
3784                                corrupt->key.offset);
3785                         cache = next_cache_extent(cache);
3786                 }
3787                 if (repair) {
3788                         printf("Try to repair the btree for root %llu\n",
3789                                root->root_key.objectid);
3790                         ret = repair_btree(root, &corrupt_blocks);
3791                         if (ret < 0)
3792                                 fprintf(stderr, "Failed to repair btree: %s\n",
3793                                         strerror(-ret));
3794                         if (!ret)
3795                                 printf("Btree for root %llu is fixed\n",
3796                                        root->root_key.objectid);
3797                 }
3798         }
3799
3800         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3801         if (err < 0)
3802                 ret = err;
3803
3804         if (root_node.current) {
3805                 root_node.current->checked = 1;
3806                 maybe_free_inode_rec(&root_node.inode_cache,
3807                                 root_node.current);
3808         }
3809
3810         err = check_inode_recs(root, &root_node.inode_cache);
3811         if (!ret)
3812                 ret = err;
3813
3814         free_corrupt_blocks_tree(&corrupt_blocks);
3815         root->fs_info->corrupt_blocks = NULL;
3816         free_orphan_data_extents(&root->orphan_data_extents);
3817         return ret;
3818 }
3819
3820 static int fs_root_objectid(u64 objectid)
3821 {
3822         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3823             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3824                 return 1;
3825         return is_fstree(objectid);
3826 }
3827
3828 static int check_fs_roots(struct btrfs_root *root,
3829                           struct cache_tree *root_cache)
3830 {
3831         struct btrfs_path path;
3832         struct btrfs_key key;
3833         struct walk_control wc;
3834         struct extent_buffer *leaf, *tree_node;
3835         struct btrfs_root *tmp_root;
3836         struct btrfs_root *tree_root = root->fs_info->tree_root;
3837         int ret;
3838         int err = 0;
3839
3840         if (ctx.progress_enabled) {
3841                 ctx.tp = TASK_FS_ROOTS;
3842                 task_start(ctx.info);
3843         }
3844
3845         /*
3846          * Just in case we made any changes to the extent tree that weren't
3847          * reflected into the free space cache yet.
3848          */
3849         if (repair)
3850                 reset_cached_block_groups(root->fs_info);
3851         memset(&wc, 0, sizeof(wc));
3852         cache_tree_init(&wc.shared);
3853         btrfs_init_path(&path);
3854
3855 again:
3856         key.offset = 0;
3857         key.objectid = 0;
3858         key.type = BTRFS_ROOT_ITEM_KEY;
3859         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3860         if (ret < 0) {
3861                 err = 1;
3862                 goto out;
3863         }
3864         tree_node = tree_root->node;
3865         while (1) {
3866                 if (tree_node != tree_root->node) {
3867                         free_root_recs_tree(root_cache);
3868                         btrfs_release_path(&path);
3869                         goto again;
3870                 }
3871                 leaf = path.nodes[0];
3872                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3873                         ret = btrfs_next_leaf(tree_root, &path);
3874                         if (ret) {
3875                                 if (ret < 0)
3876                                         err = 1;
3877                                 break;
3878                         }
3879                         leaf = path.nodes[0];
3880                 }
3881                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3882                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3883                     fs_root_objectid(key.objectid)) {
3884                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3885                                 tmp_root = btrfs_read_fs_root_no_cache(
3886                                                 root->fs_info, &key);
3887                         } else {
3888                                 key.offset = (u64)-1;
3889                                 tmp_root = btrfs_read_fs_root(
3890                                                 root->fs_info, &key);
3891                         }
3892                         if (IS_ERR(tmp_root)) {
3893                                 err = 1;
3894                                 goto next;
3895                         }
3896                         ret = check_fs_root(tmp_root, root_cache, &wc);
3897                         if (ret == -EAGAIN) {
3898                                 free_root_recs_tree(root_cache);
3899                                 btrfs_release_path(&path);
3900                                 goto again;
3901                         }
3902                         if (ret)
3903                                 err = 1;
3904                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3905                                 btrfs_free_fs_root(tmp_root);
3906                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3907                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3908                         process_root_ref(leaf, path.slots[0], &key,
3909                                          root_cache);
3910                 }
3911 next:
3912                 path.slots[0]++;
3913         }
3914 out:
3915         btrfs_release_path(&path);
3916         if (err)
3917                 free_extent_cache_tree(&wc.shared);
3918         if (!cache_tree_empty(&wc.shared))
3919                 fprintf(stderr, "warning line %d\n", __LINE__);
3920
3921         task_stop(ctx.info);
3922
3923         return err;
3924 }
3925
3926 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3927 {
3928         struct rb_node *n;
3929         struct extent_backref *back;
3930         struct tree_backref *tback;
3931         struct data_backref *dback;
3932         u64 found = 0;
3933         int err = 0;
3934
3935         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3936                 back = rb_node_to_extent_backref(n);
3937                 if (!back->found_extent_tree) {
3938                         err = 1;
3939                         if (!print_errs)
3940                                 goto out;
3941                         if (back->is_data) {
3942                                 dback = to_data_backref(back);
3943                                 fprintf(stderr, "Backref %llu %s %llu"
3944                                         " owner %llu offset %llu num_refs %lu"
3945                                         " not found in extent tree\n",
3946                                         (unsigned long long)rec->start,
3947                                         back->full_backref ?
3948                                         "parent" : "root",
3949                                         back->full_backref ?
3950                                         (unsigned long long)dback->parent:
3951                                         (unsigned long long)dback->root,
3952                                         (unsigned long long)dback->owner,
3953                                         (unsigned long long)dback->offset,
3954                                         (unsigned long)dback->num_refs);
3955                         } else {
3956                                 tback = to_tree_backref(back);
3957                                 fprintf(stderr, "Backref %llu parent %llu"
3958                                         " root %llu not found in extent tree\n",
3959                                         (unsigned long long)rec->start,
3960                                         (unsigned long long)tback->parent,
3961                                         (unsigned long long)tback->root);
3962                         }
3963                 }
3964                 if (!back->is_data && !back->found_ref) {
3965                         err = 1;
3966                         if (!print_errs)
3967                                 goto out;
3968                         tback = to_tree_backref(back);
3969                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3970                                 (unsigned long long)rec->start,
3971                                 back->full_backref ? "parent" : "root",
3972                                 back->full_backref ?
3973                                 (unsigned long long)tback->parent :
3974                                 (unsigned long long)tback->root, back);
3975                 }
3976                 if (back->is_data) {
3977                         dback = to_data_backref(back);
3978                         if (dback->found_ref != dback->num_refs) {
3979                                 err = 1;
3980                                 if (!print_errs)
3981                                         goto out;
3982                                 fprintf(stderr, "Incorrect local backref count"
3983                                         " on %llu %s %llu owner %llu"
3984                                         " offset %llu found %u wanted %u back %p\n",
3985                                         (unsigned long long)rec->start,
3986                                         back->full_backref ?
3987                                         "parent" : "root",
3988                                         back->full_backref ?
3989                                         (unsigned long long)dback->parent:
3990                                         (unsigned long long)dback->root,
3991                                         (unsigned long long)dback->owner,
3992                                         (unsigned long long)dback->offset,
3993                                         dback->found_ref, dback->num_refs, back);
3994                         }
3995                         if (dback->disk_bytenr != rec->start) {
3996                                 err = 1;
3997                                 if (!print_errs)
3998                                         goto out;
3999                                 fprintf(stderr, "Backref disk bytenr does not"
4000                                         " match extent record, bytenr=%llu, "
4001                                         "ref bytenr=%llu\n",
4002                                         (unsigned long long)rec->start,
4003                                         (unsigned long long)dback->disk_bytenr);
4004                         }
4005
4006                         if (dback->bytes != rec->nr) {
4007                                 err = 1;
4008                                 if (!print_errs)
4009                                         goto out;
4010                                 fprintf(stderr, "Backref bytes do not match "
4011                                         "extent backref, bytenr=%llu, ref "
4012                                         "bytes=%llu, backref bytes=%llu\n",
4013                                         (unsigned long long)rec->start,
4014                                         (unsigned long long)rec->nr,
4015                                         (unsigned long long)dback->bytes);
4016                         }
4017                 }
4018                 if (!back->is_data) {
4019                         found += 1;
4020                 } else {
4021                         dback = to_data_backref(back);
4022                         found += dback->found_ref;
4023                 }
4024         }
4025         if (found != rec->refs) {
4026                 err = 1;
4027                 if (!print_errs)
4028                         goto out;
4029                 fprintf(stderr, "Incorrect global backref count "
4030                         "on %llu found %llu wanted %llu\n",
4031                         (unsigned long long)rec->start,
4032                         (unsigned long long)found,
4033                         (unsigned long long)rec->refs);
4034         }
4035 out:
4036         return err;
4037 }
4038
/* rb_free_nodes() callback: release the backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
4045
4046 static void free_all_extent_backrefs(struct extent_record *rec)
4047 {
4048         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4049 }
4050
4051 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4052                                      struct cache_tree *extent_cache)
4053 {
4054         struct cache_extent *cache;
4055         struct extent_record *rec;
4056
4057         while (1) {
4058                 cache = first_cache_extent(extent_cache);
4059                 if (!cache)
4060                         break;
4061                 rec = container_of(cache, struct extent_record, cache);
4062                 remove_cache_extent(extent_cache, cache);
4063                 free_all_extent_backrefs(rec);
4064                 free(rec);
4065         }
4066 }
4067
4068 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4069                                  struct extent_record *rec)
4070 {
4071         if (rec->content_checked && rec->owner_ref_checked &&
4072             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4073             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4074             !rec->bad_full_backref && !rec->crossing_stripes &&
4075             !rec->wrong_chunk_type) {
4076                 remove_cache_extent(extent_cache, &rec->cache);
4077                 free_all_extent_backrefs(rec);
4078                 list_del_init(&rec->list);
4079                 free(rec);
4080         }
4081         return 0;
4082 }
4083
4084 static int check_owner_ref(struct btrfs_root *root,
4085                             struct extent_record *rec,
4086                             struct extent_buffer *buf)
4087 {
4088         struct extent_backref *node, *tmp;
4089         struct tree_backref *back;
4090         struct btrfs_root *ref_root;
4091         struct btrfs_key key;
4092         struct btrfs_path path;
4093         struct extent_buffer *parent;
4094         int level;
4095         int found = 0;
4096         int ret;
4097
4098         rbtree_postorder_for_each_entry_safe(node, tmp,
4099                                              &rec->backref_tree, node) {
4100                 if (node->is_data)
4101                         continue;
4102                 if (!node->found_ref)
4103                         continue;
4104                 if (node->full_backref)
4105                         continue;
4106                 back = to_tree_backref(node);
4107                 if (btrfs_header_owner(buf) == back->root)
4108                         return 0;
4109         }
4110         BUG_ON(rec->is_root);
4111
4112         /* try to find the block by search corresponding fs tree */
4113         key.objectid = btrfs_header_owner(buf);
4114         key.type = BTRFS_ROOT_ITEM_KEY;
4115         key.offset = (u64)-1;
4116
4117         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4118         if (IS_ERR(ref_root))
4119                 return 1;
4120
4121         level = btrfs_header_level(buf);
4122         if (level == 0)
4123                 btrfs_item_key_to_cpu(buf, &key, 0);
4124         else
4125                 btrfs_node_key_to_cpu(buf, &key, 0);
4126
4127         btrfs_init_path(&path);
4128         path.lowest_level = level + 1;
4129         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4130         if (ret < 0)
4131                 return 0;
4132
4133         parent = path.nodes[level + 1];
4134         if (parent && buf->start == btrfs_node_blockptr(parent,
4135                                                         path.slots[level + 1]))
4136                 found = 1;
4137
4138         btrfs_release_path(&path);
4139         return found ? 0 : 1;
4140 }
4141
4142 static int is_extent_tree_record(struct extent_record *rec)
4143 {
4144         struct extent_backref *ref, *tmp;
4145         struct tree_backref *back;
4146         int is_extent = 0;
4147
4148         rbtree_postorder_for_each_entry_safe(ref, tmp,
4149                                              &rec->backref_tree, node) {
4150                 if (ref->is_data)
4151                         return 0;
4152                 back = to_tree_backref(ref);
4153                 if (ref->full_backref)
4154                         return 0;
4155                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4156                         is_extent = 1;
4157         }
4158         return is_extent;
4159 }
4160
4161
4162 static int record_bad_block_io(struct btrfs_fs_info *info,
4163                                struct cache_tree *extent_cache,
4164                                u64 start, u64 len)
4165 {
4166         struct extent_record *rec;
4167         struct cache_extent *cache;
4168         struct btrfs_key key;
4169
4170         cache = lookup_cache_extent(extent_cache, start, len);
4171         if (!cache)
4172                 return 0;
4173
4174         rec = container_of(cache, struct extent_record, cache);
4175         if (!is_extent_tree_record(rec))
4176                 return 0;
4177
4178         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4179         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4180 }
4181
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (header level != 0) the two key pointers are swapped as a whole
 * and, when @slot is 0, the low keys above the node are fixed up.  For a
 * leaf the item data, offsets, sizes and keys of the two items are
 * exchanged.
 *
 * Returns 0 on success or -ENOMEM when a temporary buffer for the leaf item
 * data cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* read both key pointers, then write them back crossed over */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /* the first key of the node changed, update parents */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): the item offsets are handed to
                 * read/write_extent_buffer() as they are, while
                 * fix_item_offset() adds btrfs_leaf_data(buf) before touching
                 * item data -- confirm which base these offsets are
                 * relative to.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write copies the *other* item's size out
                 * of a buffer that was allocated with its own size, so when
                 * the two sizes differ one of these reads past the end of a
                 * malloc()ed buffer -- confirm the intended layout before
                 * relying on this path.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* exchange the offset and size stored in the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* exchange the keys; path->slots[0] is left at slot + 1 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
4249
4250 static int fix_key_order(struct btrfs_trans_handle *trans,
4251                          struct btrfs_root *root,
4252                          struct btrfs_path *path)
4253 {
4254         struct extent_buffer *buf;
4255         struct btrfs_key k1, k2;
4256         int i;
4257         int level = path->lowest_level;
4258         int ret = -EIO;
4259
4260         buf = path->nodes[level];
4261         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4262                 if (level) {
4263                         btrfs_node_key_to_cpu(buf, &k1, i);
4264                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4265                 } else {
4266                         btrfs_item_key_to_cpu(buf, &k1, i);
4267                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4268                 }
4269                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4270                         continue;
4271                 ret = swap_values(root, path, buf, i);
4272                 if (ret)
4273                         break;
4274                 btrfs_mark_buffer_dirty(buf);
4275                 i = 0;
4276         }
4277         return ret;
4278 }
4279
4280 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4281                              struct btrfs_root *root,
4282                              struct btrfs_path *path,
4283                              struct extent_buffer *buf, int slot)
4284 {
4285         struct btrfs_key key;
4286         int nritems = btrfs_header_nritems(buf);
4287
4288         btrfs_item_key_to_cpu(buf, &key, slot);
4289
4290         /* These are all the keys we can deal with missing. */
4291         if (key.type != BTRFS_DIR_INDEX_KEY &&
4292             key.type != BTRFS_EXTENT_ITEM_KEY &&
4293             key.type != BTRFS_METADATA_ITEM_KEY &&
4294             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4295             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4296                 return -1;
4297
4298         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4299                (unsigned long long)key.objectid, key.type,
4300                (unsigned long long)key.offset, slot, buf->start);
4301         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4302                               btrfs_item_nr_offset(slot + 1),
4303                               sizeof(struct btrfs_item) *
4304                               (nritems - slot - 1));
4305         btrfs_set_header_nritems(buf, nritems - 1);
4306         if (slot == 0) {
4307                 struct btrfs_disk_key disk_key;
4308
4309                 btrfs_item_key(buf, &disk_key, 0);
4310                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4311         }
4312         btrfs_mark_buffer_dirty(buf);
4313         return 0;
4314 }
4315
4316 static int fix_item_offset(struct btrfs_trans_handle *trans,
4317                            struct btrfs_root *root,
4318                            struct btrfs_path *path)
4319 {
4320         struct extent_buffer *buf;
4321         int i;
4322         int ret = 0;
4323
4324         /* We should only get this for leaves */
4325         BUG_ON(path->lowest_level);
4326         buf = path->nodes[0];
4327 again:
4328         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4329                 unsigned int shift = 0, offset;
4330
4331                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4332                     BTRFS_LEAF_DATA_SIZE(root)) {
4333                         if (btrfs_item_end_nr(buf, i) >
4334                             BTRFS_LEAF_DATA_SIZE(root)) {
4335                                 ret = delete_bogus_item(trans, root, path,
4336                                                         buf, i);
4337                                 if (!ret)
4338                                         goto again;
4339                                 fprintf(stderr, "item is off the end of the "
4340                                         "leaf, can't fix\n");
4341                                 ret = -EIO;
4342                                 break;
4343                         }
4344                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4345                                 btrfs_item_end_nr(buf, i);
4346                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4347                            btrfs_item_offset_nr(buf, i - 1)) {
4348                         if (btrfs_item_end_nr(buf, i) >
4349                             btrfs_item_offset_nr(buf, i - 1)) {
4350                                 ret = delete_bogus_item(trans, root, path,
4351                                                         buf, i);
4352                                 if (!ret)
4353                                         goto again;
4354                                 fprintf(stderr, "items overlap, can't fix\n");
4355                                 ret = -EIO;
4356                                 break;
4357                         }
4358                         shift = btrfs_item_offset_nr(buf, i - 1) -
4359                                 btrfs_item_end_nr(buf, i);
4360                 }
4361                 if (!shift)
4362                         continue;
4363
4364                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4365                        i, shift, (unsigned long long)buf->start);
4366                 offset = btrfs_item_offset_nr(buf, i);
4367                 memmove_extent_buffer(buf,
4368                                       btrfs_leaf_data(buf) + offset + shift,
4369                                       btrfs_leaf_data(buf) + offset,
4370                                       btrfs_item_size_nr(buf, i));
4371                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4372                                       offset + shift);
4373                 btrfs_mark_buffer_dirty(buf);
4374         }
4375
4376         /*
4377          * We may have moved things, in which case we want to exit so we don't
4378          * write those changes out.  Once we have proper abort functionality in
4379          * progs this can be changed to something nicer.
4380          */
4381         BUG_ON(ret);
4382         return ret;
4383 }
4384
4385 /*
4386  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4387  * then just return -EIO.
4388  */
4389 static int try_to_fix_bad_block(struct btrfs_root *root,
4390                                 struct extent_buffer *buf,
4391                                 enum btrfs_tree_block_status status)
4392 {
4393         struct btrfs_trans_handle *trans;
4394         struct ulist *roots;
4395         struct ulist_node *node;
4396         struct btrfs_root *search_root;
4397         struct btrfs_path *path;
4398         struct ulist_iterator iter;
4399         struct btrfs_key root_key, key;
4400         int ret;
4401
4402         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4403             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4404                 return -EIO;
4405
4406         path = btrfs_alloc_path();
4407         if (!path)
4408                 return -EIO;
4409
4410         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4411                                    0, &roots);
4412         if (ret) {
4413                 btrfs_free_path(path);
4414                 return -EIO;
4415         }
4416
4417         ULIST_ITER_INIT(&iter);
4418         while ((node = ulist_next(roots, &iter))) {
4419                 root_key.objectid = node->val;
4420                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4421                 root_key.offset = (u64)-1;
4422
4423                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4424                 if (IS_ERR(root)) {
4425                         ret = -EIO;
4426                         break;
4427                 }
4428
4429
4430                 trans = btrfs_start_transaction(search_root, 0);
4431                 if (IS_ERR(trans)) {
4432                         ret = PTR_ERR(trans);
4433                         break;
4434                 }
4435
4436                 path->lowest_level = btrfs_header_level(buf);
4437                 path->skip_check_block = 1;
4438                 if (path->lowest_level)
4439                         btrfs_node_key_to_cpu(buf, &key, 0);
4440                 else
4441                         btrfs_item_key_to_cpu(buf, &key, 0);
4442                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4443                 if (ret) {
4444                         ret = -EIO;
4445                         btrfs_commit_transaction(trans, search_root);
4446                         break;
4447                 }
4448                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4449                         ret = fix_key_order(trans, search_root, path);
4450                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451                         ret = fix_item_offset(trans, search_root, path);
4452                 if (ret) {
4453                         btrfs_commit_transaction(trans, search_root);
4454                         break;
4455                 }
4456                 btrfs_release_path(path);
4457                 btrfs_commit_transaction(trans, search_root);
4458         }
4459         ulist_free(roots);
4460         btrfs_free_path(path);
4461         return ret;
4462 }
4463
4464 static int check_block(struct btrfs_root *root,
4465                        struct cache_tree *extent_cache,
4466                        struct extent_buffer *buf, u64 flags)
4467 {
4468         struct extent_record *rec;
4469         struct cache_extent *cache;
4470         struct btrfs_key key;
4471         enum btrfs_tree_block_status status;
4472         int ret = 0;
4473         int level;
4474
4475         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4476         if (!cache)
4477                 return 1;
4478         rec = container_of(cache, struct extent_record, cache);
4479         rec->generation = btrfs_header_generation(buf);
4480
4481         level = btrfs_header_level(buf);
4482         if (btrfs_header_nritems(buf) > 0) {
4483
4484                 if (level == 0)
4485                         btrfs_item_key_to_cpu(buf, &key, 0);
4486                 else
4487                         btrfs_node_key_to_cpu(buf, &key, 0);
4488
4489                 rec->info_objectid = key.objectid;
4490         }
4491         rec->info_level = level;
4492
4493         if (btrfs_is_leaf(buf))
4494                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4495         else
4496                 status = btrfs_check_node(root, &rec->parent_key, buf);
4497
4498         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4499                 if (repair)
4500                         status = try_to_fix_bad_block(root, buf, status);
4501                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4502                         ret = -EIO;
4503                         fprintf(stderr, "bad block %llu\n",
4504                                 (unsigned long long)buf->start);
4505                 } else {
4506                         /*
4507                          * Signal to callers we need to start the scan over
4508                          * again since we'll have cowed blocks.
4509                          */
4510                         ret = -EAGAIN;
4511                 }
4512         } else {
4513                 rec->content_checked = 1;
4514                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4515                         rec->owner_ref_checked = 1;
4516                 else {
4517                         ret = check_owner_ref(root, rec, buf);
4518                         if (!ret)
4519                                 rec->owner_ref_checked = 1;
4520                 }
4521         }
4522         if (!ret)
4523                 maybe_free_extent_rec(extent_cache, rec);
4524         return ret;
4525 }
4526
4527
4528 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root)
4530 {
4531         struct rb_node *node;
4532         struct tree_backref *back = NULL;
4533         struct tree_backref match = {
4534                 .node = {
4535                         .is_data = 0,
4536                 },
4537         };
4538
4539         if (parent) {
4540                 match.parent = parent;
4541                 match.node.full_backref = 1;
4542         } else {
4543                 match.root = root;
4544         }
4545
4546         node = rb_search(&rec->backref_tree, &match.node.node,
4547                          (rb_compare_keys)compare_extent_backref, NULL);
4548         if (node)
4549                 back = to_tree_backref(rb_node_to_extent_backref(node));
4550
4551         return back;
4552 }
4553
4554 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4555                                                 u64 parent, u64 root)
4556 {
4557         struct tree_backref *ref = malloc(sizeof(*ref));
4558
4559         if (!ref)
4560                 return NULL;
4561         memset(&ref->node, 0, sizeof(ref->node));
4562         if (parent > 0) {
4563                 ref->parent = parent;
4564                 ref->node.full_backref = 1;
4565         } else {
4566                 ref->root = root;
4567                 ref->node.full_backref = 0;
4568         }
4569         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4570
4571         return ref;
4572 }
4573
4574 static struct data_backref *find_data_backref(struct extent_record *rec,
4575                                                 u64 parent, u64 root,
4576                                                 u64 owner, u64 offset,
4577                                                 int found_ref,
4578                                                 u64 disk_bytenr, u64 bytes)
4579 {
4580         struct rb_node *node;
4581         struct data_backref *back = NULL;
4582         struct data_backref match = {
4583                 .node = {
4584                         .is_data = 1,
4585                 },
4586                 .owner = owner,
4587                 .offset = offset,
4588                 .bytes = bytes,
4589                 .found_ref = found_ref,
4590                 .disk_bytenr = disk_bytenr,
4591         };
4592
4593         if (parent) {
4594                 match.parent = parent;
4595                 match.node.full_backref = 1;
4596         } else {
4597                 match.root = root;
4598         }
4599
4600         node = rb_search(&rec->backref_tree, &match.node.node,
4601                          (rb_compare_keys)compare_extent_backref, NULL);
4602         if (node)
4603                 back = to_data_backref(rb_node_to_extent_backref(node));
4604
4605         return back;
4606 }
4607
4608 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4609                                                 u64 parent, u64 root,
4610                                                 u64 owner, u64 offset,
4611                                                 u64 max_size)
4612 {
4613         struct data_backref *ref = malloc(sizeof(*ref));
4614
4615         if (!ref)
4616                 return NULL;
4617         memset(&ref->node, 0, sizeof(ref->node));
4618         ref->node.is_data = 1;
4619
4620         if (parent > 0) {
4621                 ref->parent = parent;
4622                 ref->owner = 0;
4623                 ref->offset = 0;
4624                 ref->node.full_backref = 1;
4625         } else {
4626                 ref->root = root;
4627                 ref->owner = owner;
4628                 ref->offset = offset;
4629                 ref->node.full_backref = 0;
4630         }
4631         ref->bytes = max_size;
4632         ref->found_ref = 0;
4633         ref->num_refs = 0;
4634         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4635         if (max_size > rec->max_size)
4636                 rec->max_size = max_size;
4637         return ref;
4638 }
4639
4640 /* Check if the type of extent matches with its chunk */
4641 static void check_extent_type(struct extent_record *rec)
4642 {
4643         struct btrfs_block_group_cache *bg_cache;
4644
4645         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4646         if (!bg_cache)
4647                 return;
4648
4649         /* data extent, check chunk directly*/
4650         if (!rec->metadata) {
4651                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4652                         rec->wrong_chunk_type = 1;
4653                 return;
4654         }
4655
4656         /* metadata extent, check the obvious case first */
4657         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4658                                  BTRFS_BLOCK_GROUP_METADATA))) {
4659                 rec->wrong_chunk_type = 1;
4660                 return;
4661         }
4662
4663         /*
4664          * Check SYSTEM extent, as it's also marked as metadata, we can only
4665          * make sure it's a SYSTEM extent by its backref
4666          */
4667         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4668                 struct extent_backref *node;
4669                 struct tree_backref *tback;
4670                 u64 bg_type;
4671
4672                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4673                 if (node->is_data) {
4674                         /* tree block shouldn't have data backref */
4675                         rec->wrong_chunk_type = 1;
4676                         return;
4677                 }
4678                 tback = container_of(node, struct tree_backref, node);
4679
4680                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4681                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4682                 else
4683                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4684                 if (!(bg_cache->flags & bg_type))
4685                         rec->wrong_chunk_type = 1;
4686         }
4687 }
4688
4689 /*
4690  * Allocate a new extent record, fill default values from @tmpl and insert int
4691  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4692  * the cache, otherwise it fails.
4693  */
4694 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4695                 struct extent_record *tmpl)
4696 {
4697         struct extent_record *rec;
4698         int ret = 0;
4699
4700         rec = malloc(sizeof(*rec));
4701         if (!rec)
4702                 return -ENOMEM;
4703         rec->start = tmpl->start;
4704         rec->max_size = tmpl->max_size;
4705         rec->nr = max(tmpl->nr, tmpl->max_size);
4706         rec->found_rec = tmpl->found_rec;
4707         rec->content_checked = tmpl->content_checked;
4708         rec->owner_ref_checked = tmpl->owner_ref_checked;
4709         rec->num_duplicates = 0;
4710         rec->metadata = tmpl->metadata;
4711         rec->flag_block_full_backref = FLAG_UNSET;
4712         rec->bad_full_backref = 0;
4713         rec->crossing_stripes = 0;
4714         rec->wrong_chunk_type = 0;
4715         rec->is_root = tmpl->is_root;
4716         rec->refs = tmpl->refs;
4717         rec->extent_item_refs = tmpl->extent_item_refs;
4718         rec->parent_generation = tmpl->parent_generation;
4719         INIT_LIST_HEAD(&rec->backrefs);
4720         INIT_LIST_HEAD(&rec->dups);
4721         INIT_LIST_HEAD(&rec->list);
4722         rec->backref_tree = RB_ROOT;
4723         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4724         rec->cache.start = tmpl->start;
4725         rec->cache.size = tmpl->nr;
4726         ret = insert_cache_extent(extent_cache, &rec->cache);
4727         BUG_ON(ret);
4728         bytes_used += rec->nr;
4729
4730         if (tmpl->metadata)
4731                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4732                                 global_info->tree_root->nodesize);
4733         check_extent_type(rec);
4734         return ret;
4735 }
4736
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 *
 * Returns 0 when an existing record was updated, -ENOMEM when a duplicate
 * record could not be allocated, and otherwise whatever
 * add_extent_rec_nolookup() returns for the newly created record.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
                struct extent_record *tmpl)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int ret = 0;
        int dup = 0;

        cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
        if (cache) {
                rec = container_of(cache, struct extent_record, cache);
                /* refs in the template is a hint: any non-zero value adds one */
                if (tmpl->refs)
                        rec->refs++;
                /*
                 * nr == 1 is what add_tree_backref()/add_data_backref() use
                 * for placeholder records, replace it with a real size.
                 */
                if (rec->nr == 1)
                        rec->nr = max(tmpl->nr, tmpl->max_size);

                /*
                 * We need to make sure to reset nr to whatever the extent
                 * record says was the real size, this way we can compare it to
                 * the backrefs.
                 */
                if (tmpl->found_rec) {
                        /*
                         * Either the found record starts elsewhere or it
                         * already has an extent item: treat @tmpl as a
                         * duplicate.
                         */
                        if (tmpl->start != rec->start || rec->found_rec) {
                                struct extent_record *tmp;

                                dup = 1;
                                /* queue rec on duplicate_extents only once */
                                if (list_empty(&rec->list))
                                        list_add_tail(&rec->list,
                                                      &duplicate_extents);

                                /*
                                 * We have to do this song and dance in case we
                                 * find an extent record that falls inside of
                                 * our current extent record but does not have
                                 * the same objectid.
                                 */
                                /*
                                 * NOTE(review): only the fields assigned below
                                 * are initialized, everything else in tmp is
                                 * indeterminate - confirm no consumer of
                                 * rec->dups reads other fields.
                                 */
                                tmp = malloc(sizeof(*tmp));
                                if (!tmp)
                                        return -ENOMEM;
                                tmp->start = tmpl->start;
                                tmp->max_size = tmpl->max_size;
                                tmp->nr = tmpl->nr;
                                tmp->found_rec = 1;
                                tmp->metadata = tmpl->metadata;
                                tmp->extent_item_refs = tmpl->extent_item_refs;
                                INIT_LIST_HEAD(&tmp->list);
                                list_add_tail(&tmp->list, &rec->dups);
                                rec->num_duplicates++;
                        } else {
                                rec->nr = tmpl->nr;
                                rec->found_rec = 1;
                        }
                }

                /*
                 * Duplicates keep their own extent_item_refs, so rec is only
                 * updated in the non-duplicate case; an already set value is
                 * reported before being overwritten.
                 */
                if (tmpl->extent_item_refs && !dup) {
                        if (rec->extent_item_refs) {
                                fprintf(stderr, "block %llu rec "
                                        "extent_item_refs %llu, passed %llu\n",
                                        (unsigned long long)tmpl->start,
                                        (unsigned long long)
                                                        rec->extent_item_refs,
                                        (unsigned long long)tmpl->extent_item_refs);
                        }
                        rec->extent_item_refs = tmpl->extent_item_refs;
                }
                /* The following flags are sticky: they are set, never cleared */
                if (tmpl->is_root)
                        rec->is_root = 1;
                if (tmpl->content_checked)
                        rec->content_checked = 1;
                if (tmpl->owner_ref_checked)
                        rec->owner_ref_checked = 1;
                /* parent_key is always overwritten, even for duplicates */
                memcpy(&rec->parent_key, &tmpl->parent_key,
                                sizeof(tmpl->parent_key));
                if (tmpl->parent_generation)
                        rec->parent_generation = tmpl->parent_generation;
                if (rec->max_size < tmpl->max_size)
                        rec->max_size = tmpl->max_size;

                /*
                 * A metadata extent can't cross stripe_len boundary, otherwise
                 * kernel scrub won't be able to handle it.
                 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
                 * it.
                 */
                if (tmpl->metadata)
                        rec->crossing_stripes = check_crossing_stripes(
                                rec->start, global_info->tree_root->nodesize);
                check_extent_type(rec);
                /*
                 * presumably releases rec once nothing is left to verify, so
                 * rec must not be touched afterwards - TODO confirm
                 */
                maybe_free_extent_rec(extent_cache, rec);
                return ret;
        }

        /* Nothing cached for this range yet, create the record from @tmpl */
        ret = add_extent_rec_nolookup(extent_cache, tmpl);

        return ret;
}
4843
4844 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4845                             u64 parent, u64 root, int found_ref)
4846 {
4847         struct extent_record *rec;
4848         struct tree_backref *back;
4849         struct cache_extent *cache;
4850
4851         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4852         if (!cache) {
4853                 struct extent_record tmpl;
4854
4855                 memset(&tmpl, 0, sizeof(tmpl));
4856                 tmpl.start = bytenr;
4857                 tmpl.nr = 1;
4858                 tmpl.metadata = 1;
4859
4860                 add_extent_rec_nolookup(extent_cache, &tmpl);
4861
4862                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4863                 if (!cache)
4864                         abort();
4865         }
4866
4867         rec = container_of(cache, struct extent_record, cache);
4868         if (rec->start != bytenr) {
4869                 abort();
4870         }
4871
4872         back = find_tree_backref(rec, parent, root);
4873         if (!back) {
4874                 back = alloc_tree_backref(rec, parent, root);
4875                 BUG_ON(!back);
4876         }
4877
4878         if (found_ref) {
4879                 if (back->node.found_ref) {
4880                         fprintf(stderr, "Extent back ref already exists "
4881                                 "for %llu parent %llu root %llu \n",
4882                                 (unsigned long long)bytenr,
4883                                 (unsigned long long)parent,
4884                                 (unsigned long long)root);
4885                 }
4886                 back->node.found_ref = 1;
4887         } else {
4888                 if (back->node.found_extent_tree) {
4889                         fprintf(stderr, "Extent back ref already exists "
4890                                 "for %llu parent %llu root %llu \n",
4891                                 (unsigned long long)bytenr,
4892                                 (unsigned long long)parent,
4893                                 (unsigned long long)root);
4894                 }
4895                 back->node.found_extent_tree = 1;
4896         }
4897         check_extent_type(rec);
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4903                             u64 parent, u64 root, u64 owner, u64 offset,
4904                             u32 num_refs, int found_ref, u64 max_size)
4905 {
4906         struct extent_record *rec;
4907         struct data_backref *back;
4908         struct cache_extent *cache;
4909
4910         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4911         if (!cache) {
4912                 struct extent_record tmpl;
4913
4914                 memset(&tmpl, 0, sizeof(tmpl));
4915                 tmpl.start = bytenr;
4916                 tmpl.nr = 1;
4917                 tmpl.max_size = max_size;
4918
4919                 add_extent_rec_nolookup(extent_cache, &tmpl);
4920
4921                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4922                 if (!cache)
4923                         abort();
4924         }
4925
4926         rec = container_of(cache, struct extent_record, cache);
4927         if (rec->max_size < max_size)
4928                 rec->max_size = max_size;
4929
4930         /*
4931          * If found_ref is set then max_size is the real size and must match the
4932          * existing refs.  So if we have already found a ref then we need to
4933          * make sure that this ref matches the existing one, otherwise we need
4934          * to add a new backref so we can notice that the backrefs don't match
4935          * and we need to figure out who is telling the truth.  This is to
4936          * account for that awful fsync bug I introduced where we'd end up with
4937          * a btrfs_file_extent_item that would have its length include multiple
4938          * prealloc extents or point inside of a prealloc extent.
4939          */
4940         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4941                                  bytenr, max_size);
4942         if (!back) {
4943                 back = alloc_data_backref(rec, parent, root, owner, offset,
4944                                           max_size);
4945                 BUG_ON(!back);
4946         }
4947
4948         if (found_ref) {
4949                 BUG_ON(num_refs != 1);
4950                 if (back->node.found_ref)
4951                         BUG_ON(back->bytes != max_size);
4952                 back->node.found_ref = 1;
4953                 back->found_ref += 1;
4954                 back->bytes = max_size;
4955                 back->disk_bytenr = bytenr;
4956                 rec->refs += 1;
4957                 rec->content_checked = 1;
4958                 rec->owner_ref_checked = 1;
4959         } else {
4960                 if (back->node.found_extent_tree) {
4961                         fprintf(stderr, "Extent back ref already exists "
4962                                 "for %llu parent %llu root %llu "
4963                                 "owner %llu offset %llu num_refs %lu\n",
4964                                 (unsigned long long)bytenr,
4965                                 (unsigned long long)parent,
4966                                 (unsigned long long)root,
4967                                 (unsigned long long)owner,
4968                                 (unsigned long long)offset,
4969                                 (unsigned long)num_refs);
4970                 }
4971                 back->num_refs = num_refs;
4972                 back->node.found_extent_tree = 1;
4973         }
4974         maybe_free_extent_rec(extent_cache, rec);
4975         return 0;
4976 }
4977
4978 static int add_pending(struct cache_tree *pending,
4979                        struct cache_tree *seen, u64 bytenr, u32 size)
4980 {
4981         int ret;
4982         ret = add_cache_extent(seen, bytenr, size);
4983         if (ret)
4984                 return ret;
4985         add_cache_extent(pending, bytenr, size);
4986         return 0;
4987 }
4988
4989 static int pick_next_pending(struct cache_tree *pending,
4990                         struct cache_tree *reada,
4991                         struct cache_tree *nodes,
4992                         u64 last, struct block_info *bits, int bits_nr,
4993                         int *reada_bits)
4994 {
4995         unsigned long node_start = last;
4996         struct cache_extent *cache;
4997         int ret;
4998
4999         cache = search_cache_extent(reada, 0);
5000         if (cache) {
5001                 bits[0].start = cache->start;
5002                 bits[0].size = cache->size;
5003                 *reada_bits = 1;
5004                 return 1;
5005         }
5006         *reada_bits = 0;
5007         if (node_start > 32768)
5008                 node_start -= 32768;
5009
5010         cache = search_cache_extent(nodes, node_start);
5011         if (!cache)
5012                 cache = search_cache_extent(nodes, 0);
5013
5014         if (!cache) {
5015                  cache = search_cache_extent(pending, 0);
5016                  if (!cache)
5017                          return 0;
5018                  ret = 0;
5019                  do {
5020                          bits[ret].start = cache->start;
5021                          bits[ret].size = cache->size;
5022                          cache = next_cache_extent(cache);
5023                          ret++;
5024                  } while (cache && ret < bits_nr);
5025                  return ret;
5026         }
5027
5028         ret = 0;
5029         do {
5030                 bits[ret].start = cache->start;
5031                 bits[ret].size = cache->size;
5032                 cache = next_cache_extent(cache);
5033                 ret++;
5034         } while (cache && ret < bits_nr);
5035
5036         if (bits_nr - ret > 8) {
5037                 u64 lookup = bits[0].start + bits[0].size;
5038                 struct cache_extent *next;
5039                 next = search_cache_extent(pending, lookup);
5040                 while(next) {
5041                         if (next->start - lookup > 32768)
5042                                 break;
5043                         bits[ret].start = next->start;
5044                         bits[ret].size = next->size;
5045                         lookup = next->start + next->size;
5046                         ret++;
5047                         if (ret == bits_nr)
5048                                 break;
5049                         next = next_cache_extent(next);
5050                         if (!next)
5051                                 break;
5052                 }
5053         }
5054         return ret;
5055 }
5056
5057 static void free_chunk_record(struct cache_extent *cache)
5058 {
5059         struct chunk_record *rec;
5060
5061         rec = container_of(cache, struct chunk_record, cache);
5062         list_del_init(&rec->list);
5063         list_del_init(&rec->dextents);
5064         free(rec);
5065 }
5066
/*
 * Free every chunk record stored in @chunk_cache; each entry is released
 * through free_chunk_record().
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
5071
5072 static void free_device_record(struct rb_node *node)
5073 {
5074         struct device_record *rec;
5075
5076         rec = container_of(node, struct device_record, node);
5077         free(rec);
5078 }
5079
5080 FREE_RB_BASED_TREE(device_cache, free_device_record);
5081
5082 int insert_block_group_record(struct block_group_tree *tree,
5083                               struct block_group_record *bg_rec)
5084 {
5085         int ret;
5086
5087         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5088         if (ret)
5089                 return ret;
5090
5091         list_add_tail(&bg_rec->list, &tree->block_groups);
5092         return 0;
5093 }
5094
5095 static void free_block_group_record(struct cache_extent *cache)
5096 {
5097         struct block_group_record *rec;
5098
5099         rec = container_of(cache, struct block_group_record, cache);
5100         list_del_init(&rec->list);
5101         free(rec);
5102 }
5103
/*
 * Free every block group record cached in @tree; each entry is released
 * through free_block_group_record().
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
5108
5109 int insert_device_extent_record(struct device_extent_tree *tree,
5110                                 struct device_extent_record *de_rec)
5111 {
5112         int ret;
5113
5114         /*
5115          * Device extent is a bit different from the other extents, because
5116          * the extents which belong to the different devices may have the
5117          * same start and size, so we need use the special extent cache
5118          * search/insert functions.
5119          */
5120         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5121         if (ret)
5122                 return ret;
5123
5124         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5125         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5126         return 0;
5127 }
5128
5129 static void free_device_extent_record(struct cache_extent *cache)
5130 {
5131         struct device_extent_record *rec;
5132
5133         rec = container_of(cache, struct device_extent_record, cache);
5134         if (!list_empty(&rec->chunk_list))
5135                 list_del_init(&rec->chunk_list);
5136         if (!list_empty(&rec->device_list))
5137                 list_del_init(&rec->device_list);
5138         free(rec);
5139 }
5140
/*
 * Free every device extent record cached in @tree; each entry is released
 * through free_device_extent_record().
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
5145
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf.
 *
 * The item key supplies the extent start (objectid) and the parent
 * (offset).  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs, all others as data backrefs carrying the ref
 * count stored in the item.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0 =
                btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
        else
                add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

        return 0;
}
#endif
5164
5165 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5166                                             struct btrfs_key *key,
5167                                             int slot)
5168 {
5169         struct btrfs_chunk *ptr;
5170         struct chunk_record *rec;
5171         int num_stripes, i;
5172
5173         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5174         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5175
5176         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5177         if (!rec) {
5178                 fprintf(stderr, "memory allocation failed\n");
5179                 exit(-1);
5180         }
5181
5182         INIT_LIST_HEAD(&rec->list);
5183         INIT_LIST_HEAD(&rec->dextents);
5184         rec->bg_rec = NULL;
5185
5186         rec->cache.start = key->offset;
5187         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5188
5189         rec->generation = btrfs_header_generation(leaf);
5190
5191         rec->objectid = key->objectid;
5192         rec->type = key->type;
5193         rec->offset = key->offset;
5194
5195         rec->length = rec->cache.size;
5196         rec->owner = btrfs_chunk_owner(leaf, ptr);
5197         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5198         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5199         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5200         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5201         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5202         rec->num_stripes = num_stripes;
5203         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5204
5205         for (i = 0; i < rec->num_stripes; ++i) {
5206                 rec->stripes[i].devid =
5207                         btrfs_stripe_devid_nr(leaf, ptr, i);
5208                 rec->stripes[i].offset =
5209                         btrfs_stripe_offset_nr(leaf, ptr, i);
5210                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5211                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5212                                 BTRFS_UUID_SIZE);
5213         }
5214
5215         return rec;
5216 }
5217
5218 static int process_chunk_item(struct cache_tree *chunk_cache,
5219                               struct btrfs_key *key, struct extent_buffer *eb,
5220                               int slot)
5221 {
5222         struct chunk_record *rec;
5223         struct btrfs_chunk *chunk;
5224         int ret = 0;
5225
5226         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5227         /*
5228          * Do extra check for this chunk item,
5229          *
5230          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5231          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5232          * and owner<->key_type check.
5233          */
5234         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5235                                       key->offset);
5236         if (ret < 0) {
5237                 error("chunk(%llu, %llu) is not valid, ignore it",
5238                       key->offset, btrfs_chunk_length(eb, chunk));
5239                 return 0;
5240         }
5241         rec = btrfs_new_chunk_record(eb, key, slot);
5242         ret = insert_cache_extent(chunk_cache, &rec->cache);
5243         if (ret) {
5244                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5245                         rec->offset, rec->length);
5246                 free(rec);
5247         }
5248
5249         return ret;
5250 }
5251
5252 static int process_device_item(struct rb_root *dev_cache,
5253                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5254 {
5255         struct btrfs_dev_item *ptr;
5256         struct device_record *rec;
5257         int ret = 0;
5258
5259         ptr = btrfs_item_ptr(eb,
5260                 slot, struct btrfs_dev_item);
5261
5262         rec = malloc(sizeof(*rec));
5263         if (!rec) {
5264                 fprintf(stderr, "memory allocation failed\n");
5265                 return -ENOMEM;
5266         }
5267
5268         rec->devid = key->offset;
5269         rec->generation = btrfs_header_generation(eb);
5270
5271         rec->objectid = key->objectid;
5272         rec->type = key->type;
5273         rec->offset = key->offset;
5274
5275         rec->devid = btrfs_device_id(eb, ptr);
5276         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5277         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5278
5279         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5280         if (ret) {
5281                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5282                 free(rec);
5283         }
5284
5285         return ret;
5286 }
5287
5288 struct block_group_record *
5289 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5290                              int slot)
5291 {
5292         struct btrfs_block_group_item *ptr;
5293         struct block_group_record *rec;
5294
5295         rec = calloc(1, sizeof(*rec));
5296         if (!rec) {
5297                 fprintf(stderr, "memory allocation failed\n");
5298                 exit(-1);
5299         }
5300
5301         rec->cache.start = key->objectid;
5302         rec->cache.size = key->offset;
5303
5304         rec->generation = btrfs_header_generation(leaf);
5305
5306         rec->objectid = key->objectid;
5307         rec->type = key->type;
5308         rec->offset = key->offset;
5309
5310         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5311         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5312
5313         INIT_LIST_HEAD(&rec->list);
5314
5315         return rec;
5316 }
5317
5318 static int process_block_group_item(struct block_group_tree *block_group_cache,
5319                                     struct btrfs_key *key,
5320                                     struct extent_buffer *eb, int slot)
5321 {
5322         struct block_group_record *rec;
5323         int ret = 0;
5324
5325         rec = btrfs_new_block_group_record(eb, key, slot);
5326         ret = insert_block_group_record(block_group_cache, rec);
5327         if (ret) {
5328                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5329                         rec->objectid, rec->offset);
5330                 free(rec);
5331         }
5332
5333         return ret;
5334 }
5335
5336 struct device_extent_record *
5337 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5338                                struct btrfs_key *key, int slot)
5339 {
5340         struct device_extent_record *rec;
5341         struct btrfs_dev_extent *ptr;
5342
5343         rec = calloc(1, sizeof(*rec));
5344         if (!rec) {
5345                 fprintf(stderr, "memory allocation failed\n");
5346                 exit(-1);
5347         }
5348
5349         rec->cache.objectid = key->objectid;
5350         rec->cache.start = key->offset;
5351
5352         rec->generation = btrfs_header_generation(leaf);
5353
5354         rec->objectid = key->objectid;
5355         rec->type = key->type;
5356         rec->offset = key->offset;
5357
5358         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5359         rec->chunk_objecteid =
5360                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5361         rec->chunk_offset =
5362                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5363         rec->length = btrfs_dev_extent_length(leaf, ptr);
5364         rec->cache.size = rec->length;
5365
5366         INIT_LIST_HEAD(&rec->chunk_list);
5367         INIT_LIST_HEAD(&rec->device_list);
5368
5369         return rec;
5370 }
5371
5372 static int
5373 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5374                            struct btrfs_key *key, struct extent_buffer *eb,
5375                            int slot)
5376 {
5377         struct device_extent_record *rec;
5378         int ret;
5379
5380         rec = btrfs_new_device_extent_record(eb, key, slot);
5381         ret = insert_device_extent_record(dev_extent_cache, rec);
5382         if (ret) {
5383                 fprintf(stderr,
5384                         "Device extent[%llu, %llu, %llu] existed.\n",
5385                         rec->objectid, rec->offset, rec->length);
5386                 free(rec);
5387         }
5388
5389         return ret;
5390 }
5391
5392 static int process_extent_item(struct btrfs_root *root,
5393                                struct cache_tree *extent_cache,
5394                                struct extent_buffer *eb, int slot)
5395 {
5396         struct btrfs_extent_item *ei;
5397         struct btrfs_extent_inline_ref *iref;
5398         struct btrfs_extent_data_ref *dref;
5399         struct btrfs_shared_data_ref *sref;
5400         struct btrfs_key key;
5401         struct extent_record tmpl;
5402         unsigned long end;
5403         unsigned long ptr;
5404         int type;
5405         u32 item_size = btrfs_item_size_nr(eb, slot);
5406         u64 refs = 0;
5407         u64 offset;
5408         u64 num_bytes;
5409         int metadata = 0;
5410
5411         btrfs_item_key_to_cpu(eb, &key, slot);
5412
5413         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5414                 metadata = 1;
5415                 num_bytes = root->nodesize;
5416         } else {
5417                 num_bytes = key.offset;
5418         }
5419
5420         if (item_size < sizeof(*ei)) {
5421 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5422                 struct btrfs_extent_item_v0 *ei0;
5423                 BUG_ON(item_size != sizeof(*ei0));
5424                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5425                 refs = btrfs_extent_refs_v0(eb, ei0);
5426 #else
5427                 BUG();
5428 #endif
5429                 memset(&tmpl, 0, sizeof(tmpl));
5430                 tmpl.start = key.objectid;
5431                 tmpl.nr = num_bytes;
5432                 tmpl.extent_item_refs = refs;
5433                 tmpl.metadata = metadata;
5434                 tmpl.found_rec = 1;
5435                 tmpl.max_size = num_bytes;
5436
5437                 return add_extent_rec(extent_cache, &tmpl);
5438         }
5439
5440         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5441         refs = btrfs_extent_refs(eb, ei);
5442         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5443                 metadata = 1;
5444         else
5445                 metadata = 0;
5446
5447         memset(&tmpl, 0, sizeof(tmpl));
5448         tmpl.start = key.objectid;
5449         tmpl.nr = num_bytes;
5450         tmpl.extent_item_refs = refs;
5451         tmpl.metadata = metadata;
5452         tmpl.found_rec = 1;
5453         tmpl.max_size = num_bytes;
5454         add_extent_rec(extent_cache, &tmpl);
5455
5456         ptr = (unsigned long)(ei + 1);
5457         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5458             key.type == BTRFS_EXTENT_ITEM_KEY)
5459                 ptr += sizeof(struct btrfs_tree_block_info);
5460
5461         end = (unsigned long)ei + item_size;
5462         while (ptr < end) {
5463                 iref = (struct btrfs_extent_inline_ref *)ptr;
5464                 type = btrfs_extent_inline_ref_type(eb, iref);
5465                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5466                 switch (type) {
5467                 case BTRFS_TREE_BLOCK_REF_KEY:
5468                         add_tree_backref(extent_cache, key.objectid,
5469                                          0, offset, 0);
5470                         break;
5471                 case BTRFS_SHARED_BLOCK_REF_KEY:
5472                         add_tree_backref(extent_cache, key.objectid,
5473                                          offset, 0, 0);
5474                         break;
5475                 case BTRFS_EXTENT_DATA_REF_KEY:
5476                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5477                         add_data_backref(extent_cache, key.objectid, 0,
5478                                         btrfs_extent_data_ref_root(eb, dref),
5479                                         btrfs_extent_data_ref_objectid(eb,
5480                                                                        dref),
5481                                         btrfs_extent_data_ref_offset(eb, dref),
5482                                         btrfs_extent_data_ref_count(eb, dref),
5483                                         0, num_bytes);
5484                         break;
5485                 case BTRFS_SHARED_DATA_REF_KEY:
5486                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5487                         add_data_backref(extent_cache, key.objectid, offset,
5488                                         0, 0, 0,
5489                                         btrfs_shared_data_ref_count(eb, sref),
5490                                         0, num_bytes);
5491                         break;
5492                 default:
5493                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5494                                 key.objectid, key.type, num_bytes);
5495                         goto out;
5496                 }
5497                 ptr += btrfs_extent_inline_ref_size(type);
5498         }
5499         WARN_ON(ptr > end);
5500 out:
5501         return 0;
5502 }
5503
5504 static int check_cache_range(struct btrfs_root *root,
5505                              struct btrfs_block_group_cache *cache,
5506                              u64 offset, u64 bytes)
5507 {
5508         struct btrfs_free_space *entry;
5509         u64 *logical;
5510         u64 bytenr;
5511         int stripe_len;
5512         int i, nr, ret;
5513
5514         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5515                 bytenr = btrfs_sb_offset(i);
5516                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5517                                        cache->key.objectid, bytenr, 0,
5518                                        &logical, &nr, &stripe_len);
5519                 if (ret)
5520                         return ret;
5521
5522                 while (nr--) {
5523                         if (logical[nr] + stripe_len <= offset)
5524                                 continue;
5525                         if (offset + bytes <= logical[nr])
5526                                 continue;
5527                         if (logical[nr] == offset) {
5528                                 if (stripe_len >= bytes) {
5529                                         kfree(logical);
5530                                         return 0;
5531                                 }
5532                                 bytes -= stripe_len;
5533                                 offset += stripe_len;
5534                         } else if (logical[nr] < offset) {
5535                                 if (logical[nr] + stripe_len >=
5536                                     offset + bytes) {
5537                                         kfree(logical);
5538                                         return 0;
5539                                 }
5540                                 bytes = (offset + bytes) -
5541                                         (logical[nr] + stripe_len);
5542                                 offset = logical[nr] + stripe_len;
5543                         } else {
5544                                 /*
5545                                  * Could be tricky, the super may land in the
5546                                  * middle of the area we're checking.  First
5547                                  * check the easiest case, it's at the end.
5548                                  */
5549                                 if (logical[nr] + stripe_len >=
5550                                     bytes + offset) {
5551                                         bytes = logical[nr] - offset;
5552                                         continue;
5553                                 }
5554
5555                                 /* Check the left side */
5556                                 ret = check_cache_range(root, cache,
5557                                                         offset,
5558                                                         logical[nr] - offset);
5559                                 if (ret) {
5560                                         kfree(logical);
5561                                         return ret;
5562                                 }
5563
5564                                 /* Now we continue with the right side */
5565                                 bytes = (offset + bytes) -
5566                                         (logical[nr] + stripe_len);
5567                                 offset = logical[nr] + stripe_len;
5568                         }
5569                 }
5570
5571                 kfree(logical);
5572         }
5573
5574         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5575         if (!entry) {
5576                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5577                         offset, offset+bytes);
5578                 return -EINVAL;
5579         }
5580
5581         if (entry->offset != offset) {
5582                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5583                         entry->offset);
5584                 return -EINVAL;
5585         }
5586
5587         if (entry->bytes != bytes) {
5588                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5589                         bytes, entry->bytes, offset);
5590                 return -EINVAL;
5591         }
5592
5593         unlink_free_space(cache->free_space_ctl, entry);
5594         free(entry);
5595         return 0;
5596 }
5597
5598 static int verify_space_cache(struct btrfs_root *root,
5599                               struct btrfs_block_group_cache *cache)
5600 {
5601         struct btrfs_path *path;
5602         struct extent_buffer *leaf;
5603         struct btrfs_key key;
5604         u64 last;
5605         int ret = 0;
5606
5607         path = btrfs_alloc_path();
5608         if (!path)
5609                 return -ENOMEM;
5610
5611         root = root->fs_info->extent_root;
5612
5613         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5614
5615         key.objectid = last;
5616         key.offset = 0;
5617         key.type = BTRFS_EXTENT_ITEM_KEY;
5618
5619         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5620         if (ret < 0)
5621                 goto out;
5622         ret = 0;
5623         while (1) {
5624                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5625                         ret = btrfs_next_leaf(root, path);
5626                         if (ret < 0)
5627                                 goto out;
5628                         if (ret > 0) {
5629                                 ret = 0;
5630                                 break;
5631                         }
5632                 }
5633                 leaf = path->nodes[0];
5634                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5635                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5636                         break;
5637                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5638                     key.type != BTRFS_METADATA_ITEM_KEY) {
5639                         path->slots[0]++;
5640                         continue;
5641                 }
5642
5643                 if (last == key.objectid) {
5644                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5645                                 last = key.objectid + key.offset;
5646                         else
5647                                 last = key.objectid + root->nodesize;
5648                         path->slots[0]++;
5649                         continue;
5650                 }
5651
5652                 ret = check_cache_range(root, cache, last,
5653                                         key.objectid - last);
5654                 if (ret)
5655                         break;
5656                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5657                         last = key.objectid + key.offset;
5658                 else
5659                         last = key.objectid + root->nodesize;
5660                 path->slots[0]++;
5661         }
5662
5663         if (last < cache->key.objectid + cache->key.offset)
5664                 ret = check_cache_range(root, cache, last,
5665                                         cache->key.objectid +
5666                                         cache->key.offset - last);
5667
5668 out:
5669         btrfs_free_path(path);
5670
5671         if (!ret &&
5672             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5673                 fprintf(stderr, "There are still entries left in the space "
5674                         "cache\n");
5675                 ret = -EINVAL;
5676         }
5677
5678         return ret;
5679 }
5680
5681 static int check_space_cache(struct btrfs_root *root)
5682 {
5683         struct btrfs_block_group_cache *cache;
5684         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5685         int ret;
5686         int error = 0;
5687
5688         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5689             btrfs_super_generation(root->fs_info->super_copy) !=
5690             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5691                 printf("cache and super generation don't match, space cache "
5692                        "will be invalidated\n");
5693                 return 0;
5694         }
5695
5696         if (ctx.progress_enabled) {
5697                 ctx.tp = TASK_FREE_SPACE;
5698                 task_start(ctx.info);
5699         }
5700
5701         while (1) {
5702                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5703                 if (!cache)
5704                         break;
5705
5706                 start = cache->key.objectid + cache->key.offset;
5707                 if (!cache->free_space_ctl) {
5708                         if (btrfs_init_free_space_ctl(cache,
5709                                                       root->sectorsize)) {
5710                                 ret = -ENOMEM;
5711                                 break;
5712                         }
5713                 } else {
5714                         btrfs_remove_free_space_cache(cache);
5715                 }
5716
5717                 if (btrfs_fs_compat_ro(root->fs_info,
5718                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5719                         ret = exclude_super_stripes(root, cache);
5720                         if (ret) {
5721                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5722                                         strerror(-ret));
5723                                 error++;
5724                                 continue;
5725                         }
5726                         ret = load_free_space_tree(root->fs_info, cache);
5727                         free_excluded_extents(root, cache);
5728                         if (ret < 0) {
5729                                 fprintf(stderr, "could not load free space tree: %s\n",
5730                                         strerror(-ret));
5731                                 error++;
5732                                 continue;
5733                         }
5734                         error += ret;
5735                 } else {
5736                         ret = load_free_space_cache(root->fs_info, cache);
5737                         if (!ret)
5738                                 continue;
5739                 }
5740
5741                 ret = verify_space_cache(root, cache);
5742                 if (ret) {
5743                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5744                                 cache->key.objectid);
5745                         error++;
5746                 }
5747         }
5748
5749         task_stop(ctx.info);
5750
5751         return error ? -EINVAL : 0;
5752 }
5753
5754 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5755                         u64 num_bytes, unsigned long leaf_offset,
5756                         struct extent_buffer *eb) {
5757
5758         u64 offset = 0;
5759         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5760         char *data;
5761         unsigned long csum_offset;
5762         u32 csum;
5763         u32 csum_expected;
5764         u64 read_len;
5765         u64 data_checked = 0;
5766         u64 tmp;
5767         int ret = 0;
5768         int mirror;
5769         int num_copies;
5770
5771         if (num_bytes % root->sectorsize)
5772                 return -EINVAL;
5773
5774         data = malloc(num_bytes);
5775         if (!data)
5776                 return -ENOMEM;
5777
5778         while (offset < num_bytes) {
5779                 mirror = 0;
5780 again:
5781                 read_len = num_bytes - offset;
5782                 /* read as much space once a time */
5783                 ret = read_extent_data(root, data + offset,
5784                                 bytenr + offset, &read_len, mirror);
5785                 if (ret)
5786                         goto out;
5787                 data_checked = 0;
5788                 /* verify every 4k data's checksum */
5789                 while (data_checked < read_len) {
5790                         csum = ~(u32)0;
5791                         tmp = offset + data_checked;
5792
5793                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5794                                                csum, root->sectorsize);
5795                         btrfs_csum_final(csum, (char *)&csum);
5796
5797                         csum_offset = leaf_offset +
5798                                  tmp / root->sectorsize * csum_size;
5799                         read_extent_buffer(eb, (char *)&csum_expected,
5800                                            csum_offset, csum_size);
5801                         /* try another mirror */
5802                         if (csum != csum_expected) {
5803                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5804                                                 mirror, bytenr + tmp,
5805                                                 csum, csum_expected);
5806                                 num_copies = btrfs_num_copies(
5807                                                 &root->fs_info->mapping_tree,
5808                                                 bytenr, num_bytes);
5809                                 if (mirror < num_copies - 1) {
5810                                         mirror += 1;
5811                                         goto again;
5812                                 }
5813                         }
5814                         data_checked += root->sectorsize;
5815                 }
5816                 offset += read_len;
5817         }
5818 out:
5819         free(data);
5820         return ret;
5821 }
5822
5823 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5824                                u64 num_bytes)
5825 {
5826         struct btrfs_path *path;
5827         struct extent_buffer *leaf;
5828         struct btrfs_key key;
5829         int ret;
5830
5831         path = btrfs_alloc_path();
5832         if (!path) {
5833                 fprintf(stderr, "Error allocating path\n");
5834                 return -ENOMEM;
5835         }
5836
5837         key.objectid = bytenr;
5838         key.type = BTRFS_EXTENT_ITEM_KEY;
5839         key.offset = (u64)-1;
5840
5841 again:
5842         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5843                                 0, 0);
5844         if (ret < 0) {
5845                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5846                 btrfs_free_path(path);
5847                 return ret;
5848         } else if (ret) {
5849                 if (path->slots[0] > 0) {
5850                         path->slots[0]--;
5851                 } else {
5852                         ret = btrfs_prev_leaf(root, path);
5853                         if (ret < 0) {
5854                                 goto out;
5855                         } else if (ret > 0) {
5856                                 ret = 0;
5857                                 goto out;
5858                         }
5859                 }
5860         }
5861
5862         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5863
5864         /*
5865          * Block group items come before extent items if they have the same
5866          * bytenr, so walk back one more just in case.  Dear future traveller,
5867          * first congrats on mastering time travel.  Now if it's not too much
5868          * trouble could you go back to 2006 and tell Chris to make the
5869          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5870          * EXTENT_ITEM_KEY please?
5871          */
5872         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5873                 if (path->slots[0] > 0) {
5874                         path->slots[0]--;
5875                 } else {
5876                         ret = btrfs_prev_leaf(root, path);
5877                         if (ret < 0) {
5878                                 goto out;
5879                         } else if (ret > 0) {
5880                                 ret = 0;
5881                                 goto out;
5882                         }
5883                 }
5884                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5885         }
5886
5887         while (num_bytes) {
5888                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5889                         ret = btrfs_next_leaf(root, path);
5890                         if (ret < 0) {
5891                                 fprintf(stderr, "Error going to next leaf "
5892                                         "%d\n", ret);
5893                                 btrfs_free_path(path);
5894                                 return ret;
5895                         } else if (ret) {
5896                                 break;
5897                         }
5898                 }
5899                 leaf = path->nodes[0];
5900                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5901                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5902                         path->slots[0]++;
5903                         continue;
5904                 }
5905                 if (key.objectid + key.offset < bytenr) {
5906                         path->slots[0]++;
5907                         continue;
5908                 }
5909                 if (key.objectid > bytenr + num_bytes)
5910                         break;
5911
5912                 if (key.objectid == bytenr) {
5913                         if (key.offset >= num_bytes) {
5914                                 num_bytes = 0;
5915                                 break;
5916                         }
5917                         num_bytes -= key.offset;
5918                         bytenr += key.offset;
5919                 } else if (key.objectid < bytenr) {
5920                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5921                                 num_bytes = 0;
5922                                 break;
5923                         }
5924                         num_bytes = (bytenr + num_bytes) -
5925                                 (key.objectid + key.offset);
5926                         bytenr = key.objectid + key.offset;
5927                 } else {
5928                         if (key.objectid + key.offset < bytenr + num_bytes) {
5929                                 u64 new_start = key.objectid + key.offset;
5930                                 u64 new_bytes = bytenr + num_bytes - new_start;
5931
5932                                 /*
5933                                  * Weird case, the extent is in the middle of
5934                                  * our range, we'll have to search one side
5935                                  * and then the other.  Not sure if this happens
5936                                  * in real life, but no harm in coding it up
5937                                  * anyway just in case.
5938                                  */
5939                                 btrfs_release_path(path);
5940                                 ret = check_extent_exists(root, new_start,
5941                                                           new_bytes);
5942                                 if (ret) {
5943                                         fprintf(stderr, "Right section didn't "
5944                                                 "have a record\n");
5945                                         break;
5946                                 }
5947                                 num_bytes = key.objectid - bytenr;
5948                                 goto again;
5949                         }
5950                         num_bytes = key.objectid - bytenr;
5951                 }
5952                 path->slots[0]++;
5953         }
5954         ret = 0;
5955
5956 out:
5957         if (num_bytes && !ret) {
5958                 fprintf(stderr, "There are no extents for csum range "
5959                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5960                 ret = 1;
5961         }
5962
5963         btrfs_free_path(path);
5964         return ret;
5965 }
5966
5967 static int check_csums(struct btrfs_root *root)
5968 {
5969         struct btrfs_path *path;
5970         struct extent_buffer *leaf;
5971         struct btrfs_key key;
5972         u64 offset = 0, num_bytes = 0;
5973         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5974         int errors = 0;
5975         int ret;
5976         u64 data_len;
5977         unsigned long leaf_offset;
5978
5979         root = root->fs_info->csum_root;
5980         if (!extent_buffer_uptodate(root->node)) {
5981                 fprintf(stderr, "No valid csum tree found\n");
5982                 return -ENOENT;
5983         }
5984
5985         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5986         key.type = BTRFS_EXTENT_CSUM_KEY;
5987         key.offset = 0;
5988
5989         path = btrfs_alloc_path();
5990         if (!path)
5991                 return -ENOMEM;
5992
5993         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5994         if (ret < 0) {
5995                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5996                 btrfs_free_path(path);
5997                 return ret;
5998         }
5999
6000         if (ret > 0 && path->slots[0])
6001                 path->slots[0]--;
6002         ret = 0;
6003
6004         while (1) {
6005                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6006                         ret = btrfs_next_leaf(root, path);
6007                         if (ret < 0) {
6008                                 fprintf(stderr, "Error going to next leaf "
6009                                         "%d\n", ret);
6010                                 break;
6011                         }
6012                         if (ret)
6013                                 break;
6014                 }
6015                 leaf = path->nodes[0];
6016
6017                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6018                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6019                         path->slots[0]++;
6020                         continue;
6021                 }
6022
6023                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6024                               csum_size) * root->sectorsize;
6025                 if (!check_data_csum)
6026                         goto skip_csum_check;
6027                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6028                 ret = check_extent_csums(root, key.offset, data_len,
6029                                          leaf_offset, leaf);
6030                 if (ret)
6031                         break;
6032 skip_csum_check:
6033                 if (!num_bytes) {
6034                         offset = key.offset;
6035                 } else if (key.offset != offset + num_bytes) {
6036                         ret = check_extent_exists(root, offset, num_bytes);
6037                         if (ret) {
6038                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6039                                         "there is no extent record\n",
6040                                         offset, offset+num_bytes);
6041                                 errors++;
6042                         }
6043                         offset = key.offset;
6044                         num_bytes = 0;
6045                 }
6046                 num_bytes += data_len;
6047                 path->slots[0]++;
6048         }
6049
6050         btrfs_free_path(path);
6051         return errors;
6052 }
6053
6054 static int is_dropped_key(struct btrfs_key *key,
6055                           struct btrfs_key *drop_key) {
6056         if (key->objectid < drop_key->objectid)
6057                 return 1;
6058         else if (key->objectid == drop_key->objectid) {
6059                 if (key->type < drop_key->type)
6060                         return 1;
6061                 else if (key->type == drop_key->type) {
6062                         if (key->offset < drop_key->offset)
6063                                 return 1;
6064                 }
6065         }
6066         return 0;
6067 }
6068
6069 /*
6070  * Here are the rules for FULL_BACKREF.
6071  *
6072  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6073  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6074  *      FULL_BACKREF set.
6075  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6076  *    if it happened after the relocation occurred since we'll have dropped the
6077  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6078  *    have no real way to know for sure.
6079  *
6080  * We process the blocks one root at a time, and we start from the lowest root
6081  * objectid and go to the highest.  So we can just lookup the owner backref for
6082  * the record and if we don't find it then we know it doesn't exist and we have
6083  * a FULL BACKREF.
6084  *
6085  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6086  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6087  * be set or not and then we can check later once we've gathered all the refs.
6088  */
6089 static int calc_extent_flag(struct btrfs_root *root,
6090                            struct cache_tree *extent_cache,
6091                            struct extent_buffer *buf,
6092                            struct root_item_record *ri,
6093                            u64 *flags)
6094 {
6095         struct extent_record *rec;
6096         struct cache_extent *cache;
6097         struct tree_backref *tback;
6098         u64 owner = 0;
6099
6100         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6101         /* we have added this extent before */
6102         BUG_ON(!cache);
6103         rec = container_of(cache, struct extent_record, cache);
6104
6105         /*
6106          * Except file/reloc tree, we can not have
6107          * FULL BACKREF MODE
6108          */
6109         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6110                 goto normal;
6111         /*
6112          * root node
6113          */
6114         if (buf->start == ri->bytenr)
6115                 goto normal;
6116
6117         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6118                 goto full_backref;
6119
6120         owner = btrfs_header_owner(buf);
6121         if (owner == ri->objectid)
6122                 goto normal;
6123
6124         tback = find_tree_backref(rec, 0, owner);
6125         if (!tback)
6126                 goto full_backref;
6127 normal:
6128         *flags = 0;
6129         if (rec->flag_block_full_backref != FLAG_UNSET &&
6130             rec->flag_block_full_backref != 0)
6131                 rec->bad_full_backref = 1;
6132         return 0;
6133 full_backref:
6134         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6135         if (rec->flag_block_full_backref != FLAG_UNSET &&
6136             rec->flag_block_full_backref != 1)
6137                 rec->bad_full_backref = 1;
6138         return 0;
6139 }
6140
6141 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6142 {
6143         fprintf(stderr, "Invalid key type(");
6144         print_key_type(stderr, 0, key_type);
6145         fprintf(stderr, ") found in root(");
6146         print_objectid(stderr, rootid, 0);
6147         fprintf(stderr, ")\n");
6148 }
6149
6150 /*
6151  * Check if the key is valid with its extent buffer.
6152  *
6153  * This is a early check in case invalid key exists in a extent buffer
6154  * This is not comprehensive yet, but should prevent wrong key/item passed
6155  * further
6156  */
6157 static int check_type_with_root(u64 rootid, u8 key_type)
6158 {
6159         switch (key_type) {
6160         /* Only valid in chunk tree */
6161         case BTRFS_DEV_ITEM_KEY:
6162         case BTRFS_CHUNK_ITEM_KEY:
6163                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6164                         goto err;
6165                 break;
6166         /* valid in csum and log tree */
6167         case BTRFS_CSUM_TREE_OBJECTID:
6168                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6169                       is_fstree(rootid)))
6170                         goto err;
6171                 break;
6172         case BTRFS_EXTENT_ITEM_KEY:
6173         case BTRFS_METADATA_ITEM_KEY:
6174         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6175                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6176                         goto err;
6177                 break;
6178         case BTRFS_ROOT_ITEM_KEY:
6179                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6180                         goto err;
6181                 break;
6182         case BTRFS_DEV_EXTENT_KEY:
6183                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6184                         goto err;
6185                 break;
6186         }
6187         return 0;
6188 err:
6189         report_mismatch_key_root(key_type, rootid);
6190         return -EINVAL;
6191 }
6192
6193 static int run_next_block(struct btrfs_root *root,
6194                           struct block_info *bits,
6195                           int bits_nr,
6196                           u64 *last,
6197                           struct cache_tree *pending,
6198                           struct cache_tree *seen,
6199                           struct cache_tree *reada,
6200                           struct cache_tree *nodes,
6201                           struct cache_tree *extent_cache,
6202                           struct cache_tree *chunk_cache,
6203                           struct rb_root *dev_cache,
6204                           struct block_group_tree *block_group_cache,
6205                           struct device_extent_tree *dev_extent_cache,
6206                           struct root_item_record *ri)
6207 {
6208         struct extent_buffer *buf;
6209         struct extent_record *rec = NULL;
6210         u64 bytenr;
6211         u32 size;
6212         u64 parent;
6213         u64 owner;
6214         u64 flags;
6215         u64 ptr;
6216         u64 gen = 0;
6217         int ret = 0;
6218         int i;
6219         int nritems;
6220         struct btrfs_key key;
6221         struct cache_extent *cache;
6222         int reada_bits;
6223
6224         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6225                                     bits_nr, &reada_bits);
6226         if (nritems == 0)
6227                 return 1;
6228
6229         if (!reada_bits) {
6230                 for(i = 0; i < nritems; i++) {
6231                         ret = add_cache_extent(reada, bits[i].start,
6232                                                bits[i].size);
6233                         if (ret == -EEXIST)
6234                                 continue;
6235
6236                         /* fixme, get the parent transid */
6237                         readahead_tree_block(root, bits[i].start,
6238                                              bits[i].size, 0);
6239                 }
6240         }
6241         *last = bits[0].start;
6242         bytenr = bits[0].start;
6243         size = bits[0].size;
6244
6245         cache = lookup_cache_extent(pending, bytenr, size);
6246         if (cache) {
6247                 remove_cache_extent(pending, cache);
6248                 free(cache);
6249         }
6250         cache = lookup_cache_extent(reada, bytenr, size);
6251         if (cache) {
6252                 remove_cache_extent(reada, cache);
6253                 free(cache);
6254         }
6255         cache = lookup_cache_extent(nodes, bytenr, size);
6256         if (cache) {
6257                 remove_cache_extent(nodes, cache);
6258                 free(cache);
6259         }
6260         cache = lookup_cache_extent(extent_cache, bytenr, size);
6261         if (cache) {
6262                 rec = container_of(cache, struct extent_record, cache);
6263                 gen = rec->parent_generation;
6264         }
6265
6266         /* fixme, get the real parent transid */
6267         buf = read_tree_block(root, bytenr, size, gen);
6268         if (!extent_buffer_uptodate(buf)) {
6269                 record_bad_block_io(root->fs_info,
6270                                     extent_cache, bytenr, size);
6271                 goto out;
6272         }
6273
6274         nritems = btrfs_header_nritems(buf);
6275
6276         flags = 0;
6277         if (!init_extent_tree) {
6278                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6279                                        btrfs_header_level(buf), 1, NULL,
6280                                        &flags);
6281                 if (ret < 0) {
6282                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6283                         if (ret < 0) {
6284                                 fprintf(stderr, "Couldn't calc extent flags\n");
6285                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6286                         }
6287                 }
6288         } else {
6289                 flags = 0;
6290                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6291                 if (ret < 0) {
6292                         fprintf(stderr, "Couldn't calc extent flags\n");
6293                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6294                 }
6295         }
6296
6297         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6298                 if (ri != NULL &&
6299                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6300                     ri->objectid == btrfs_header_owner(buf)) {
6301                         /*
6302                          * Ok we got to this block from it's original owner and
6303                          * we have FULL_BACKREF set.  Relocation can leave
6304                          * converted blocks over so this is altogether possible,
6305                          * however it's not possible if the generation > the
6306                          * last snapshot, so check for this case.
6307                          */
6308                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6309                             btrfs_header_generation(buf) > ri->last_snapshot) {
6310                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6311                                 rec->bad_full_backref = 1;
6312                         }
6313                 }
6314         } else {
6315                 if (ri != NULL &&
6316                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6317                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6318                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6319                         rec->bad_full_backref = 1;
6320                 }
6321         }
6322
6323         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6324                 rec->flag_block_full_backref = 1;
6325                 parent = bytenr;
6326                 owner = 0;
6327         } else {
6328                 rec->flag_block_full_backref = 0;
6329                 parent = 0;
6330                 owner = btrfs_header_owner(buf);
6331         }
6332
6333         ret = check_block(root, extent_cache, buf, flags);
6334         if (ret)
6335                 goto out;
6336
6337         if (btrfs_is_leaf(buf)) {
6338                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6339                 for (i = 0; i < nritems; i++) {
6340                         struct btrfs_file_extent_item *fi;
6341                         btrfs_item_key_to_cpu(buf, &key, i);
6342                         /*
6343                          * Check key type against the leaf owner.
6344                          * Could filter quite a lot of early error if
6345                          * owner is correct
6346                          */
6347                         if (check_type_with_root(btrfs_header_owner(buf),
6348                                                  key.type)) {
6349                                 fprintf(stderr, "ignoring invalid key\n");
6350                                 continue;
6351                         }
6352                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6353                                 process_extent_item(root, extent_cache, buf,
6354                                                     i);
6355                                 continue;
6356                         }
6357                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6358                                 process_extent_item(root, extent_cache, buf,
6359                                                     i);
6360                                 continue;
6361                         }
6362                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6363                                 total_csum_bytes +=
6364                                         btrfs_item_size_nr(buf, i);
6365                                 continue;
6366                         }
6367                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6368                                 process_chunk_item(chunk_cache, &key, buf, i);
6369                                 continue;
6370                         }
6371                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6372                                 process_device_item(dev_cache, &key, buf, i);
6373                                 continue;
6374                         }
6375                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6376                                 process_block_group_item(block_group_cache,
6377                                         &key, buf, i);
6378                                 continue;
6379                         }
6380                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6381                                 process_device_extent_item(dev_extent_cache,
6382                                         &key, buf, i);
6383                                 continue;
6384
6385                         }
6386                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6387 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6388                                 process_extent_ref_v0(extent_cache, buf, i);
6389 #else
6390                                 BUG();
6391 #endif
6392                                 continue;
6393                         }
6394
6395                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6396                                 add_tree_backref(extent_cache, key.objectid, 0,
6397                                                  key.offset, 0);
6398                                 continue;
6399                         }
6400                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6401                                 add_tree_backref(extent_cache, key.objectid,
6402                                                  key.offset, 0, 0);
6403                                 continue;
6404                         }
6405                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6406                                 struct btrfs_extent_data_ref *ref;
6407                                 ref = btrfs_item_ptr(buf, i,
6408                                                 struct btrfs_extent_data_ref);
6409                                 add_data_backref(extent_cache,
6410                                         key.objectid, 0,
6411                                         btrfs_extent_data_ref_root(buf, ref),
6412                                         btrfs_extent_data_ref_objectid(buf,
6413                                                                        ref),
6414                                         btrfs_extent_data_ref_offset(buf, ref),
6415                                         btrfs_extent_data_ref_count(buf, ref),
6416                                         0, root->sectorsize);
6417                                 continue;
6418                         }
6419                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6420                                 struct btrfs_shared_data_ref *ref;
6421                                 ref = btrfs_item_ptr(buf, i,
6422                                                 struct btrfs_shared_data_ref);
6423                                 add_data_backref(extent_cache,
6424                                         key.objectid, key.offset, 0, 0, 0,
6425                                         btrfs_shared_data_ref_count(buf, ref),
6426                                         0, root->sectorsize);
6427                                 continue;
6428                         }
6429                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6430                                 struct bad_item *bad;
6431
6432                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6433                                         continue;
6434                                 if (!owner)
6435                                         continue;
6436                                 bad = malloc(sizeof(struct bad_item));
6437                                 if (!bad)
6438                                         continue;
6439                                 INIT_LIST_HEAD(&bad->list);
6440                                 memcpy(&bad->key, &key,
6441                                        sizeof(struct btrfs_key));
6442                                 bad->root_id = owner;
6443                                 list_add_tail(&bad->list, &delete_items);
6444                                 continue;
6445                         }
6446                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6447                                 continue;
6448                         fi = btrfs_item_ptr(buf, i,
6449                                             struct btrfs_file_extent_item);
6450                         if (btrfs_file_extent_type(buf, fi) ==
6451                             BTRFS_FILE_EXTENT_INLINE)
6452                                 continue;
6453                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6454                                 continue;
6455
6456                         data_bytes_allocated +=
6457                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6458                         if (data_bytes_allocated < root->sectorsize) {
6459                                 abort();
6460                         }
6461                         data_bytes_referenced +=
6462                                 btrfs_file_extent_num_bytes(buf, fi);
6463                         add_data_backref(extent_cache,
6464                                 btrfs_file_extent_disk_bytenr(buf, fi),
6465                                 parent, owner, key.objectid, key.offset -
6466                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6467                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6468                 }
6469         } else {
6470                 int level;
6471                 struct btrfs_key first_key;
6472
6473                 first_key.objectid = 0;
6474
6475                 if (nritems > 0)
6476                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6477                 level = btrfs_header_level(buf);
6478                 for (i = 0; i < nritems; i++) {
6479                         struct extent_record tmpl;
6480
6481                         ptr = btrfs_node_blockptr(buf, i);
6482                         size = root->nodesize;
6483                         btrfs_node_key_to_cpu(buf, &key, i);
6484                         if (ri != NULL) {
6485                                 if ((level == ri->drop_level)
6486                                     && is_dropped_key(&key, &ri->drop_key)) {
6487                                         continue;
6488                                 }
6489                         }
6490
6491                         memset(&tmpl, 0, sizeof(tmpl));
6492                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6493                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6494                         tmpl.start = ptr;
6495                         tmpl.nr = size;
6496                         tmpl.refs = 1;
6497                         tmpl.metadata = 1;
6498                         tmpl.max_size = size;
6499                         ret = add_extent_rec(extent_cache, &tmpl);
6500                         BUG_ON(ret);
6501
6502                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6503
6504                         if (level > 1) {
6505                                 add_pending(nodes, seen, ptr, size);
6506                         } else {
6507                                 add_pending(pending, seen, ptr, size);
6508                         }
6509                 }
6510                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6511                                       nritems) * sizeof(struct btrfs_key_ptr);
6512         }
6513         total_btree_bytes += buf->len;
6514         if (fs_root_objectid(btrfs_header_owner(buf)))
6515                 total_fs_tree_bytes += buf->len;
6516         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6517                 total_extent_tree_bytes += buf->len;
6518         if (!found_old_backref &&
6519             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6520             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6521             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6522                 found_old_backref = 1;
6523 out:
6524         free_extent_buffer(buf);
6525         return ret;
6526 }
6527
6528 static int add_root_to_pending(struct extent_buffer *buf,
6529                                struct cache_tree *extent_cache,
6530                                struct cache_tree *pending,
6531                                struct cache_tree *seen,
6532                                struct cache_tree *nodes,
6533                                u64 objectid)
6534 {
6535         struct extent_record tmpl;
6536
6537         if (btrfs_header_level(buf) > 0)
6538                 add_pending(nodes, seen, buf->start, buf->len);
6539         else
6540                 add_pending(pending, seen, buf->start, buf->len);
6541
6542         memset(&tmpl, 0, sizeof(tmpl));
6543         tmpl.start = buf->start;
6544         tmpl.nr = buf->len;
6545         tmpl.is_root = 1;
6546         tmpl.refs = 1;
6547         tmpl.metadata = 1;
6548         tmpl.max_size = buf->len;
6549         add_extent_rec(extent_cache, &tmpl);
6550
6551         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6552             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6553                 add_tree_backref(extent_cache, buf->start, buf->start,
6554                                  0, 1);
6555         else
6556                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6557         return 0;
6558 }
6559
6560 /* as we fix the tree, we might be deleting blocks that
6561  * we're tracking for repair.  This hook makes sure we
6562  * remove any backrefs for blocks as we are fixing them.
6563  */
6564 static int free_extent_hook(struct btrfs_trans_handle *trans,
6565                             struct btrfs_root *root,
6566                             u64 bytenr, u64 num_bytes, u64 parent,
6567                             u64 root_objectid, u64 owner, u64 offset,
6568                             int refs_to_drop)
6569 {
6570         struct extent_record *rec;
6571         struct cache_extent *cache;
6572         int is_data;
6573         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6574
6575         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6576         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6577         if (!cache)
6578                 return 0;
6579
6580         rec = container_of(cache, struct extent_record, cache);
6581         if (is_data) {
6582                 struct data_backref *back;
6583                 back = find_data_backref(rec, parent, root_objectid, owner,
6584                                          offset, 1, bytenr, num_bytes);
6585                 if (!back)
6586                         goto out;
6587                 if (back->node.found_ref) {
6588                         back->found_ref -= refs_to_drop;
6589                         if (rec->refs)
6590                                 rec->refs -= refs_to_drop;
6591                 }
6592                 if (back->node.found_extent_tree) {
6593                         back->num_refs -= refs_to_drop;
6594                         if (rec->extent_item_refs)
6595                                 rec->extent_item_refs -= refs_to_drop;
6596                 }
6597                 if (back->found_ref == 0)
6598                         back->node.found_ref = 0;
6599                 if (back->num_refs == 0)
6600                         back->node.found_extent_tree = 0;
6601
6602                 if (!back->node.found_extent_tree && back->node.found_ref) {
6603                         rb_erase(&back->node.node, &rec->backref_tree);
6604                         free(back);
6605                 }
6606         } else {
6607                 struct tree_backref *back;
6608                 back = find_tree_backref(rec, parent, root_objectid);
6609                 if (!back)
6610                         goto out;
6611                 if (back->node.found_ref) {
6612                         if (rec->refs)
6613                                 rec->refs--;
6614                         back->node.found_ref = 0;
6615                 }
6616                 if (back->node.found_extent_tree) {
6617                         if (rec->extent_item_refs)
6618                                 rec->extent_item_refs--;
6619                         back->node.found_extent_tree = 0;
6620                 }
6621                 if (!back->node.found_extent_tree && back->node.found_ref) {
6622                         rb_erase(&back->node.node, &rec->backref_tree);
6623                         free(back);
6624                 }
6625         }
6626         maybe_free_extent_rec(extent_cache, rec);
6627 out:
6628         return 0;
6629 }
6630
6631 static int delete_extent_records(struct btrfs_trans_handle *trans,
6632                                  struct btrfs_root *root,
6633                                  struct btrfs_path *path,
6634                                  u64 bytenr, u64 new_len)
6635 {
6636         struct btrfs_key key;
6637         struct btrfs_key found_key;
6638         struct extent_buffer *leaf;
6639         int ret;
6640         int slot;
6641
6642
6643         key.objectid = bytenr;
6644         key.type = (u8)-1;
6645         key.offset = (u64)-1;
6646
6647         while(1) {
6648                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6649                                         &key, path, 0, 1);
6650                 if (ret < 0)
6651                         break;
6652
6653                 if (ret > 0) {
6654                         ret = 0;
6655                         if (path->slots[0] == 0)
6656                                 break;
6657                         path->slots[0]--;
6658                 }
6659                 ret = 0;
6660
6661                 leaf = path->nodes[0];
6662                 slot = path->slots[0];
6663
6664                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6665                 if (found_key.objectid != bytenr)
6666                         break;
6667
6668                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6669                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6670                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6671                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6672                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6673                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6674                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6675                         btrfs_release_path(path);
6676                         if (found_key.type == 0) {
6677                                 if (found_key.offset == 0)
6678                                         break;
6679                                 key.offset = found_key.offset - 1;
6680                                 key.type = found_key.type;
6681                         }
6682                         key.type = found_key.type - 1;
6683                         key.offset = (u64)-1;
6684                         continue;
6685                 }
6686
6687                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6688                         found_key.objectid, found_key.type, found_key.offset);
6689
6690                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6691                 if (ret)
6692                         break;
6693                 btrfs_release_path(path);
6694
6695                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6696                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6697                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6698                                 found_key.offset : root->nodesize;
6699
6700                         ret = btrfs_update_block_group(trans, root, bytenr,
6701                                                        bytes, 0, 0);
6702                         if (ret)
6703                                 break;
6704                 }
6705         }
6706
6707         btrfs_release_path(path);
6708         return ret;
6709 }
6710
6711 /*
6712  * for a single backref, this will allocate a new extent
6713  * and add the backref to it.
6714  */
6715 static int record_extent(struct btrfs_trans_handle *trans,
6716                          struct btrfs_fs_info *info,
6717                          struct btrfs_path *path,
6718                          struct extent_record *rec,
6719                          struct extent_backref *back,
6720                          int allocated, u64 flags)
6721 {
6722         int ret;
6723         struct btrfs_root *extent_root = info->extent_root;
6724         struct extent_buffer *leaf;
6725         struct btrfs_key ins_key;
6726         struct btrfs_extent_item *ei;
6727         struct tree_backref *tback;
6728         struct data_backref *dback;
6729         struct btrfs_tree_block_info *bi;
6730
6731         if (!back->is_data)
6732                 rec->max_size = max_t(u64, rec->max_size,
6733                                     info->extent_root->nodesize);
6734
6735         if (!allocated) {
6736                 u32 item_size = sizeof(*ei);
6737
6738                 if (!back->is_data)
6739                         item_size += sizeof(*bi);
6740
6741                 ins_key.objectid = rec->start;
6742                 ins_key.offset = rec->max_size;
6743                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6744
6745                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6746                                         &ins_key, item_size);
6747                 if (ret)
6748                         goto fail;
6749
6750                 leaf = path->nodes[0];
6751                 ei = btrfs_item_ptr(leaf, path->slots[0],
6752                                     struct btrfs_extent_item);
6753
6754                 btrfs_set_extent_refs(leaf, ei, 0);
6755                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6756
6757                 if (back->is_data) {
6758                         btrfs_set_extent_flags(leaf, ei,
6759                                                BTRFS_EXTENT_FLAG_DATA);
6760                 } else {
6761                         struct btrfs_disk_key copy_key;;
6762
6763                         tback = to_tree_backref(back);
6764                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6765                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6766                                              sizeof(*bi));
6767
6768                         btrfs_set_disk_key_objectid(&copy_key,
6769                                                     rec->info_objectid);
6770                         btrfs_set_disk_key_type(&copy_key, 0);
6771                         btrfs_set_disk_key_offset(&copy_key, 0);
6772
6773                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6774                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6775
6776                         btrfs_set_extent_flags(leaf, ei,
6777                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6778                 }
6779
6780                 btrfs_mark_buffer_dirty(leaf);
6781                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6782                                                rec->max_size, 1, 0);
6783                 if (ret)
6784                         goto fail;
6785                 btrfs_release_path(path);
6786         }
6787
6788         if (back->is_data) {
6789                 u64 parent;
6790                 int i;
6791
6792                 dback = to_data_backref(back);
6793                 if (back->full_backref)
6794                         parent = dback->parent;
6795                 else
6796                         parent = 0;
6797
6798                 for (i = 0; i < dback->found_ref; i++) {
6799                         /* if parent != 0, we're doing a full backref
6800                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6801                          * just makes the backref allocator create a data
6802                          * backref
6803                          */
6804                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6805                                                    rec->start, rec->max_size,
6806                                                    parent,
6807                                                    dback->root,
6808                                                    parent ?
6809                                                    BTRFS_FIRST_FREE_OBJECTID :
6810                                                    dback->owner,
6811                                                    dback->offset);
6812                         if (ret)
6813                                 break;
6814                 }
6815                 fprintf(stderr, "adding new data backref"
6816                                 " on %llu %s %llu owner %llu"
6817                                 " offset %llu found %d\n",
6818                                 (unsigned long long)rec->start,
6819                                 back->full_backref ?
6820                                 "parent" : "root",
6821                                 back->full_backref ?
6822                                 (unsigned long long)parent :
6823                                 (unsigned long long)dback->root,
6824                                 (unsigned long long)dback->owner,
6825                                 (unsigned long long)dback->offset,
6826                                 dback->found_ref);
6827         } else {
6828                 u64 parent;
6829
6830                 tback = to_tree_backref(back);
6831                 if (back->full_backref)
6832                         parent = tback->parent;
6833                 else
6834                         parent = 0;
6835
6836                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6837                                            rec->start, rec->max_size,
6838                                            parent, tback->root, 0, 0);
6839                 fprintf(stderr, "adding new tree backref on "
6840                         "start %llu len %llu parent %llu root %llu\n",
6841                         rec->start, rec->max_size, parent, tback->root);
6842         }
6843 fail:
6844         btrfs_release_path(path);
6845         return ret;
6846 }
6847
6848 static struct extent_entry *find_entry(struct list_head *entries,
6849                                        u64 bytenr, u64 bytes)
6850 {
6851         struct extent_entry *entry = NULL;
6852
6853         list_for_each_entry(entry, entries, list) {
6854                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6855                         return entry;
6856         }
6857
6858         return NULL;
6859 }
6860
6861 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6862 {
6863         struct extent_entry *entry, *best = NULL, *prev = NULL;
6864
6865         list_for_each_entry(entry, entries, list) {
6866                 if (!prev) {
6867                         prev = entry;
6868                         continue;
6869                 }
6870
6871                 /*
6872                  * If there are as many broken entries as entries then we know
6873                  * not to trust this particular entry.
6874                  */
6875                 if (entry->broken == entry->count)
6876                         continue;
6877
6878                 /*
6879                  * If our current entry == best then we can't be sure our best
6880                  * is really the best, so we need to keep searching.
6881                  */
6882                 if (best && best->count == entry->count) {
6883                         prev = entry;
6884                         best = NULL;
6885                         continue;
6886                 }
6887
6888                 /* Prev == entry, not good enough, have to keep searching */
6889                 if (!prev->broken && prev->count == entry->count)
6890                         continue;
6891
6892                 if (!best)
6893                         best = (prev->count > entry->count) ? prev : entry;
6894                 else if (best->count < entry->count)
6895                         best = entry;
6896                 prev = entry;
6897         }
6898
6899         return best;
6900 }
6901
6902 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6903                       struct data_backref *dback, struct extent_entry *entry)
6904 {
6905         struct btrfs_trans_handle *trans;
6906         struct btrfs_root *root;
6907         struct btrfs_file_extent_item *fi;
6908         struct extent_buffer *leaf;
6909         struct btrfs_key key;
6910         u64 bytenr, bytes;
6911         int ret, err;
6912
6913         key.objectid = dback->root;
6914         key.type = BTRFS_ROOT_ITEM_KEY;
6915         key.offset = (u64)-1;
6916         root = btrfs_read_fs_root(info, &key);
6917         if (IS_ERR(root)) {
6918                 fprintf(stderr, "Couldn't find root for our ref\n");
6919                 return -EINVAL;
6920         }
6921
6922         /*
6923          * The backref points to the original offset of the extent if it was
6924          * split, so we need to search down to the offset we have and then walk
6925          * forward until we find the backref we're looking for.
6926          */
6927         key.objectid = dback->owner;
6928         key.type = BTRFS_EXTENT_DATA_KEY;
6929         key.offset = dback->offset;
6930         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6931         if (ret < 0) {
6932                 fprintf(stderr, "Error looking up ref %d\n", ret);
6933                 return ret;
6934         }
6935
6936         while (1) {
6937                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6938                         ret = btrfs_next_leaf(root, path);
6939                         if (ret) {
6940                                 fprintf(stderr, "Couldn't find our ref, next\n");
6941                                 return -EINVAL;
6942                         }
6943                 }
6944                 leaf = path->nodes[0];
6945                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6946                 if (key.objectid != dback->owner ||
6947                     key.type != BTRFS_EXTENT_DATA_KEY) {
6948                         fprintf(stderr, "Couldn't find our ref, search\n");
6949                         return -EINVAL;
6950                 }
6951                 fi = btrfs_item_ptr(leaf, path->slots[0],
6952                                     struct btrfs_file_extent_item);
6953                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6954                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6955
6956                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6957                         break;
6958                 path->slots[0]++;
6959         }
6960
6961         btrfs_release_path(path);
6962
6963         trans = btrfs_start_transaction(root, 1);
6964         if (IS_ERR(trans))
6965                 return PTR_ERR(trans);
6966
6967         /*
6968          * Ok we have the key of the file extent we want to fix, now we can cow
6969          * down to the thing and fix it.
6970          */
6971         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6972         if (ret < 0) {
6973                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6974                         key.objectid, key.type, key.offset, ret);
6975                 goto out;
6976         }
6977         if (ret > 0) {
6978                 fprintf(stderr, "Well that's odd, we just found this key "
6979                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6980                         key.offset);
6981                 ret = -EINVAL;
6982                 goto out;
6983         }
6984         leaf = path->nodes[0];
6985         fi = btrfs_item_ptr(leaf, path->slots[0],
6986                             struct btrfs_file_extent_item);
6987
6988         if (btrfs_file_extent_compression(leaf, fi) &&
6989             dback->disk_bytenr != entry->bytenr) {
6990                 fprintf(stderr, "Ref doesn't match the record start and is "
6991                         "compressed, please take a btrfs-image of this file "
6992                         "system and send it to a btrfs developer so they can "
6993                         "complete this functionality for bytenr %Lu\n",
6994                         dback->disk_bytenr);
6995                 ret = -EINVAL;
6996                 goto out;
6997         }
6998
6999         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7000                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7001         } else if (dback->disk_bytenr > entry->bytenr) {
7002                 u64 off_diff, offset;
7003
7004                 off_diff = dback->disk_bytenr - entry->bytenr;
7005                 offset = btrfs_file_extent_offset(leaf, fi);
7006                 if (dback->disk_bytenr + offset +
7007                     btrfs_file_extent_num_bytes(leaf, fi) >
7008                     entry->bytenr + entry->bytes) {
7009                         fprintf(stderr, "Ref is past the entry end, please "
7010                                 "take a btrfs-image of this file system and "
7011                                 "send it to a btrfs developer, ref %Lu\n",
7012                                 dback->disk_bytenr);
7013                         ret = -EINVAL;
7014                         goto out;
7015                 }
7016                 offset += off_diff;
7017                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7018                 btrfs_set_file_extent_offset(leaf, fi, offset);
7019         } else if (dback->disk_bytenr < entry->bytenr) {
7020                 u64 offset;
7021
7022                 offset = btrfs_file_extent_offset(leaf, fi);
7023                 if (dback->disk_bytenr + offset < entry->bytenr) {
7024                         fprintf(stderr, "Ref is before the entry start, please"
7025                                 " take a btrfs-image of this file system and "
7026                                 "send it to a btrfs developer, ref %Lu\n",
7027                                 dback->disk_bytenr);
7028                         ret = -EINVAL;
7029                         goto out;
7030                 }
7031
7032                 offset += dback->disk_bytenr;
7033                 offset -= entry->bytenr;
7034                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7035                 btrfs_set_file_extent_offset(leaf, fi, offset);
7036         }
7037
7038         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7039
7040         /*
7041          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7042          * only do this if we aren't using compression, otherwise it's a
7043          * trickier case.
7044          */
7045         if (!btrfs_file_extent_compression(leaf, fi))
7046                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7047         else
7048                 printf("ram bytes may be wrong?\n");
7049         btrfs_mark_buffer_dirty(leaf);
7050 out:
7051         err = btrfs_commit_transaction(trans, root);
7052         btrfs_release_path(path);
7053         return ret ? ret : err;
7054 }
7055
7056 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7057                            struct extent_record *rec)
7058 {
7059         struct extent_backref *back, *tmp;
7060         struct data_backref *dback;
7061         struct extent_entry *entry, *best = NULL;
7062         LIST_HEAD(entries);
7063         int nr_entries = 0;
7064         int broken_entries = 0;
7065         int ret = 0;
7066         short mismatch = 0;
7067
7068         /*
7069          * Metadata is easy and the backrefs should always agree on bytenr and
7070          * size, if not we've got bigger issues.
7071          */
7072         if (rec->metadata)
7073                 return 0;
7074
7075         rbtree_postorder_for_each_entry_safe(back, tmp,
7076                                              &rec->backref_tree, node) {
7077                 if (back->full_backref || !back->is_data)
7078                         continue;
7079
7080                 dback = to_data_backref(back);
7081
7082                 /*
7083                  * We only pay attention to backrefs that we found a real
7084                  * backref for.
7085                  */
7086                 if (dback->found_ref == 0)
7087                         continue;
7088
7089                 /*
7090                  * For now we only catch when the bytes don't match, not the
7091                  * bytenr.  We can easily do this at the same time, but I want
7092                  * to have a fs image to test on before we just add repair
7093                  * functionality willy-nilly so we know we won't screw up the
7094                  * repair.
7095                  */
7096
7097                 entry = find_entry(&entries, dback->disk_bytenr,
7098                                    dback->bytes);
7099                 if (!entry) {
7100                         entry = malloc(sizeof(struct extent_entry));
7101                         if (!entry) {
7102                                 ret = -ENOMEM;
7103                                 goto out;
7104                         }
7105                         memset(entry, 0, sizeof(*entry));
7106                         entry->bytenr = dback->disk_bytenr;
7107                         entry->bytes = dback->bytes;
7108                         list_add_tail(&entry->list, &entries);
7109                         nr_entries++;
7110                 }
7111
7112                 /*
7113                  * If we only have on entry we may think the entries agree when
7114                  * in reality they don't so we have to do some extra checking.
7115                  */
7116                 if (dback->disk_bytenr != rec->start ||
7117                     dback->bytes != rec->nr || back->broken)
7118                         mismatch = 1;
7119
7120                 if (back->broken) {
7121                         entry->broken++;
7122                         broken_entries++;
7123                 }
7124
7125                 entry->count++;
7126         }
7127
7128         /* Yay all the backrefs agree, carry on good sir */
7129         if (nr_entries <= 1 && !mismatch)
7130                 goto out;
7131
7132         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7133                 "%Lu\n", rec->start);
7134
7135         /*
7136          * First we want to see if the backrefs can agree amongst themselves who
7137          * is right, so figure out which one of the entries has the highest
7138          * count.
7139          */
7140         best = find_most_right_entry(&entries);
7141
7142         /*
7143          * Ok so we may have an even split between what the backrefs think, so
7144          * this is where we use the extent ref to see what it thinks.
7145          */
7146         if (!best) {
7147                 entry = find_entry(&entries, rec->start, rec->nr);
7148                 if (!entry && (!broken_entries || !rec->found_rec)) {
7149                         fprintf(stderr, "Backrefs don't agree with each other "
7150                                 "and extent record doesn't agree with anybody,"
7151                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7152                                 rec->start, rec->nr);
7153                         ret = -EINVAL;
7154                         goto out;
7155                 } else if (!entry) {
7156                         /*
7157                          * Ok our backrefs were broken, we'll assume this is the
7158                          * correct value and add an entry for this range.
7159                          */
7160                         entry = malloc(sizeof(struct extent_entry));
7161                         if (!entry) {
7162                                 ret = -ENOMEM;
7163                                 goto out;
7164                         }
7165                         memset(entry, 0, sizeof(*entry));
7166                         entry->bytenr = rec->start;
7167                         entry->bytes = rec->nr;
7168                         list_add_tail(&entry->list, &entries);
7169                         nr_entries++;
7170                 }
7171                 entry->count++;
7172                 best = find_most_right_entry(&entries);
7173                 if (!best) {
7174                         fprintf(stderr, "Backrefs and extent record evenly "
7175                                 "split on who is right, this is going to "
7176                                 "require user input to fix bytenr %Lu bytes "
7177                                 "%Lu\n", rec->start, rec->nr);
7178                         ret = -EINVAL;
7179                         goto out;
7180                 }
7181         }
7182
7183         /*
7184          * I don't think this can happen currently as we'll abort() if we catch
7185          * this case higher up, but in case somebody removes that we still can't
7186          * deal with it properly here yet, so just bail out of that's the case.
7187          */
7188         if (best->bytenr != rec->start) {
7189                 fprintf(stderr, "Extent start and backref starts don't match, "
7190                         "please use btrfs-image on this file system and send "
7191                         "it to a btrfs developer so they can make fsck fix "
7192                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7193                         rec->start, rec->nr);
7194                 ret = -EINVAL;
7195                 goto out;
7196         }
7197
7198         /*
7199          * Ok great we all agreed on an extent record, let's go find the real
7200          * references and fix up the ones that don't match.
7201          */
7202         rbtree_postorder_for_each_entry_safe(back, tmp,
7203                                              &rec->backref_tree, node) {
7204                 if (back->full_backref || !back->is_data)
7205                         continue;
7206
7207                 dback = to_data_backref(back);
7208
7209                 /*
7210                  * Still ignoring backrefs that don't have a real ref attached
7211                  * to them.
7212                  */
7213                 if (dback->found_ref == 0)
7214                         continue;
7215
7216                 if (dback->bytes == best->bytes &&
7217                     dback->disk_bytenr == best->bytenr)
7218                         continue;
7219
7220                 ret = repair_ref(info, path, dback, best);
7221                 if (ret)
7222                         goto out;
7223         }
7224
7225         /*
7226          * Ok we messed with the actual refs, which means we need to drop our
7227          * entire cache and go back and rescan.  I know this is a huge pain and
7228          * adds a lot of extra work, but it's the only way to be safe.  Once all
7229          * the backrefs agree we may not need to do anything to the extent
7230          * record itself.
7231          */
7232         ret = -EAGAIN;
7233 out:
7234         while (!list_empty(&entries)) {
7235                 entry = list_entry(entries.next, struct extent_entry, list);
7236                 list_del_init(&entry->list);
7237                 free(entry);
7238         }
7239         return ret;
7240 }
7241
7242 static int process_duplicates(struct btrfs_root *root,
7243                               struct cache_tree *extent_cache,
7244                               struct extent_record *rec)
7245 {
7246         struct extent_record *good, *tmp;
7247         struct cache_extent *cache;
7248         int ret;
7249
7250         /*
7251          * If we found a extent record for this extent then return, or if we
7252          * have more than one duplicate we are likely going to need to delete
7253          * something.
7254          */
7255         if (rec->found_rec || rec->num_duplicates > 1)
7256                 return 0;
7257
7258         /* Shouldn't happen but just in case */
7259         BUG_ON(!rec->num_duplicates);
7260
7261         /*
7262          * So this happens if we end up with a backref that doesn't match the
7263          * actual extent entry.  So either the backref is bad or the extent
7264          * entry is bad.  Either way we want to have the extent_record actually
7265          * reflect what we found in the extent_tree, so we need to take the
7266          * duplicate out and use that as the extent_record since the only way we
7267          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7268          */
7269         remove_cache_extent(extent_cache, &rec->cache);
7270
7271         good = to_extent_record(rec->dups.next);
7272         list_del_init(&good->list);
7273         INIT_LIST_HEAD(&good->backrefs);
7274         INIT_LIST_HEAD(&good->dups);
7275         good->cache.start = good->start;
7276         good->cache.size = good->nr;
7277         good->content_checked = 0;
7278         good->owner_ref_checked = 0;
7279         good->num_duplicates = 0;
7280         good->refs = rec->refs;
7281         list_splice_init(&rec->backrefs, &good->backrefs);
7282         while (1) {
7283                 cache = lookup_cache_extent(extent_cache, good->start,
7284                                             good->nr);
7285                 if (!cache)
7286                         break;
7287                 tmp = container_of(cache, struct extent_record, cache);
7288
7289                 /*
7290                  * If we find another overlapping extent and it's found_rec is
7291                  * set then it's a duplicate and we need to try and delete
7292                  * something.
7293                  */
7294                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7295                         if (list_empty(&good->list))
7296                                 list_add_tail(&good->list,
7297                                               &duplicate_extents);
7298                         good->num_duplicates += tmp->num_duplicates + 1;
7299                         list_splice_init(&tmp->dups, &good->dups);
7300                         list_del_init(&tmp->list);
7301                         list_add_tail(&tmp->list, &good->dups);
7302                         remove_cache_extent(extent_cache, &tmp->cache);
7303                         continue;
7304                 }
7305
7306                 /*
7307                  * Ok we have another non extent item backed extent rec, so lets
7308                  * just add it to this extent and carry on like we did above.
7309                  */
7310                 good->refs += tmp->refs;
7311                 list_splice_init(&tmp->backrefs, &good->backrefs);
7312                 remove_cache_extent(extent_cache, &tmp->cache);
7313                 free(tmp);
7314         }
7315         ret = insert_cache_extent(extent_cache, &good->cache);
7316         BUG_ON(ret);
7317         free(rec);
7318         return good->num_duplicates ? 0 : 1;
7319 }
7320
7321 static int delete_duplicate_records(struct btrfs_root *root,
7322                                     struct extent_record *rec)
7323 {
7324         struct btrfs_trans_handle *trans;
7325         LIST_HEAD(delete_list);
7326         struct btrfs_path *path;
7327         struct extent_record *tmp, *good, *n;
7328         int nr_del = 0;
7329         int ret = 0, err;
7330         struct btrfs_key key;
7331
7332         path = btrfs_alloc_path();
7333         if (!path) {
7334                 ret = -ENOMEM;
7335                 goto out;
7336         }
7337
7338         good = rec;
7339         /* Find the record that covers all of the duplicates. */
7340         list_for_each_entry(tmp, &rec->dups, list) {
7341                 if (good->start < tmp->start)
7342                         continue;
7343                 if (good->nr > tmp->nr)
7344                         continue;
7345
7346                 if (tmp->start + tmp->nr < good->start + good->nr) {
7347                         fprintf(stderr, "Ok we have overlapping extents that "
7348                                 "aren't completely covered by each other, this "
7349                                 "is going to require more careful thought.  "
7350                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7351                                 tmp->start, tmp->nr, good->start, good->nr);
7352                         abort();
7353                 }
7354                 good = tmp;
7355         }
7356
7357         if (good != rec)
7358                 list_add_tail(&rec->list, &delete_list);
7359
7360         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7361                 if (tmp == good)
7362                         continue;
7363                 list_move_tail(&tmp->list, &delete_list);
7364         }
7365
7366         root = root->fs_info->extent_root;
7367         trans = btrfs_start_transaction(root, 1);
7368         if (IS_ERR(trans)) {
7369                 ret = PTR_ERR(trans);
7370                 goto out;
7371         }
7372
7373         list_for_each_entry(tmp, &delete_list, list) {
7374                 if (tmp->found_rec == 0)
7375                         continue;
7376                 key.objectid = tmp->start;
7377                 key.type = BTRFS_EXTENT_ITEM_KEY;
7378                 key.offset = tmp->nr;
7379
7380                 /* Shouldn't happen but just in case */
7381                 if (tmp->metadata) {
7382                         fprintf(stderr, "Well this shouldn't happen, extent "
7383                                 "record overlaps but is metadata? "
7384                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7385                         abort();
7386                 }
7387
7388                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7389                 if (ret) {
7390                         if (ret > 0)
7391                                 ret = -EINVAL;
7392                         break;
7393                 }
7394                 ret = btrfs_del_item(trans, root, path);
7395                 if (ret)
7396                         break;
7397                 btrfs_release_path(path);
7398                 nr_del++;
7399         }
7400         err = btrfs_commit_transaction(trans, root);
7401         if (err && !ret)
7402                 ret = err;
7403 out:
7404         while (!list_empty(&delete_list)) {
7405                 tmp = to_extent_record(delete_list.next);
7406                 list_del_init(&tmp->list);
7407                 if (tmp == rec)
7408                         continue;
7409                 free(tmp);
7410         }
7411
7412         while (!list_empty(&rec->dups)) {
7413                 tmp = to_extent_record(rec->dups.next);
7414                 list_del_init(&tmp->list);
7415                 free(tmp);
7416         }
7417
7418         btrfs_free_path(path);
7419
7420         if (!ret && !nr_del)
7421                 rec->num_duplicates = 0;
7422
7423         return ret ? ret : nr_del;
7424 }
7425
7426 static int find_possible_backrefs(struct btrfs_fs_info *info,
7427                                   struct btrfs_path *path,
7428                                   struct cache_tree *extent_cache,
7429                                   struct extent_record *rec)
7430 {
7431         struct btrfs_root *root;
7432         struct extent_backref *back, *tmp;
7433         struct data_backref *dback;
7434         struct cache_extent *cache;
7435         struct btrfs_file_extent_item *fi;
7436         struct btrfs_key key;
7437         u64 bytenr, bytes;
7438         int ret;
7439
7440         rbtree_postorder_for_each_entry_safe(back, tmp,
7441                                              &rec->backref_tree, node) {
7442                 /* Don't care about full backrefs (poor unloved backrefs) */
7443                 if (back->full_backref || !back->is_data)
7444                         continue;
7445
7446                 dback = to_data_backref(back);
7447
7448                 /* We found this one, we don't need to do a lookup */
7449                 if (dback->found_ref)
7450                         continue;
7451
7452                 key.objectid = dback->root;
7453                 key.type = BTRFS_ROOT_ITEM_KEY;
7454                 key.offset = (u64)-1;
7455
7456                 root = btrfs_read_fs_root(info, &key);
7457
7458                 /* No root, definitely a bad ref, skip */
7459                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7460                         continue;
7461                 /* Other err, exit */
7462                 if (IS_ERR(root))
7463                         return PTR_ERR(root);
7464
7465                 key.objectid = dback->owner;
7466                 key.type = BTRFS_EXTENT_DATA_KEY;
7467                 key.offset = dback->offset;
7468                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7469                 if (ret) {
7470                         btrfs_release_path(path);
7471                         if (ret < 0)
7472                                 return ret;
7473                         /* Didn't find it, we can carry on */
7474                         ret = 0;
7475                         continue;
7476                 }
7477
7478                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7479                                     struct btrfs_file_extent_item);
7480                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7481                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7482                 btrfs_release_path(path);
7483                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7484                 if (cache) {
7485                         struct extent_record *tmp;
7486                         tmp = container_of(cache, struct extent_record, cache);
7487
7488                         /*
7489                          * If we found an extent record for the bytenr for this
7490                          * particular backref then we can't add it to our
7491                          * current extent record.  We only want to add backrefs
7492                          * that don't have a corresponding extent item in the
7493                          * extent tree since they likely belong to this record
7494                          * and we need to fix it if it doesn't match bytenrs.
7495                          */
7496                         if  (tmp->found_rec)
7497                                 continue;
7498                 }
7499
7500                 dback->found_ref += 1;
7501                 dback->disk_bytenr = bytenr;
7502                 dback->bytes = bytes;
7503
7504                 /*
7505                  * Set this so the verify backref code knows not to trust the
7506                  * values in this backref.
7507                  */
7508                 back->broken = 1;
7509         }
7510
7511         return 0;
7512 }
7513
7514 /*
7515  * Record orphan data ref into corresponding root.
7516  *
7517  * Return 0 if the extent item contains data ref and recorded.
7518  * Return 1 if the extent item contains no useful data ref
7519  *   On that case, it may contains only shared_dataref or metadata backref
7520  *   or the file extent exists(this should be handled by the extent bytenr
7521  *   recovery routine)
7522  * Return <0 if something goes wrong.
7523  */
7524 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7525                                       struct extent_record *rec)
7526 {
7527         struct btrfs_key key;
7528         struct btrfs_root *dest_root;
7529         struct extent_backref *back, *tmp;
7530         struct data_backref *dback;
7531         struct orphan_data_extent *orphan;
7532         struct btrfs_path *path;
7533         int recorded_data_ref = 0;
7534         int ret = 0;
7535
7536         if (rec->metadata)
7537                 return 1;
7538         path = btrfs_alloc_path();
7539         if (!path)
7540                 return -ENOMEM;
7541         rbtree_postorder_for_each_entry_safe(back, tmp,
7542                                              &rec->backref_tree, node) {
7543                 if (back->full_backref || !back->is_data ||
7544                     !back->found_extent_tree)
7545                         continue;
7546                 dback = to_data_backref(back);
7547                 if (dback->found_ref)
7548                         continue;
7549                 key.objectid = dback->root;
7550                 key.type = BTRFS_ROOT_ITEM_KEY;
7551                 key.offset = (u64)-1;
7552
7553                 dest_root = btrfs_read_fs_root(fs_info, &key);
7554
7555                 /* For non-exist root we just skip it */
7556                 if (IS_ERR(dest_root) || !dest_root)
7557                         continue;
7558
7559                 key.objectid = dback->owner;
7560                 key.type = BTRFS_EXTENT_DATA_KEY;
7561                 key.offset = dback->offset;
7562
7563                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7564                 /*
7565                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7566                  * we need to record it for inode/file extent rebuild.
7567                  * For ret > 0, we record it only for file extent rebuild.
7568                  * For ret == 0, the file extent exists but only bytenr
7569                  * mismatch, let the original bytenr fix routine to handle,
7570                  * don't record it.
7571                  */
7572                 if (ret == 0)
7573                         continue;
7574                 ret = 0;
7575                 orphan = malloc(sizeof(*orphan));
7576                 if (!orphan) {
7577                         ret = -ENOMEM;
7578                         goto out;
7579                 }
7580                 INIT_LIST_HEAD(&orphan->list);
7581                 orphan->root = dback->root;
7582                 orphan->objectid = dback->owner;
7583                 orphan->offset = dback->offset;
7584                 orphan->disk_bytenr = rec->cache.start;
7585                 orphan->disk_len = rec->cache.size;
7586                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7587                 recorded_data_ref = 1;
7588         }
7589 out:
7590         btrfs_free_path(path);
7591         if (!ret)
7592                 return !recorded_data_ref;
7593         else
7594                 return ret;
7595 }
7596
7597 /*
7598  * when an incorrect extent item is found, this will delete
7599  * all of the existing entries for it and recreate them
7600  * based on what the tree scan found.
7601  */
7602 static int fixup_extent_refs(struct btrfs_fs_info *info,
7603                              struct cache_tree *extent_cache,
7604                              struct extent_record *rec)
7605 {
7606         struct btrfs_trans_handle *trans = NULL;
7607         int ret;
7608         struct btrfs_path *path;
7609         struct cache_extent *cache;
7610         struct extent_backref *back, *tmp;
7611         int allocated = 0;
7612         u64 flags = 0;
7613
7614         if (rec->flag_block_full_backref)
7615                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7616
7617         path = btrfs_alloc_path();
7618         if (!path)
7619                 return -ENOMEM;
7620
7621         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7622                 /*
7623                  * Sometimes the backrefs themselves are so broken they don't
7624                  * get attached to any meaningful rec, so first go back and
7625                  * check any of our backrefs that we couldn't find and throw
7626                  * them into the list if we find the backref so that
7627                  * verify_backrefs can figure out what to do.
7628                  */
7629                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7630                 if (ret < 0)
7631                         goto out;
7632         }
7633
7634         /* step one, make sure all of the backrefs agree */
7635         ret = verify_backrefs(info, path, rec);
7636         if (ret < 0)
7637                 goto out;
7638
7639         trans = btrfs_start_transaction(info->extent_root, 1);
7640         if (IS_ERR(trans)) {
7641                 ret = PTR_ERR(trans);
7642                 goto out;
7643         }
7644
7645         /* step two, delete all the existing records */
7646         ret = delete_extent_records(trans, info->extent_root, path,
7647                                     rec->start, rec->max_size);
7648
7649         if (ret < 0)
7650                 goto out;
7651
7652         /* was this block corrupt?  If so, don't add references to it */
7653         cache = lookup_cache_extent(info->corrupt_blocks,
7654                                     rec->start, rec->max_size);
7655         if (cache) {
7656                 ret = 0;
7657                 goto out;
7658         }
7659
7660         /* step three, recreate all the refs we did find */
7661         rbtree_postorder_for_each_entry_safe(back, tmp,
7662                                              &rec->backref_tree, node) {
7663                 /*
7664                  * if we didn't find any references, don't create a
7665                  * new extent record
7666                  */
7667                 if (!back->found_ref)
7668                         continue;
7669
7670                 rec->bad_full_backref = 0;
7671                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7672                 allocated = 1;
7673
7674                 if (ret)
7675                         goto out;
7676         }
7677 out:
7678         if (trans) {
7679                 int err = btrfs_commit_transaction(trans, info->extent_root);
7680                 if (!ret)
7681                         ret = err;
7682         }
7683
7684         btrfs_free_path(path);
7685         return ret;
7686 }
7687
7688 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7689                               struct extent_record *rec)
7690 {
7691         struct btrfs_trans_handle *trans;
7692         struct btrfs_root *root = fs_info->extent_root;
7693         struct btrfs_path *path;
7694         struct btrfs_extent_item *ei;
7695         struct btrfs_key key;
7696         u64 flags;
7697         int ret = 0;
7698
7699         key.objectid = rec->start;
7700         if (rec->metadata) {
7701                 key.type = BTRFS_METADATA_ITEM_KEY;
7702                 key.offset = rec->info_level;
7703         } else {
7704                 key.type = BTRFS_EXTENT_ITEM_KEY;
7705                 key.offset = rec->max_size;
7706         }
7707
7708         path = btrfs_alloc_path();
7709         if (!path)
7710                 return -ENOMEM;
7711
7712         trans = btrfs_start_transaction(root, 0);
7713         if (IS_ERR(trans)) {
7714                 btrfs_free_path(path);
7715                 return PTR_ERR(trans);
7716         }
7717
7718         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7719         if (ret < 0) {
7720                 btrfs_free_path(path);
7721                 btrfs_commit_transaction(trans, root);
7722                 return ret;
7723         } else if (ret) {
7724                 fprintf(stderr, "Didn't find extent for %llu\n",
7725                         (unsigned long long)rec->start);
7726                 btrfs_free_path(path);
7727                 btrfs_commit_transaction(trans, root);
7728                 return -ENOENT;
7729         }
7730
7731         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7732                             struct btrfs_extent_item);
7733         flags = btrfs_extent_flags(path->nodes[0], ei);
7734         if (rec->flag_block_full_backref) {
7735                 fprintf(stderr, "setting full backref on %llu\n",
7736                         (unsigned long long)key.objectid);
7737                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7738         } else {
7739                 fprintf(stderr, "clearing full backref on %llu\n",
7740                         (unsigned long long)key.objectid);
7741                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7742         }
7743         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7744         btrfs_mark_buffer_dirty(path->nodes[0]);
7745         btrfs_free_path(path);
7746         return btrfs_commit_transaction(trans, root);
7747 }
7748
7749 /* right now we only prune from the extent allocation tree */
7750 static int prune_one_block(struct btrfs_trans_handle *trans,
7751                            struct btrfs_fs_info *info,
7752                            struct btrfs_corrupt_block *corrupt)
7753 {
7754         int ret;
7755         struct btrfs_path path;
7756         struct extent_buffer *eb;
7757         u64 found;
7758         int slot;
7759         int nritems;
7760         int level = corrupt->level + 1;
7761
7762         btrfs_init_path(&path);
7763 again:
7764         /* we want to stop at the parent to our busted block */
7765         path.lowest_level = level;
7766
7767         ret = btrfs_search_slot(trans, info->extent_root,
7768                                 &corrupt->key, &path, -1, 1);
7769
7770         if (ret < 0)
7771                 goto out;
7772
7773         eb = path.nodes[level];
7774         if (!eb) {
7775                 ret = -ENOENT;
7776                 goto out;
7777         }
7778
7779         /*
7780          * hopefully the search gave us the block we want to prune,
7781          * lets try that first
7782          */
7783         slot = path.slots[level];
7784         found =  btrfs_node_blockptr(eb, slot);
7785         if (found == corrupt->cache.start)
7786                 goto del_ptr;
7787
7788         nritems = btrfs_header_nritems(eb);
7789
7790         /* the search failed, lets scan this node and hope we find it */
7791         for (slot = 0; slot < nritems; slot++) {
7792                 found =  btrfs_node_blockptr(eb, slot);
7793                 if (found == corrupt->cache.start)
7794                         goto del_ptr;
7795         }
7796         /*
7797          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7798          * to this block
7799          */
7800         if (eb == info->extent_root->node) {
7801                 ret = -ENOENT;
7802                 goto out;
7803         } else {
7804                 level++;
7805                 btrfs_release_path(&path);
7806                 goto again;
7807         }
7808
7809 del_ptr:
7810         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7811         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7812
7813 out:
7814         btrfs_release_path(&path);
7815         return ret;
7816 }
7817
7818 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7819 {
7820         struct btrfs_trans_handle *trans = NULL;
7821         struct cache_extent *cache;
7822         struct btrfs_corrupt_block *corrupt;
7823
7824         while (1) {
7825                 cache = search_cache_extent(info->corrupt_blocks, 0);
7826                 if (!cache)
7827                         break;
7828                 if (!trans) {
7829                         trans = btrfs_start_transaction(info->extent_root, 1);
7830                         if (IS_ERR(trans))
7831                                 return PTR_ERR(trans);
7832                 }
7833                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7834                 prune_one_block(trans, info, corrupt);
7835                 remove_cache_extent(info->corrupt_blocks, cache);
7836         }
7837         if (trans)
7838                 return btrfs_commit_transaction(trans, info->extent_root);
7839         return 0;
7840 }
7841
7842 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7843 {
7844         struct btrfs_block_group_cache *cache;
7845         u64 start, end;
7846         int ret;
7847
7848         while (1) {
7849                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7850                                             &start, &end, EXTENT_DIRTY);
7851                 if (ret)
7852                         break;
7853                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7854                                    GFP_NOFS);
7855         }
7856
7857         start = 0;
7858         while (1) {
7859                 cache = btrfs_lookup_first_block_group(fs_info, start);
7860                 if (!cache)
7861                         break;
7862                 if (cache->cached)
7863                         cache->cached = 0;
7864                 start = cache->key.objectid + cache->key.offset;
7865         }
7866 }
7867
7868 static int check_extent_refs(struct btrfs_root *root,
7869                              struct cache_tree *extent_cache)
7870 {
7871         struct extent_record *rec;
7872         struct cache_extent *cache;
7873         int err = 0;
7874         int ret = 0;
7875         int fixed = 0;
7876         int had_dups = 0;
7877         int recorded = 0;
7878
7879         if (repair) {
7880                 /*
7881                  * if we're doing a repair, we have to make sure
7882                  * we don't allocate from the problem extents.
7883                  * In the worst case, this will be all the
7884                  * extents in the FS
7885                  */
7886                 cache = search_cache_extent(extent_cache, 0);
7887                 while(cache) {
7888                         rec = container_of(cache, struct extent_record, cache);
7889                         set_extent_dirty(root->fs_info->excluded_extents,
7890                                          rec->start,
7891                                          rec->start + rec->max_size - 1,
7892                                          GFP_NOFS);
7893                         cache = next_cache_extent(cache);
7894                 }
7895
7896                 /* pin down all the corrupted blocks too */
7897                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7898                 while(cache) {
7899                         set_extent_dirty(root->fs_info->excluded_extents,
7900                                          cache->start,
7901                                          cache->start + cache->size - 1,
7902                                          GFP_NOFS);
7903                         cache = next_cache_extent(cache);
7904                 }
7905                 prune_corrupt_blocks(root->fs_info);
7906                 reset_cached_block_groups(root->fs_info);
7907         }
7908
7909         reset_cached_block_groups(root->fs_info);
7910
7911         /*
7912          * We need to delete any duplicate entries we find first otherwise we
7913          * could mess up the extent tree when we have backrefs that actually
7914          * belong to a different extent item and not the weird duplicate one.
7915          */
7916         while (repair && !list_empty(&duplicate_extents)) {
7917                 rec = to_extent_record(duplicate_extents.next);
7918                 list_del_init(&rec->list);
7919
7920                 /* Sometimes we can find a backref before we find an actual
7921                  * extent, so we need to process it a little bit to see if there
7922                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7923                  * if this is a backref screwup.  If we need to delete stuff
7924                  * process_duplicates() will return 0, otherwise it will return
7925                  * 1 and we
7926                  */
7927                 if (process_duplicates(root, extent_cache, rec))
7928                         continue;
7929                 ret = delete_duplicate_records(root, rec);
7930                 if (ret < 0)
7931                         return ret;
7932                 /*
7933                  * delete_duplicate_records will return the number of entries
7934                  * deleted, so if it's greater than 0 then we know we actually
7935                  * did something and we need to remove.
7936                  */
7937                 if (ret)
7938                         had_dups = 1;
7939         }
7940
7941         if (had_dups)
7942                 return -EAGAIN;
7943
7944         while(1) {
7945                 int cur_err = 0;
7946
7947                 fixed = 0;
7948                 recorded = 0;
7949                 cache = search_cache_extent(extent_cache, 0);
7950                 if (!cache)
7951                         break;
7952                 rec = container_of(cache, struct extent_record, cache);
7953                 if (rec->num_duplicates) {
7954                         fprintf(stderr, "extent item %llu has multiple extent "
7955                                 "items\n", (unsigned long long)rec->start);
7956                         err = 1;
7957                         cur_err = 1;
7958                 }
7959
7960                 if (rec->refs != rec->extent_item_refs) {
7961                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7962                                 (unsigned long long)rec->start,
7963                                 (unsigned long long)rec->nr);
7964                         fprintf(stderr, "extent item %llu, found %llu\n",
7965                                 (unsigned long long)rec->extent_item_refs,
7966                                 (unsigned long long)rec->refs);
7967                         ret = record_orphan_data_extents(root->fs_info, rec);
7968                         if (ret < 0)
7969                                 goto repair_abort;
7970                         if (ret == 0) {
7971                                 recorded = 1;
7972                         } else {
7973                                 /*
7974                                  * we can't use the extent to repair file
7975                                  * extent, let the fallback method handle it.
7976                                  */
7977                                 if (!fixed && repair) {
7978                                         ret = fixup_extent_refs(
7979                                                         root->fs_info,
7980                                                         extent_cache, rec);
7981                                         if (ret)
7982                                                 goto repair_abort;
7983                                         fixed = 1;
7984                                 }
7985                         }
7986                         err = 1;
7987                         cur_err = 1;
7988                 }
7989                 if (all_backpointers_checked(rec, 1)) {
7990                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7991                                 (unsigned long long)rec->start,
7992                                 (unsigned long long)rec->nr);
7993
7994                         if (!fixed && !recorded && repair) {
7995                                 ret = fixup_extent_refs(root->fs_info,
7996                                                         extent_cache, rec);
7997                                 if (ret)
7998                                         goto repair_abort;
7999                                 fixed = 1;
8000                         }
8001                         cur_err = 1;
8002                         err = 1;
8003                 }
8004                 if (!rec->owner_ref_checked) {
8005                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8006                                 (unsigned long long)rec->start,
8007                                 (unsigned long long)rec->nr);
8008                         if (!fixed && !recorded && repair) {
8009                                 ret = fixup_extent_refs(root->fs_info,
8010                                                         extent_cache, rec);
8011                                 if (ret)
8012                                         goto repair_abort;
8013                                 fixed = 1;
8014                         }
8015                         err = 1;
8016                         cur_err = 1;
8017                 }
8018                 if (rec->bad_full_backref) {
8019                         fprintf(stderr, "bad full backref, on [%llu]\n",
8020                                 (unsigned long long)rec->start);
8021                         if (repair) {
8022                                 ret = fixup_extent_flags(root->fs_info, rec);
8023                                 if (ret)
8024                                         goto repair_abort;
8025                                 fixed = 1;
8026                         }
8027                         err = 1;
8028                         cur_err = 1;
8029                 }
8030                 /*
8031                  * Although it's not a extent ref's problem, we reuse this
8032                  * routine for error reporting.
8033                  * No repair function yet.
8034                  */
8035                 if (rec->crossing_stripes) {
8036                         fprintf(stderr,
8037                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8038                                 rec->start, rec->start + rec->max_size);
8039                         err = 1;
8040                         cur_err = 1;
8041                 }
8042
8043                 if (rec->wrong_chunk_type) {
8044                         fprintf(stderr,
8045                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8046                                 rec->start, rec->start + rec->max_size);
8047                         err = 1;
8048                         cur_err = 1;
8049                 }
8050
8051                 remove_cache_extent(extent_cache, cache);
8052                 free_all_extent_backrefs(rec);
8053                 if (!init_extent_tree && repair && (!cur_err || fixed))
8054                         clear_extent_dirty(root->fs_info->excluded_extents,
8055                                            rec->start,
8056                                            rec->start + rec->max_size - 1,
8057                                            GFP_NOFS);
8058                 free(rec);
8059         }
8060 repair_abort:
8061         if (repair) {
8062                 if (ret && ret != -EAGAIN) {
8063                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8064                         exit(1);
8065                 } else if (!ret) {
8066                         struct btrfs_trans_handle *trans;
8067
8068                         root = root->fs_info->extent_root;
8069                         trans = btrfs_start_transaction(root, 1);
8070                         if (IS_ERR(trans)) {
8071                                 ret = PTR_ERR(trans);
8072                                 goto repair_abort;
8073                         }
8074
8075                         btrfs_fix_block_accounting(trans, root);
8076                         ret = btrfs_commit_transaction(trans, root);
8077                         if (ret)
8078                                 goto repair_abort;
8079                 }
8080                 if (err)
8081                         fprintf(stderr, "repaired damaged extent references\n");
8082                 return ret;
8083         }
8084         return err;
8085 }
8086
8087 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8088 {
8089         u64 stripe_size;
8090
8091         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8092                 stripe_size = length;
8093                 stripe_size /= num_stripes;
8094         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8095                 stripe_size = length * 2;
8096                 stripe_size /= num_stripes;
8097         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8098                 stripe_size = length;
8099                 stripe_size /= (num_stripes - 1);
8100         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8101                 stripe_size = length;
8102                 stripe_size /= (num_stripes - 2);
8103         } else {
8104                 stripe_size = length;
8105         }
8106         return stripe_size;
8107 }
8108
8109 /*
8110  * Check the chunk with its block group/dev list ref:
8111  * Return 0 if all refs seems valid.
8112  * Return 1 if part of refs seems valid, need later check for rebuild ref
8113  * like missing block group and needs to search extent tree to rebuild them.
8114  * Return -1 if essential refs are missing and unable to rebuild.
8115  */
8116 static int check_chunk_refs(struct chunk_record *chunk_rec,
8117                             struct block_group_tree *block_group_cache,
8118                             struct device_extent_tree *dev_extent_cache,
8119                             int silent)
8120 {
8121         struct cache_extent *block_group_item;
8122         struct block_group_record *block_group_rec;
8123         struct cache_extent *dev_extent_item;
8124         struct device_extent_record *dev_extent_rec;
8125         u64 devid;
8126         u64 offset;
8127         u64 length;
8128         int metadump_v2 = 0;
8129         int i;
8130         int ret = 0;
8131
8132         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8133                                                chunk_rec->offset,
8134                                                chunk_rec->length);
8135         if (block_group_item) {
8136                 block_group_rec = container_of(block_group_item,
8137                                                struct block_group_record,
8138                                                cache);
8139                 if (chunk_rec->length != block_group_rec->offset ||
8140                     chunk_rec->offset != block_group_rec->objectid ||
8141                     (!metadump_v2 &&
8142                      chunk_rec->type_flags != block_group_rec->flags)) {
8143                         if (!silent)
8144                                 fprintf(stderr,
8145                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8146                                         chunk_rec->objectid,
8147                                         chunk_rec->type,
8148                                         chunk_rec->offset,
8149                                         chunk_rec->length,
8150                                         chunk_rec->offset,
8151                                         chunk_rec->type_flags,
8152                                         block_group_rec->objectid,
8153                                         block_group_rec->type,
8154                                         block_group_rec->offset,
8155                                         block_group_rec->offset,
8156                                         block_group_rec->objectid,
8157                                         block_group_rec->flags);
8158                         ret = -1;
8159                 } else {
8160                         list_del_init(&block_group_rec->list);
8161                         chunk_rec->bg_rec = block_group_rec;
8162                 }
8163         } else {
8164                 if (!silent)
8165                         fprintf(stderr,
8166                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8167                                 chunk_rec->objectid,
8168                                 chunk_rec->type,
8169                                 chunk_rec->offset,
8170                                 chunk_rec->length,
8171                                 chunk_rec->offset,
8172                                 chunk_rec->type_flags);
8173                 ret = 1;
8174         }
8175
8176         if (metadump_v2)
8177                 return ret;
8178
8179         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8180                                     chunk_rec->num_stripes);
8181         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8182                 devid = chunk_rec->stripes[i].devid;
8183                 offset = chunk_rec->stripes[i].offset;
8184                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8185                                                        devid, offset, length);
8186                 if (dev_extent_item) {
8187                         dev_extent_rec = container_of(dev_extent_item,
8188                                                 struct device_extent_record,
8189                                                 cache);
8190                         if (dev_extent_rec->objectid != devid ||
8191                             dev_extent_rec->offset != offset ||
8192                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8193                             dev_extent_rec->length != length) {
8194                                 if (!silent)
8195                                         fprintf(stderr,
8196                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8197                                                 chunk_rec->objectid,
8198                                                 chunk_rec->type,
8199                                                 chunk_rec->offset,
8200                                                 chunk_rec->stripes[i].devid,
8201                                                 chunk_rec->stripes[i].offset,
8202                                                 dev_extent_rec->objectid,
8203                                                 dev_extent_rec->offset,
8204                                                 dev_extent_rec->length);
8205                                 ret = -1;
8206                         } else {
8207                                 list_move(&dev_extent_rec->chunk_list,
8208                                           &chunk_rec->dextents);
8209                         }
8210                 } else {
8211                         if (!silent)
8212                                 fprintf(stderr,
8213                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8214                                         chunk_rec->objectid,
8215                                         chunk_rec->type,
8216                                         chunk_rec->offset,
8217                                         chunk_rec->stripes[i].devid,
8218                                         chunk_rec->stripes[i].offset);
8219                         ret = -1;
8220                 }
8221         }
8222         return ret;
8223 }
8224
8225 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8226 int check_chunks(struct cache_tree *chunk_cache,
8227                  struct block_group_tree *block_group_cache,
8228                  struct device_extent_tree *dev_extent_cache,
8229                  struct list_head *good, struct list_head *bad,
8230                  struct list_head *rebuild, int silent)
8231 {
8232         struct cache_extent *chunk_item;
8233         struct chunk_record *chunk_rec;
8234         struct block_group_record *bg_rec;
8235         struct device_extent_record *dext_rec;
8236         int err;
8237         int ret = 0;
8238
8239         chunk_item = first_cache_extent(chunk_cache);
8240         while (chunk_item) {
8241                 chunk_rec = container_of(chunk_item, struct chunk_record,
8242                                          cache);
8243                 err = check_chunk_refs(chunk_rec, block_group_cache,
8244                                        dev_extent_cache, silent);
8245                 if (err < 0)
8246                         ret = err;
8247                 if (err == 0 && good)
8248                         list_add_tail(&chunk_rec->list, good);
8249                 if (err > 0 && rebuild)
8250                         list_add_tail(&chunk_rec->list, rebuild);
8251                 if (err < 0 && bad)
8252                         list_add_tail(&chunk_rec->list, bad);
8253                 chunk_item = next_cache_extent(chunk_item);
8254         }
8255
8256         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8257                 if (!silent)
8258                         fprintf(stderr,
8259                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8260                                 bg_rec->objectid,
8261                                 bg_rec->offset,
8262                                 bg_rec->flags);
8263                 if (!ret)
8264                         ret = 1;
8265         }
8266
8267         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8268                             chunk_list) {
8269                 if (!silent)
8270                         fprintf(stderr,
8271                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8272                                 dext_rec->objectid,
8273                                 dext_rec->offset,
8274                                 dext_rec->length);
8275                 if (!ret)
8276                         ret = 1;
8277         }
8278         return ret;
8279 }
8280
8281
8282 static int check_device_used(struct device_record *dev_rec,
8283                              struct device_extent_tree *dext_cache)
8284 {
8285         struct cache_extent *cache;
8286         struct device_extent_record *dev_extent_rec;
8287         u64 total_byte = 0;
8288
8289         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8290         while (cache) {
8291                 dev_extent_rec = container_of(cache,
8292                                               struct device_extent_record,
8293                                               cache);
8294                 if (dev_extent_rec->objectid != dev_rec->devid)
8295                         break;
8296
8297                 list_del_init(&dev_extent_rec->device_list);
8298                 total_byte += dev_extent_rec->length;
8299                 cache = next_cache_extent(cache);
8300         }
8301
8302         if (total_byte != dev_rec->byte_used) {
8303                 fprintf(stderr,
8304                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8305                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8306                         dev_rec->type, dev_rec->offset);
8307                 return -1;
8308         } else {
8309                 return 0;
8310         }
8311 }
8312
8313 /* check btrfs_dev_item -> btrfs_dev_extent */
8314 static int check_devices(struct rb_root *dev_cache,
8315                          struct device_extent_tree *dev_extent_cache)
8316 {
8317         struct rb_node *dev_node;
8318         struct device_record *dev_rec;
8319         struct device_extent_record *dext_rec;
8320         int err;
8321         int ret = 0;
8322
8323         dev_node = rb_first(dev_cache);
8324         while (dev_node) {
8325                 dev_rec = container_of(dev_node, struct device_record, node);
8326                 err = check_device_used(dev_rec, dev_extent_cache);
8327                 if (err)
8328                         ret = err;
8329
8330                 dev_node = rb_next(dev_node);
8331         }
8332         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8333                             device_list) {
8334                 fprintf(stderr,
8335                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8336                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8337                 if (!ret)
8338                         ret = 1;
8339         }
8340         return ret;
8341 }
8342
8343 static int add_root_item_to_list(struct list_head *head,
8344                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8345                                   u8 level, u8 drop_level,
8346                                   int level_size, struct btrfs_key *drop_key)
8347 {
8348
8349         struct root_item_record *ri_rec;
8350         ri_rec = malloc(sizeof(*ri_rec));
8351         if (!ri_rec)
8352                 return -ENOMEM;
8353         ri_rec->bytenr = bytenr;
8354         ri_rec->objectid = objectid;
8355         ri_rec->level = level;
8356         ri_rec->level_size = level_size;
8357         ri_rec->drop_level = drop_level;
8358         ri_rec->last_snapshot = last_snapshot;
8359         if (drop_key)
8360                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8361         list_add_tail(&ri_rec->list, head);
8362
8363         return 0;
8364 }
8365
8366 static void free_root_item_list(struct list_head *list)
8367 {
8368         struct root_item_record *ri_rec;
8369
8370         while (!list_empty(list)) {
8371                 ri_rec = list_first_entry(list, struct root_item_record,
8372                                           list);
8373                 list_del_init(&ri_rec->list);
8374                 free(ri_rec);
8375         }
8376 }
8377
8378 static int deal_root_from_list(struct list_head *list,
8379                                struct btrfs_root *root,
8380                                struct block_info *bits,
8381                                int bits_nr,
8382                                struct cache_tree *pending,
8383                                struct cache_tree *seen,
8384                                struct cache_tree *reada,
8385                                struct cache_tree *nodes,
8386                                struct cache_tree *extent_cache,
8387                                struct cache_tree *chunk_cache,
8388                                struct rb_root *dev_cache,
8389                                struct block_group_tree *block_group_cache,
8390                                struct device_extent_tree *dev_extent_cache)
8391 {
8392         int ret = 0;
8393         u64 last;
8394
8395         while (!list_empty(list)) {
8396                 struct root_item_record *rec;
8397                 struct extent_buffer *buf;
8398                 rec = list_entry(list->next,
8399                                  struct root_item_record, list);
8400                 last = 0;
8401                 buf = read_tree_block(root->fs_info->tree_root,
8402                                       rec->bytenr, rec->level_size, 0);
8403                 if (!extent_buffer_uptodate(buf)) {
8404                         free_extent_buffer(buf);
8405                         ret = -EIO;
8406                         break;
8407                 }
8408                 add_root_to_pending(buf, extent_cache, pending,
8409                                     seen, nodes, rec->objectid);
8410                 /*
8411                  * To rebuild extent tree, we need deal with snapshot
8412                  * one by one, otherwise we deal with node firstly which
8413                  * can maximize readahead.
8414                  */
8415                 while (1) {
8416                         ret = run_next_block(root, bits, bits_nr, &last,
8417                                              pending, seen, reada, nodes,
8418                                              extent_cache, chunk_cache,
8419                                              dev_cache, block_group_cache,
8420                                              dev_extent_cache, rec);
8421                         if (ret != 0)
8422                                 break;
8423                 }
8424                 free_extent_buffer(buf);
8425                 list_del(&rec->list);
8426                 free(rec);
8427                 if (ret < 0)
8428                         break;
8429         }
8430         while (ret >= 0) {
8431                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8432                                      reada, nodes, extent_cache, chunk_cache,
8433                                      dev_cache, block_group_cache,
8434                                      dev_extent_cache, NULL);
8435                 if (ret != 0) {
8436                         if (ret > 0)
8437                                 ret = 0;
8438                         break;
8439                 }
8440         }
8441         return ret;
8442 }
8443
8444 static int check_chunks_and_extents(struct btrfs_root *root)
8445 {
8446         struct rb_root dev_cache;
8447         struct cache_tree chunk_cache;
8448         struct block_group_tree block_group_cache;
8449         struct device_extent_tree dev_extent_cache;
8450         struct cache_tree extent_cache;
8451         struct cache_tree seen;
8452         struct cache_tree pending;
8453         struct cache_tree reada;
8454         struct cache_tree nodes;
8455         struct extent_io_tree excluded_extents;
8456         struct cache_tree corrupt_blocks;
8457         struct btrfs_path path;
8458         struct btrfs_key key;
8459         struct btrfs_key found_key;
8460         int ret, err = 0;
8461         struct block_info *bits;
8462         int bits_nr;
8463         struct extent_buffer *leaf;
8464         int slot;
8465         struct btrfs_root_item ri;
8466         struct list_head dropping_trees;
8467         struct list_head normal_trees;
8468         struct btrfs_root *root1;
8469         u64 objectid;
8470         u32 level_size;
8471         u8 level;
8472
8473         dev_cache = RB_ROOT;
8474         cache_tree_init(&chunk_cache);
8475         block_group_tree_init(&block_group_cache);
8476         device_extent_tree_init(&dev_extent_cache);
8477
8478         cache_tree_init(&extent_cache);
8479         cache_tree_init(&seen);
8480         cache_tree_init(&pending);
8481         cache_tree_init(&nodes);
8482         cache_tree_init(&reada);
8483         cache_tree_init(&corrupt_blocks);
8484         extent_io_tree_init(&excluded_extents);
8485         INIT_LIST_HEAD(&dropping_trees);
8486         INIT_LIST_HEAD(&normal_trees);
8487
8488         if (repair) {
8489                 root->fs_info->excluded_extents = &excluded_extents;
8490                 root->fs_info->fsck_extent_cache = &extent_cache;
8491                 root->fs_info->free_extent_hook = free_extent_hook;
8492                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8493         }
8494
8495         bits_nr = 1024;
8496         bits = malloc(bits_nr * sizeof(struct block_info));
8497         if (!bits) {
8498                 perror("malloc");
8499                 exit(1);
8500         }
8501
8502         if (ctx.progress_enabled) {
8503                 ctx.tp = TASK_EXTENTS;
8504                 task_start(ctx.info);
8505         }
8506
8507 again:
8508         root1 = root->fs_info->tree_root;
8509         level = btrfs_header_level(root1->node);
8510         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8511                                     root1->node->start, 0, level, 0,
8512                                     root1->nodesize, NULL);
8513         if (ret < 0)
8514                 goto out;
8515         root1 = root->fs_info->chunk_root;
8516         level = btrfs_header_level(root1->node);
8517         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8518                                     root1->node->start, 0, level, 0,
8519                                     root1->nodesize, NULL);
8520         if (ret < 0)
8521                 goto out;
8522         btrfs_init_path(&path);
8523         key.offset = 0;
8524         key.objectid = 0;
8525         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8526         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8527                                         &key, &path, 0, 0);
8528         if (ret < 0)
8529                 goto out;
8530         while(1) {
8531                 leaf = path.nodes[0];
8532                 slot = path.slots[0];
8533                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8534                         ret = btrfs_next_leaf(root, &path);
8535                         if (ret != 0)
8536                                 break;
8537                         leaf = path.nodes[0];
8538                         slot = path.slots[0];
8539                 }
8540                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8541                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8542                         unsigned long offset;
8543                         u64 last_snapshot;
8544
8545                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8546                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8547                         last_snapshot = btrfs_root_last_snapshot(&ri);
8548                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8549                                 level = btrfs_root_level(&ri);
8550                                 level_size = root->nodesize;
8551                                 ret = add_root_item_to_list(&normal_trees,
8552                                                 found_key.objectid,
8553                                                 btrfs_root_bytenr(&ri),
8554                                                 last_snapshot, level,
8555                                                 0, level_size, NULL);
8556                                 if (ret < 0)
8557                                         goto out;
8558                         } else {
8559                                 level = btrfs_root_level(&ri);
8560                                 level_size = root->nodesize;
8561                                 objectid = found_key.objectid;
8562                                 btrfs_disk_key_to_cpu(&found_key,
8563                                                       &ri.drop_progress);
8564                                 ret = add_root_item_to_list(&dropping_trees,
8565                                                 objectid,
8566                                                 btrfs_root_bytenr(&ri),
8567                                                 last_snapshot, level,
8568                                                 ri.drop_level,
8569                                                 level_size, &found_key);
8570                                 if (ret < 0)
8571                                         goto out;
8572                         }
8573                 }
8574                 path.slots[0]++;
8575         }
8576         btrfs_release_path(&path);
8577
8578         /*
8579          * check_block can return -EAGAIN if it fixes something, please keep
8580          * this in mind when dealing with return values from these functions, if
8581          * we get -EAGAIN we want to fall through and restart the loop.
8582          */
8583         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8584                                   &seen, &reada, &nodes, &extent_cache,
8585                                   &chunk_cache, &dev_cache, &block_group_cache,
8586                                   &dev_extent_cache);
8587         if (ret < 0) {
8588                 if (ret == -EAGAIN)
8589                         goto loop;
8590                 goto out;
8591         }
8592         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8593                                   &pending, &seen, &reada, &nodes,
8594                                   &extent_cache, &chunk_cache, &dev_cache,
8595                                   &block_group_cache, &dev_extent_cache);
8596         if (ret < 0) {
8597                 if (ret == -EAGAIN)
8598                         goto loop;
8599                 goto out;
8600         }
8601
8602         ret = check_chunks(&chunk_cache, &block_group_cache,
8603                            &dev_extent_cache, NULL, NULL, NULL, 0);
8604         if (ret) {
8605                 if (ret == -EAGAIN)
8606                         goto loop;
8607                 err = ret;
8608         }
8609
8610         ret = check_extent_refs(root, &extent_cache);
8611         if (ret < 0) {
8612                 if (ret == -EAGAIN)
8613                         goto loop;
8614                 goto out;
8615         }
8616
8617         ret = check_devices(&dev_cache, &dev_extent_cache);
8618         if (ret && err)
8619                 ret = err;
8620
8621 out:
8622         task_stop(ctx.info);
8623         if (repair) {
8624                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8625                 extent_io_tree_cleanup(&excluded_extents);
8626                 root->fs_info->fsck_extent_cache = NULL;
8627                 root->fs_info->free_extent_hook = NULL;
8628                 root->fs_info->corrupt_blocks = NULL;
8629                 root->fs_info->excluded_extents = NULL;
8630         }
8631         free(bits);
8632         free_chunk_cache_tree(&chunk_cache);
8633         free_device_cache_tree(&dev_cache);
8634         free_block_group_tree(&block_group_cache);
8635         free_device_extent_tree(&dev_extent_cache);
8636         free_extent_cache_tree(&seen);
8637         free_extent_cache_tree(&pending);
8638         free_extent_cache_tree(&reada);
8639         free_extent_cache_tree(&nodes);
8640         return ret;
8641 loop:
8642         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8643         free_extent_cache_tree(&seen);
8644         free_extent_cache_tree(&pending);
8645         free_extent_cache_tree(&reada);
8646         free_extent_cache_tree(&nodes);
8647         free_chunk_cache_tree(&chunk_cache);
8648         free_block_group_tree(&block_group_cache);
8649         free_device_cache_tree(&dev_cache);
8650         free_device_extent_tree(&dev_extent_cache);
8651         free_extent_record_cache(root->fs_info, &extent_cache);
8652         free_root_item_list(&normal_trees);
8653         free_root_item_list(&dropping_trees);
8654         extent_io_tree_cleanup(&excluded_extents);
8655         goto again;
8656 }
8657
8658 /*
8659  * Check backrefs of a tree block given by @bytenr or @eb.
8660  *
8661  * @root:       the root containing the @bytenr or @eb
8662  * @eb:         tree block extent buffer, can be NULL
8663  * @bytenr:     bytenr of the tree block to search
8664  * @level:      tree level of the tree block
8665  * @owner:      owner of the tree block
8666  *
8667  * Return >0 for any error found and output error message
8668  * Return 0 for no error found
8669  */
8670 static int check_tree_block_ref(struct btrfs_root *root,
8671                                 struct extent_buffer *eb, u64 bytenr,
8672                                 int level, u64 owner)
8673 {
8674         struct btrfs_key key;
8675         struct btrfs_root *extent_root = root->fs_info->extent_root;
8676         struct btrfs_path path;
8677         struct btrfs_extent_item *ei;
8678         struct btrfs_extent_inline_ref *iref;
8679         struct extent_buffer *leaf;
8680         unsigned long end;
8681         unsigned long ptr;
8682         int slot;
8683         int skinny_level;
8684         int type;
8685         u32 nodesize = root->nodesize;
8686         u32 item_size;
8687         u64 offset;
8688         int found_ref = 0;
8689         int err = 0;
8690         int ret;
8691
8692         btrfs_init_path(&path);
8693         key.objectid = bytenr;
8694         if (btrfs_fs_incompat(root->fs_info,
8695                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8696                 key.type = BTRFS_METADATA_ITEM_KEY;
8697         else
8698                 key.type = BTRFS_EXTENT_ITEM_KEY;
8699         key.offset = (u64)-1;
8700
8701         /* Search for the backref in extent tree */
8702         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8703         if (ret < 0) {
8704                 err |= BACKREF_MISSING;
8705                 goto out;
8706         }
8707         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8708         if (ret) {
8709                 err |= BACKREF_MISSING;
8710                 goto out;
8711         }
8712
8713         leaf = path.nodes[0];
8714         slot = path.slots[0];
8715         btrfs_item_key_to_cpu(leaf, &key, slot);
8716
8717         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8718
8719         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8720                 skinny_level = (int)key.offset;
8721                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8722         } else {
8723                 struct btrfs_tree_block_info *info;
8724
8725                 info = (struct btrfs_tree_block_info *)(ei + 1);
8726                 skinny_level = btrfs_tree_block_level(leaf, info);
8727                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8728         }
8729
8730         if (eb) {
8731                 u64 header_gen;
8732                 u64 extent_gen;
8733
8734                 if (!(btrfs_extent_flags(leaf, ei) &
8735                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8736                         error(
8737                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8738                                 key.objectid, nodesize,
8739                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8740                         err = BACKREF_MISMATCH;
8741                 }
8742                 header_gen = btrfs_header_generation(eb);
8743                 extent_gen = btrfs_extent_generation(leaf, ei);
8744                 if (header_gen != extent_gen) {
8745                         error(
8746         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8747                                 key.objectid, nodesize, header_gen,
8748                                 extent_gen);
8749                         err = BACKREF_MISMATCH;
8750                 }
8751                 if (level != skinny_level) {
8752                         error(
8753                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8754                                 key.objectid, nodesize, level, skinny_level);
8755                         err = BACKREF_MISMATCH;
8756                 }
8757                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8758                         error(
8759                         "extent[%llu %u] is referred by other roots than %llu",
8760                                 key.objectid, nodesize, root->objectid);
8761                         err = BACKREF_MISMATCH;
8762                 }
8763         }
8764
8765         /*
8766          * Iterate the extent/metadata item to find the exact backref
8767          */
8768         item_size = btrfs_item_size_nr(leaf, slot);
8769         ptr = (unsigned long)iref;
8770         end = (unsigned long)ei + item_size;
8771         while (ptr < end) {
8772                 iref = (struct btrfs_extent_inline_ref *)ptr;
8773                 type = btrfs_extent_inline_ref_type(leaf, iref);
8774                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8775
8776                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8777                         (offset == root->objectid || offset == owner)) {
8778                         found_ref = 1;
8779                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8780                         /* Check if the backref points to valid referencer */
8781                         found_ref = !check_tree_block_ref(root, NULL, offset,
8782                                                           level + 1, owner);
8783                 }
8784
8785                 if (found_ref)
8786                         break;
8787                 ptr += btrfs_extent_inline_ref_size(type);
8788         }
8789
8790         /*
8791          * Inlined extent item doesn't have what we need, check
8792          * TREE_BLOCK_REF_KEY
8793          */
8794         if (!found_ref) {
8795                 btrfs_release_path(&path);
8796                 key.objectid = bytenr;
8797                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8798                 key.offset = root->objectid;
8799
8800                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8801                 if (!ret)
8802                         found_ref = 1;
8803         }
8804         if (!found_ref)
8805                 err |= BACKREF_MISSING;
8806 out:
8807         btrfs_release_path(&path);
8808         if (eb && (err & BACKREF_MISSING))
8809                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8810                         bytenr, nodesize, owner, level);
8811         return err;
8812 }
8813
8814 /*
8815  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8816  *
8817  * Return >0 any error found and output error message
8818  * Return 0 for no error found
8819  */
8820 static int check_extent_data_item(struct btrfs_root *root,
8821                                   struct extent_buffer *eb, int slot)
8822 {
8823         struct btrfs_file_extent_item *fi;
8824         struct btrfs_path path;
8825         struct btrfs_root *extent_root = root->fs_info->extent_root;
8826         struct btrfs_key fi_key;
8827         struct btrfs_key dbref_key;
8828         struct extent_buffer *leaf;
8829         struct btrfs_extent_item *ei;
8830         struct btrfs_extent_inline_ref *iref;
8831         struct btrfs_extent_data_ref *dref;
8832         u64 owner;
8833         u64 file_extent_gen;
8834         u64 disk_bytenr;
8835         u64 disk_num_bytes;
8836         u64 extent_num_bytes;
8837         u64 extent_flags;
8838         u64 extent_gen;
8839         u32 item_size;
8840         unsigned long end;
8841         unsigned long ptr;
8842         int type;
8843         u64 ref_root;
8844         int found_dbackref = 0;
8845         int err = 0;
8846         int ret;
8847
8848         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8850         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8851
8852         /* Nothing to check for hole and inline data extents */
8853         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8854             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8855                 return 0;
8856
8857         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8858         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8859         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8860
8861         /* Check unaligned disk_num_bytes and num_bytes */
8862         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8863                 error(
8864 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8865                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8866                         root->sectorsize);
8867                 err |= BYTES_UNALIGNED;
8868         } else {
8869                 data_bytes_allocated += disk_num_bytes;
8870         }
8871         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8872                 error(
8873 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8874                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8875                         root->sectorsize);
8876                 err |= BYTES_UNALIGNED;
8877         } else {
8878                 data_bytes_referenced += extent_num_bytes;
8879         }
8880         owner = btrfs_header_owner(eb);
8881
8882         /* Check the extent item of the file extent in extent tree */
8883         btrfs_init_path(&path);
8884         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8885         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8886         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8887
8888         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8889         if (ret) {
8890                 err |= BACKREF_MISSING;
8891                 goto error;
8892         }
8893
8894         leaf = path.nodes[0];
8895         slot = path.slots[0];
8896         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8897
8898         extent_flags = btrfs_extent_flags(leaf, ei);
8899         extent_gen = btrfs_extent_generation(leaf, ei);
8900
8901         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8902                 error(
8903                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8904                     disk_bytenr, disk_num_bytes,
8905                     BTRFS_EXTENT_FLAG_DATA);
8906                 err |= BACKREF_MISMATCH;
8907         }
8908
8909         if (file_extent_gen < extent_gen) {
8910                 error(
8911 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8912                         disk_bytenr, disk_num_bytes, file_extent_gen,
8913                         extent_gen);
8914                 err |= BACKREF_MISMATCH;
8915         }
8916
8917         /* Check data backref inside that extent item */
8918         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8919         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8920         ptr = (unsigned long)iref;
8921         end = (unsigned long)ei + item_size;
8922         while (ptr < end) {
8923                 iref = (struct btrfs_extent_inline_ref *)ptr;
8924                 type = btrfs_extent_inline_ref_type(leaf, iref);
8925                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8926
8927                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8928                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8929                         if (ref_root == owner || ref_root == root->objectid)
8930                                 found_dbackref = 1;
8931                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8932                         found_dbackref = !check_tree_block_ref(root, NULL,
8933                                 btrfs_extent_inline_ref_offset(leaf, iref),
8934                                 0, owner);
8935                 }
8936
8937                 if (found_dbackref)
8938                         break;
8939                 ptr += btrfs_extent_inline_ref_size(type);
8940         }
8941
8942         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8943         if (!found_dbackref) {
8944                 btrfs_release_path(&path);
8945
8946                 btrfs_init_path(&path);
8947                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8948                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8949                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8950                                 fi_key.objectid, fi_key.offset);
8951
8952                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8953                                         &dbref_key, &path, 0, 0);
8954                 if (!ret)
8955                         found_dbackref = 1;
8956         }
8957
8958         if (!found_dbackref)
8959                 err |= BACKREF_MISSING;
8960 error:
8961         btrfs_release_path(&path);
8962         if (err & BACKREF_MISSING) {
8963                 error("data extent[%llu %llu] backref lost",
8964                       disk_bytenr, disk_num_bytes);
8965         }
8966         return err;
8967 }
8968
8969 /*
8970  * Get real tree block level for the case like shared block
8971  * Return >= 0 as tree level
8972  * Return <0 for error
8973  */
8974 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8975 {
8976         struct extent_buffer *eb;
8977         struct btrfs_path path;
8978         struct btrfs_key key;
8979         struct btrfs_extent_item *ei;
8980         u64 flags;
8981         u64 transid;
8982         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8983         u8 backref_level;
8984         u8 header_level;
8985         int ret;
8986
8987         /* Search extent tree for extent generation and level */
8988         key.objectid = bytenr;
8989         key.type = BTRFS_METADATA_ITEM_KEY;
8990         key.offset = (u64)-1;
8991
8992         btrfs_init_path(&path);
8993         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8994         if (ret < 0)
8995                 goto release_out;
8996         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8997         if (ret < 0)
8998                 goto release_out;
8999         if (ret > 0) {
9000                 ret = -ENOENT;
9001                 goto release_out;
9002         }
9003
9004         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9005         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9006                             struct btrfs_extent_item);
9007         flags = btrfs_extent_flags(path.nodes[0], ei);
9008         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9009                 ret = -ENOENT;
9010                 goto release_out;
9011         }
9012
9013         /* Get transid for later read_tree_block() check */
9014         transid = btrfs_extent_generation(path.nodes[0], ei);
9015
9016         /* Get backref level as one source */
9017         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9018                 backref_level = key.offset;
9019         } else {
9020                 struct btrfs_tree_block_info *info;
9021
9022                 info = (struct btrfs_tree_block_info *)(ei + 1);
9023                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9024         }
9025         btrfs_release_path(&path);
9026
9027         /* Get level from tree block as an alternative source */
9028         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9029         if (!extent_buffer_uptodate(eb)) {
9030                 free_extent_buffer(eb);
9031                 return -EIO;
9032         }
9033         header_level = btrfs_header_level(eb);
9034         free_extent_buffer(eb);
9035
9036         if (header_level != backref_level)
9037                 return -EIO;
9038         return header_level;
9039
9040 release_out:
9041         btrfs_release_path(&path);
9042         return ret;
9043 }
9044
9045 /*
9046  * Check if a tree block backref is valid (points to a valid tree block)
9047  * if level == -1, level will be resolved
9048  * Return >0 for any error found and print error message
9049  */
9050 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9051                                     u64 bytenr, int level)
9052 {
9053         struct btrfs_root *root;
9054         struct btrfs_key key;
9055         struct btrfs_path path;
9056         struct extent_buffer *eb;
9057         struct extent_buffer *node;
9058         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9059         int err = 0;
9060         int ret;
9061
9062         /* Query level for level == -1 special case */
9063         if (level == -1)
9064                 level = query_tree_block_level(fs_info, bytenr);
9065         if (level < 0) {
9066                 err |= REFERENCER_MISSING;
9067                 goto out;
9068         }
9069
9070         key.objectid = root_id;
9071         key.type = BTRFS_ROOT_ITEM_KEY;
9072         key.offset = (u64)-1;
9073
9074         root = btrfs_read_fs_root(fs_info, &key);
9075         if (IS_ERR(root)) {
9076                 err |= REFERENCER_MISSING;
9077                 goto out;
9078         }
9079
9080         /* Read out the tree block to get item/node key */
9081         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9082         if (!extent_buffer_uptodate(eb)) {
9083                 err |= REFERENCER_MISSING;
9084                 free_extent_buffer(eb);
9085                 goto out;
9086         }
9087
9088         /* Empty tree, no need to check key */
9089         if (!btrfs_header_nritems(eb) && !level) {
9090                 free_extent_buffer(eb);
9091                 goto out;
9092         }
9093
9094         if (level)
9095                 btrfs_node_key_to_cpu(eb, &key, 0);
9096         else
9097                 btrfs_item_key_to_cpu(eb, &key, 0);
9098
9099         free_extent_buffer(eb);
9100
9101         btrfs_init_path(&path);
9102         /* Search with the first key, to ensure we can reach it */
9103         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9104         if (ret) {
9105                 err |= REFERENCER_MISSING;
9106                 goto release_out;
9107         }
9108
9109         node = path.nodes[level];
9110         if (btrfs_header_bytenr(node) != bytenr) {
9111                 error(
9112         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9113                         bytenr, nodesize, bytenr,
9114                         btrfs_header_bytenr(node));
9115                 err |= REFERENCER_MISMATCH;
9116         }
9117         if (btrfs_header_level(node) != level) {
9118                 error(
9119         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9120                         bytenr, nodesize, level,
9121                         btrfs_header_level(node));
9122                 err |= REFERENCER_MISMATCH;
9123         }
9124
9125 release_out:
9126         btrfs_release_path(&path);
9127 out:
9128         if (err & REFERENCER_MISSING) {
9129                 if (level < 0)
9130                         error("extent [%llu %d] lost referencer (owner: %llu)",
9131                                 bytenr, nodesize, root_id);
9132                 else
9133                         error(
9134                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9135                                 bytenr, nodesize, root_id, level);
9136         }
9137
9138         return err;
9139 }
9140
9141 /*
9142  * Check referencer for shared block backref
9143  * If level == -1, this function will resolve the level.
9144  */
9145 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9146                                      u64 parent, u64 bytenr, int level)
9147 {
9148         struct extent_buffer *eb;
9149         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9150         u32 nr;
9151         int found_parent = 0;
9152         int i;
9153
9154         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9155         if (!extent_buffer_uptodate(eb))
9156                 goto out;
9157
9158         if (level == -1)
9159                 level = query_tree_block_level(fs_info, bytenr);
9160         if (level < 0)
9161                 goto out;
9162
9163         if (level + 1 != btrfs_header_level(eb))
9164                 goto out;
9165
9166         nr = btrfs_header_nritems(eb);
9167         for (i = 0; i < nr; i++) {
9168                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9169                         found_parent = 1;
9170                         break;
9171                 }
9172         }
9173 out:
9174         free_extent_buffer(eb);
9175         if (!found_parent) {
9176                 error(
9177         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9178                         bytenr, nodesize, parent, level);
9179                 return REFERENCER_MISSING;
9180         }
9181         return 0;
9182 }
9183
9184 /*
9185  * Check referencer for normal (inlined) data ref
9186  * If len == 0, it will be resolved by searching in extent tree
9187  */
9188 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9189                                      u64 root_id, u64 objectid, u64 offset,
9190                                      u64 bytenr, u64 len, u32 count)
9191 {
9192         struct btrfs_root *root;
9193         struct btrfs_root *extent_root = fs_info->extent_root;
9194         struct btrfs_key key;
9195         struct btrfs_path path;
9196         struct extent_buffer *leaf;
9197         struct btrfs_file_extent_item *fi;
9198         u32 found_count = 0;
9199         int slot;
9200         int ret = 0;
9201
9202         if (!len) {
9203                 key.objectid = bytenr;
9204                 key.type = BTRFS_EXTENT_ITEM_KEY;
9205                 key.offset = (u64)-1;
9206
9207                 btrfs_init_path(&path);
9208                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9209                 if (ret < 0)
9210                         goto out;
9211                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9212                 if (ret)
9213                         goto out;
9214                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9215                 if (key.objectid != bytenr ||
9216                     key.type != BTRFS_EXTENT_ITEM_KEY)
9217                         goto out;
9218                 len = key.offset;
9219                 btrfs_release_path(&path);
9220         }
9221         key.objectid = root_id;
9222         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9223         key.offset = (u64)-1;
9224         btrfs_init_path(&path);
9225
9226         root = btrfs_read_fs_root(fs_info, &key);
9227         if (IS_ERR(root))
9228                 goto out;
9229
9230         key.objectid = objectid;
9231         key.type = BTRFS_EXTENT_DATA_KEY;
9232         /*
9233          * It can be nasty as data backref offset is
9234          * file offset - file extent offset, which is smaller or
9235          * equal to original backref offset.  The only special case is
9236          * overflow.  So we need to special check and do further search.
9237          */
9238         key.offset = offset & (1ULL << 63) ? 0 : offset;
9239
9240         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9241         if (ret < 0)
9242                 goto out;
9243
9244         /*
9245          * Search afterwards to get correct one
9246          * NOTE: As we must do a comprehensive check on the data backref to
9247          * make sure the dref count also matches, we must iterate all file
9248          * extents for that inode.
9249          */
9250         while (1) {
9251                 leaf = path.nodes[0];
9252                 slot = path.slots[0];
9253
9254                 btrfs_item_key_to_cpu(leaf, &key, slot);
9255                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9256                         break;
9257                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9258                 /*
9259                  * Except normal disk bytenr and disk num bytes, we still
9260                  * need to do extra check on dbackref offset as
9261                  * dbackref offset = file_offset - file_extent_offset
9262                  */
9263                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9264                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9265                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9266                     offset)
9267                         found_count++;
9268
9269                 ret = btrfs_next_item(root, &path);
9270                 if (ret)
9271                         break;
9272         }
9273 out:
9274         btrfs_release_path(&path);
9275         if (found_count != count) {
9276                 error(
9277 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9278                         bytenr, len, root_id, objectid, offset, count, found_count);
9279                 return REFERENCER_MISSING;
9280         }
9281         return 0;
9282 }
9283
9284 /*
9285  * Check if the referencer of a shared data backref exists
9286  */
9287 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9288                                      u64 parent, u64 bytenr)
9289 {
9290         struct extent_buffer *eb;
9291         struct btrfs_key key;
9292         struct btrfs_file_extent_item *fi;
9293         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9294         u32 nr;
9295         int found_parent = 0;
9296         int i;
9297
9298         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9299         if (!extent_buffer_uptodate(eb))
9300                 goto out;
9301
9302         nr = btrfs_header_nritems(eb);
9303         for (i = 0; i < nr; i++) {
9304                 btrfs_item_key_to_cpu(eb, &key, i);
9305                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9306                         continue;
9307
9308                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9309                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9310                         continue;
9311
9312                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9313                         found_parent = 1;
9314                         break;
9315                 }
9316         }
9317
9318 out:
9319         free_extent_buffer(eb);
9320         if (!found_parent) {
9321                 error("shared extent %llu referencer lost (parent: %llu)",
9322                         bytenr, parent);
9323                 return REFERENCER_MISSING;
9324         }
9325         return 0;
9326 }
9327
9328 /*
9329  * This function will check a given extent item, including its backref and
9330  * itself (like crossing stripe boundary and type)
9331  *
9332  * Since we don't use extent_record anymore, introduce new error bit
9333  */
9334 static int check_extent_item(struct btrfs_fs_info *fs_info,
9335                              struct extent_buffer *eb, int slot)
9336 {
9337         struct btrfs_extent_item *ei;
9338         struct btrfs_extent_inline_ref *iref;
9339         struct btrfs_extent_data_ref *dref;
9340         unsigned long end;
9341         unsigned long ptr;
9342         int type;
9343         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9344         u32 item_size = btrfs_item_size_nr(eb, slot);
9345         u64 flags;
9346         u64 offset;
9347         int metadata = 0;
9348         int level;
9349         struct btrfs_key key;
9350         int ret;
9351         int err = 0;
9352
9353         btrfs_item_key_to_cpu(eb, &key, slot);
9354         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9355                 bytes_used += key.offset;
9356         else
9357                 bytes_used += nodesize;
9358
9359         if (item_size < sizeof(*ei)) {
9360                 /*
9361                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9362                  * old thing when on disk format is still un-determined.
9363                  * No need to care about it anymore
9364                  */
9365                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9366                 return -ENOTTY;
9367         }
9368
9369         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9370         flags = btrfs_extent_flags(eb, ei);
9371
9372         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9373                 metadata = 1;
9374         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9375                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9376                       key.objectid, key.objectid + nodesize);
9377                 err |= CROSSING_STRIPE_BOUNDARY;
9378         }
9379
9380         ptr = (unsigned long)(ei + 1);
9381
9382         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9383                 /* Old EXTENT_ITEM metadata */
9384                 struct btrfs_tree_block_info *info;
9385
9386                 info = (struct btrfs_tree_block_info *)ptr;
9387                 level = btrfs_tree_block_level(eb, info);
9388                 ptr += sizeof(struct btrfs_tree_block_info);
9389         } else {
9390                 /* New METADATA_ITEM */
9391                 level = key.offset;
9392         }
9393         end = (unsigned long)ei + item_size;
9394
9395         if (ptr >= end) {
9396                 err |= ITEM_SIZE_MISMATCH;
9397                 goto out;
9398         }
9399
9400         /* Now check every backref in this extent item */
9401 next:
9402         iref = (struct btrfs_extent_inline_ref *)ptr;
9403         type = btrfs_extent_inline_ref_type(eb, iref);
9404         offset = btrfs_extent_inline_ref_offset(eb, iref);
9405         switch (type) {
9406         case BTRFS_TREE_BLOCK_REF_KEY:
9407                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9408                                                level);
9409                 err |= ret;
9410                 break;
9411         case BTRFS_SHARED_BLOCK_REF_KEY:
9412                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9413                                                  level);
9414                 err |= ret;
9415                 break;
9416         case BTRFS_EXTENT_DATA_REF_KEY:
9417                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9418                 ret = check_extent_data_backref(fs_info,
9419                                 btrfs_extent_data_ref_root(eb, dref),
9420                                 btrfs_extent_data_ref_objectid(eb, dref),
9421                                 btrfs_extent_data_ref_offset(eb, dref),
9422                                 key.objectid, key.offset,
9423                                 btrfs_extent_data_ref_count(eb, dref));
9424                 err |= ret;
9425                 break;
9426         case BTRFS_SHARED_DATA_REF_KEY:
9427                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9428                 err |= ret;
9429                 break;
9430         default:
9431                 error("extent[%llu %d %llu] has unknown ref type: %d",
9432                         key.objectid, key.type, key.offset, type);
9433                 err |= UNKNOWN_TYPE;
9434                 goto out;
9435         }
9436
9437         ptr += btrfs_extent_inline_ref_size(type);
9438         if (ptr < end)
9439                 goto next;
9440
9441 out:
9442         return err;
9443 }
9444
9445 /*
9446  * Check if a dev extent item is referred correctly by its chunk
9447  */
9448 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9449                                  struct extent_buffer *eb, int slot)
9450 {
9451         struct btrfs_root *chunk_root = fs_info->chunk_root;
9452         struct btrfs_dev_extent *ptr;
9453         struct btrfs_path path;
9454         struct btrfs_key chunk_key;
9455         struct btrfs_key devext_key;
9456         struct btrfs_chunk *chunk;
9457         struct extent_buffer *l;
9458         int num_stripes;
9459         u64 length;
9460         int i;
9461         int found_chunk = 0;
9462         int ret;
9463
9464         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9465         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9466         length = btrfs_dev_extent_length(eb, ptr);
9467
9468         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9469         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9470         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9471
9472         btrfs_init_path(&path);
9473         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9474         if (ret)
9475                 goto out;
9476
9477         l = path.nodes[0];
9478         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9479         if (btrfs_chunk_length(l, chunk) != length)
9480                 goto out;
9481
9482         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9483         for (i = 0; i < num_stripes; i++) {
9484                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9485                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9486
9487                 if (devid == devext_key.objectid &&
9488                     offset == devext_key.offset) {
9489                         found_chunk = 1;
9490                         break;
9491                 }
9492         }
9493 out:
9494         btrfs_release_path(&path);
9495         if (!found_chunk) {
9496                 error(
9497                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9498                         devext_key.objectid, devext_key.offset, length);
9499                 return REFERENCER_MISSING;
9500         }
9501         return 0;
9502 }
9503
9504 /*
9505  * Check if the used space is correct with the dev item
9506  */
9507 static int check_dev_item(struct btrfs_fs_info *fs_info,
9508                           struct extent_buffer *eb, int slot)
9509 {
9510         struct btrfs_root *dev_root = fs_info->dev_root;
9511         struct btrfs_dev_item *dev_item;
9512         struct btrfs_path path;
9513         struct btrfs_key key;
9514         struct btrfs_dev_extent *ptr;
9515         u64 dev_id;
9516         u64 used;
9517         u64 total = 0;
9518         int ret;
9519
9520         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9521         dev_id = btrfs_device_id(eb, dev_item);
9522         used = btrfs_device_bytes_used(eb, dev_item);
9523
9524         key.objectid = dev_id;
9525         key.type = BTRFS_DEV_EXTENT_KEY;
9526         key.offset = 0;
9527
9528         btrfs_init_path(&path);
9529         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9530         if (ret < 0) {
9531                 btrfs_item_key_to_cpu(eb, &key, slot);
9532                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9533                         key.objectid, key.type, key.offset);
9534                 btrfs_release_path(&path);
9535                 return REFERENCER_MISSING;
9536         }
9537
9538         /* Iterate dev_extents to calculate the used space of a device */
9539         while (1) {
9540                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9541
9542                 if (key.objectid > dev_id)
9543                         break;
9544                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9545                         goto next;
9546
9547                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9548                                      struct btrfs_dev_extent);
9549                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9550 next:
9551                 ret = btrfs_next_item(dev_root, &path);
9552                 if (ret)
9553                         break;
9554         }
9555         btrfs_release_path(&path);
9556
9557         if (used != total) {
9558                 btrfs_item_key_to_cpu(eb, &key, slot);
9559                 error(
9560 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9561                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9562                         BTRFS_DEV_EXTENT_KEY, dev_id);
9563                 return ACCOUNTING_MISMATCH;
9564         }
9565         return 0;
9566 }
9567
9568 /*
9569  * Check a block group item with its referener (chunk) and its used space
9570  * with extent/metadata item
9571  */
9572 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9573                                   struct extent_buffer *eb, int slot)
9574 {
9575         struct btrfs_root *extent_root = fs_info->extent_root;
9576         struct btrfs_root *chunk_root = fs_info->chunk_root;
9577         struct btrfs_block_group_item *bi;
9578         struct btrfs_block_group_item bg_item;
9579         struct btrfs_path path;
9580         struct btrfs_key bg_key;
9581         struct btrfs_key chunk_key;
9582         struct btrfs_key extent_key;
9583         struct btrfs_chunk *chunk;
9584         struct extent_buffer *leaf;
9585         struct btrfs_extent_item *ei;
9586         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9587         u64 flags;
9588         u64 bg_flags;
9589         u64 used;
9590         u64 total = 0;
9591         int ret;
9592         int err = 0;
9593
9594         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9595         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9596         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9597         used = btrfs_block_group_used(&bg_item);
9598         bg_flags = btrfs_block_group_flags(&bg_item);
9599
9600         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9601         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9602         chunk_key.offset = bg_key.objectid;
9603
9604         btrfs_init_path(&path);
9605         /* Search for the referencer chunk */
9606         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9607         if (ret) {
9608                 error(
9609                 "block group[%llu %llu] did not find the related chunk item",
9610                         bg_key.objectid, bg_key.offset);
9611                 err |= REFERENCER_MISSING;
9612         } else {
9613                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9614                                         struct btrfs_chunk);
9615                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9616                                                 bg_key.offset) {
9617                         error(
9618         "block group[%llu %llu] related chunk item length does not match",
9619                                 bg_key.objectid, bg_key.offset);
9620                         err |= REFERENCER_MISMATCH;
9621                 }
9622         }
9623         btrfs_release_path(&path);
9624
9625         /* Search from the block group bytenr */
9626         extent_key.objectid = bg_key.objectid;
9627         extent_key.type = 0;
9628         extent_key.offset = 0;
9629
9630         btrfs_init_path(&path);
9631         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9632         if (ret < 0)
9633                 goto out;
9634
9635         /* Iterate extent tree to account used space */
9636         while (1) {
9637                 leaf = path.nodes[0];
9638                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9639                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9640                         break;
9641
9642                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9643                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9644                         goto next;
9645                 if (extent_key.objectid < bg_key.objectid)
9646                         goto next;
9647
9648                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9649                         total += nodesize;
9650                 else
9651                         total += extent_key.offset;
9652
9653                 ei = btrfs_item_ptr(leaf, path.slots[0],
9654                                     struct btrfs_extent_item);
9655                 flags = btrfs_extent_flags(leaf, ei);
9656                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9657                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9658                                 error(
9659                         "bad extent[%llu, %llu) type mismatch with chunk",
9660                                         extent_key.objectid,
9661                                         extent_key.objectid + extent_key.offset);
9662                                 err |= CHUNK_TYPE_MISMATCH;
9663                         }
9664                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9665                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9666                                     BTRFS_BLOCK_GROUP_METADATA))) {
9667                                 error(
9668                         "bad extent[%llu, %llu) type mismatch with chunk",
9669                                         extent_key.objectid,
9670                                         extent_key.objectid + nodesize);
9671                                 err |= CHUNK_TYPE_MISMATCH;
9672                         }
9673                 }
9674 next:
9675                 ret = btrfs_next_item(extent_root, &path);
9676                 if (ret)
9677                         break;
9678         }
9679
9680 out:
9681         btrfs_release_path(&path);
9682
9683         if (total != used) {
9684                 error(
9685                 "block group[%llu %llu] used %llu but extent items used %llu",
9686                         bg_key.objectid, bg_key.offset, used, total);
9687                 err |= ACCOUNTING_MISMATCH;
9688         }
9689         return err;
9690 }
9691
9692 /*
9693  * Check a chunk item.
9694  * Including checking all referred dev_extents and block group
9695  */
9696 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9697                             struct extent_buffer *eb, int slot)
9698 {
9699         struct btrfs_root *extent_root = fs_info->extent_root;
9700         struct btrfs_root *dev_root = fs_info->dev_root;
9701         struct btrfs_path path;
9702         struct btrfs_key chunk_key;
9703         struct btrfs_key bg_key;
9704         struct btrfs_key devext_key;
9705         struct btrfs_chunk *chunk;
9706         struct extent_buffer *leaf;
9707         struct btrfs_block_group_item *bi;
9708         struct btrfs_block_group_item bg_item;
9709         struct btrfs_dev_extent *ptr;
9710         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9711         u64 length;
9712         u64 chunk_end;
9713         u64 type;
9714         u64 profile;
9715         int num_stripes;
9716         u64 offset;
9717         u64 objectid;
9718         int i;
9719         int ret;
9720         int err = 0;
9721
9722         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9723         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9724         length = btrfs_chunk_length(eb, chunk);
9725         chunk_end = chunk_key.offset + length;
9726         if (!IS_ALIGNED(length, sectorsize)) {
9727                 error("chunk[%llu %llu) not aligned to %u",
9728                         chunk_key.offset, chunk_end, sectorsize);
9729                 err |= BYTES_UNALIGNED;
9730                 goto out;
9731         }
9732
9733         type = btrfs_chunk_type(eb, chunk);
9734         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9735         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9736                 error("chunk[%llu %llu) has no chunk type",
9737                         chunk_key.offset, chunk_end);
9738                 err |= UNKNOWN_TYPE;
9739         }
9740         if (profile && (profile & (profile - 1))) {
9741                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9742                         chunk_key.offset, chunk_end, profile);
9743                 err |= UNKNOWN_TYPE;
9744         }
9745
9746         bg_key.objectid = chunk_key.offset;
9747         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9748         bg_key.offset = length;
9749
9750         btrfs_init_path(&path);
9751         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9752         if (ret) {
9753                 error(
9754                 "chunk[%llu %llu) did not find the related block group item",
9755                         chunk_key.offset, chunk_end);
9756                 err |= REFERENCER_MISSING;
9757         } else{
9758                 leaf = path.nodes[0];
9759                 bi = btrfs_item_ptr(leaf, path.slots[0],
9760                                     struct btrfs_block_group_item);
9761                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9762                                    sizeof(bg_item));
9763                 if (btrfs_block_group_flags(&bg_item) != type) {
9764                         error(
9765 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9766                                 chunk_key.offset, chunk_end, type,
9767                                 btrfs_block_group_flags(&bg_item));
9768                         err |= REFERENCER_MISSING;
9769                 }
9770         }
9771
9772         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9773         for (i = 0; i < num_stripes; i++) {
9774                 btrfs_release_path(&path);
9775                 btrfs_init_path(&path);
9776                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9777                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9778                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9779
9780                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9781                                         0, 0);
9782                 if (ret)
9783                         goto not_match_dev;
9784
9785                 leaf = path.nodes[0];
9786                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9787                                      struct btrfs_dev_extent);
9788                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9789                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9790                 if (objectid != chunk_key.objectid ||
9791                     offset != chunk_key.offset ||
9792                     btrfs_dev_extent_length(leaf, ptr) != length)
9793                         goto not_match_dev;
9794                 continue;
9795 not_match_dev:
9796                 err |= BACKREF_MISSING;
9797                 error(
9798                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9799                         chunk_key.objectid, chunk_end, i);
9800                 continue;
9801         }
9802         btrfs_release_path(&path);
9803 out:
9804         return err;
9805 }
9806
9807 /*
9808  * Main entry function to check known items and update related accounting info
9809  */
9810 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9811 {
9812         struct btrfs_fs_info *fs_info = root->fs_info;
9813         struct btrfs_key key;
9814         int slot = 0;
9815         int type;
9816         struct btrfs_extent_data_ref *dref;
9817         int ret;
9818         int err = 0;
9819
9820 next:
9821         btrfs_item_key_to_cpu(eb, &key, slot);
9822         type = btrfs_key_type(&key);
9823
9824         switch (type) {
9825         case BTRFS_EXTENT_DATA_KEY:
9826                 ret = check_extent_data_item(root, eb, slot);
9827                 err |= ret;
9828                 break;
9829         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9830                 ret = check_block_group_item(fs_info, eb, slot);
9831                 err |= ret;
9832                 break;
9833         case BTRFS_DEV_ITEM_KEY:
9834                 ret = check_dev_item(fs_info, eb, slot);
9835                 err |= ret;
9836                 break;
9837         case BTRFS_CHUNK_ITEM_KEY:
9838                 ret = check_chunk_item(fs_info, eb, slot);
9839                 err |= ret;
9840                 break;
9841         case BTRFS_DEV_EXTENT_KEY:
9842                 ret = check_dev_extent_item(fs_info, eb, slot);
9843                 err |= ret;
9844                 break;
9845         case BTRFS_EXTENT_ITEM_KEY:
9846         case BTRFS_METADATA_ITEM_KEY:
9847                 ret = check_extent_item(fs_info, eb, slot);
9848                 err |= ret;
9849                 break;
9850         case BTRFS_EXTENT_CSUM_KEY:
9851                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9852                 break;
9853         case BTRFS_TREE_BLOCK_REF_KEY:
9854                 ret = check_tree_block_backref(fs_info, key.offset,
9855                                                key.objectid, -1);
9856                 err |= ret;
9857                 break;
9858         case BTRFS_EXTENT_DATA_REF_KEY:
9859                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9860                 ret = check_extent_data_backref(fs_info,
9861                                 btrfs_extent_data_ref_root(eb, dref),
9862                                 btrfs_extent_data_ref_objectid(eb, dref),
9863                                 btrfs_extent_data_ref_offset(eb, dref),
9864                                 key.objectid, 0,
9865                                 btrfs_extent_data_ref_count(eb, dref));
9866                 err |= ret;
9867                 break;
9868         case BTRFS_SHARED_BLOCK_REF_KEY:
9869                 ret = check_shared_block_backref(fs_info, key.offset,
9870                                                  key.objectid, -1);
9871                 err |= ret;
9872                 break;
9873         case BTRFS_SHARED_DATA_REF_KEY:
9874                 ret = check_shared_data_backref(fs_info, key.offset,
9875                                                 key.objectid);
9876                 err |= ret;
9877                 break;
9878         default:
9879                 break;
9880         }
9881
9882         if (++slot < btrfs_header_nritems(eb))
9883                 goto next;
9884
9885         return err;
9886 }
9887
9888 /*
9889  * Helper function for later fs/subvol tree check.  To determine if a tree
9890  * block should be checked.
9891  * This function will ensure only the direct referencer with lowest rootid to
9892  * check a fs/subvolume tree block.
9893  *
9894  * Backref check at extent tree would detect errors like missing subvolume
9895  * tree, so we can do aggressive check to reduce duplicated checks.
9896  */
9897 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9898 {
9899         struct btrfs_root *extent_root = root->fs_info->extent_root;
9900         struct btrfs_key key;
9901         struct btrfs_path path;
9902         struct extent_buffer *leaf;
9903         int slot;
9904         struct btrfs_extent_item *ei;
9905         unsigned long ptr;
9906         unsigned long end;
9907         int type;
9908         u32 item_size;
9909         u64 offset;
9910         struct btrfs_extent_inline_ref *iref;
9911         int ret;
9912
9913         btrfs_init_path(&path);
9914         key.objectid = btrfs_header_bytenr(eb);
9915         key.type = BTRFS_METADATA_ITEM_KEY;
9916         key.offset = (u64)-1;
9917
9918         /*
9919          * Any failure in backref resolving means we can't determine
9920          * whom the tree block belongs to.
9921          * So in that case, we need to check that tree block
9922          */
9923         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9924         if (ret < 0)
9925                 goto need_check;
9926
9927         ret = btrfs_previous_extent_item(extent_root, &path,
9928                                          btrfs_header_bytenr(eb));
9929         if (ret)
9930                 goto need_check;
9931
9932         leaf = path.nodes[0];
9933         slot = path.slots[0];
9934         btrfs_item_key_to_cpu(leaf, &key, slot);
9935         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9936
9937         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9938                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9939         } else {
9940                 struct btrfs_tree_block_info *info;
9941
9942                 info = (struct btrfs_tree_block_info *)(ei + 1);
9943                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9944         }
9945
9946         item_size = btrfs_item_size_nr(leaf, slot);
9947         ptr = (unsigned long)iref;
9948         end = (unsigned long)ei + item_size;
9949         while (ptr < end) {
9950                 iref = (struct btrfs_extent_inline_ref *)ptr;
9951                 type = btrfs_extent_inline_ref_type(leaf, iref);
9952                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9953
9954                 /*
9955                  * We only check the tree block if current root is
9956                  * the lowest referencer of it.
9957                  */
9958                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9959                     offset < root->objectid) {
9960                         btrfs_release_path(&path);
9961                         return 0;
9962                 }
9963
9964                 ptr += btrfs_extent_inline_ref_size(type);
9965         }
9966         /*
9967          * Normally we should also check keyed tree block ref, but that may be
9968          * very time consuming.  Inlined ref should already make us skip a lot
9969          * of refs now.  So skip search keyed tree block ref.
9970          */
9971
9972 need_check:
9973         btrfs_release_path(&path);
9974         return 1;
9975 }
9976
9977 /*
9978  * Traversal function for tree block. We will do:
9979  * 1) Skip shared fs/subvolume tree blocks
9980  * 2) Update related bytes accounting
9981  * 3) Pre-order traversal
9982  */
9983 static int traverse_tree_block(struct btrfs_root *root,
9984                                 struct extent_buffer *node)
9985 {
9986         struct extent_buffer *eb;
9987         int level;
9988         u64 nr;
9989         int i;
9990         int err = 0;
9991         int ret;
9992
9993         /*
9994          * Skip shared fs/subvolume tree block, in that case they will
9995          * be checked by referencer with lowest rootid
9996          */
9997         if (is_fstree(root->objectid) && !should_check(root, node))
9998                 return 0;
9999
10000         /* Update bytes accounting */
10001         total_btree_bytes += node->len;
10002         if (fs_root_objectid(btrfs_header_owner(node)))
10003                 total_fs_tree_bytes += node->len;
10004         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10005                 total_extent_tree_bytes += node->len;
10006         if (!found_old_backref &&
10007             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10008             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10009             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10010                 found_old_backref = 1;
10011
10012         /* pre-order tranversal, check itself first */
10013         level = btrfs_header_level(node);
10014         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10015                                    btrfs_header_level(node),
10016                                    btrfs_header_owner(node));
10017         err |= ret;
10018         if (err)
10019                 error(
10020         "check %s failed root %llu bytenr %llu level %d, force continue check",
10021                         level ? "node":"leaf", root->objectid,
10022                         btrfs_header_bytenr(node), btrfs_header_level(node));
10023
10024         if (!level) {
10025                 btree_space_waste += btrfs_leaf_free_space(root, node);
10026                 ret = check_leaf_items(root, node);
10027                 err |= ret;
10028                 return err;
10029         }
10030
10031         nr = btrfs_header_nritems(node);
10032         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10033                 sizeof(struct btrfs_key_ptr);
10034
10035         /* Then check all its children */
10036         for (i = 0; i < nr; i++) {
10037                 u64 blocknr = btrfs_node_blockptr(node, i);
10038
10039                 /*
10040                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10041                  * to call the function itself.
10042                  */
10043                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10044                 if (extent_buffer_uptodate(eb)) {
10045                         ret = traverse_tree_block(root, eb);
10046                         err |= ret;
10047                 }
10048                 free_extent_buffer(eb);
10049         }
10050
10051         return err;
10052 }
10053
10054 /*
10055  * Low memory usage version check_chunks_and_extents.
10056  */
10057 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10058 {
10059         struct btrfs_path path;
10060         struct btrfs_key key;
10061         struct btrfs_root *root1;
10062         struct btrfs_root *cur_root;
10063         int err = 0;
10064         int ret;
10065
10066         root1 = root->fs_info->chunk_root;
10067         ret = traverse_tree_block(root1, root1->node);
10068         err |= ret;
10069
10070         root1 = root->fs_info->tree_root;
10071         ret = traverse_tree_block(root1, root1->node);
10072         err |= ret;
10073
10074         btrfs_init_path(&path);
10075         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10076         key.offset = 0;
10077         key.type = BTRFS_ROOT_ITEM_KEY;
10078
10079         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10080         if (ret) {
10081                 error("cannot find extent treet in tree_root");
10082                 goto out;
10083         }
10084
10085         while (1) {
10086                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10087                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10088                         goto next;
10089                 key.offset = (u64)-1;
10090
10091                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10092                 if (IS_ERR(cur_root) || !cur_root) {
10093                         error("failed to read tree: %lld", key.objectid);
10094                         goto next;
10095                 }
10096
10097                 ret = traverse_tree_block(cur_root, cur_root->node);
10098                 err |= ret;
10099
10100 next:
10101                 ret = btrfs_next_item(root1, &path);
10102                 if (ret)
10103                         goto out;
10104         }
10105
10106 out:
10107         btrfs_release_path(&path);
10108         return err;
10109 }
10110
10111 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10112                            struct btrfs_root *root, int overwrite)
10113 {
10114         struct extent_buffer *c;
10115         struct extent_buffer *old = root->node;
10116         int level;
10117         int ret;
10118         struct btrfs_disk_key disk_key = {0,0,0};
10119
10120         level = 0;
10121
10122         if (overwrite) {
10123                 c = old;
10124                 extent_buffer_get(c);
10125                 goto init;
10126         }
10127         c = btrfs_alloc_free_block(trans, root,
10128                                    root->nodesize,
10129                                    root->root_key.objectid,
10130                                    &disk_key, level, 0, 0);
10131         if (IS_ERR(c)) {
10132                 c = old;
10133                 extent_buffer_get(c);
10134                 overwrite = 1;
10135         }
10136 init:
10137         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10138         btrfs_set_header_level(c, level);
10139         btrfs_set_header_bytenr(c, c->start);
10140         btrfs_set_header_generation(c, trans->transid);
10141         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10142         btrfs_set_header_owner(c, root->root_key.objectid);
10143
10144         write_extent_buffer(c, root->fs_info->fsid,
10145                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10146
10147         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10148                             btrfs_header_chunk_tree_uuid(c),
10149                             BTRFS_UUID_SIZE);
10150
10151         btrfs_mark_buffer_dirty(c);
10152         /*
10153          * this case can happen in the following case:
10154          *
10155          * 1.overwrite previous root.
10156          *
10157          * 2.reinit reloc data root, this is because we skip pin
10158          * down reloc data tree before which means we can allocate
10159          * same block bytenr here.
10160          */
10161         if (old->start == c->start) {
10162                 btrfs_set_root_generation(&root->root_item,
10163                                           trans->transid);
10164                 root->root_item.level = btrfs_header_level(root->node);
10165                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10166                                         &root->root_key, &root->root_item);
10167                 if (ret) {
10168                         free_extent_buffer(c);
10169                         return ret;
10170                 }
10171         }
10172         free_extent_buffer(old);
10173         root->node = c;
10174         add_root_to_dirty_list(root);
10175         return 0;
10176 }
10177
10178 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10179                                 struct extent_buffer *eb, int tree_root)
10180 {
10181         struct extent_buffer *tmp;
10182         struct btrfs_root_item *ri;
10183         struct btrfs_key key;
10184         u64 bytenr;
10185         u32 nodesize;
10186         int level = btrfs_header_level(eb);
10187         int nritems;
10188         int ret;
10189         int i;
10190
10191         /*
10192          * If we have pinned this block before, don't pin it again.
10193          * This can not only avoid forever loop with broken filesystem
10194          * but also give us some speedups.
10195          */
10196         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10197                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10198                 return 0;
10199
10200         btrfs_pin_extent(fs_info, eb->start, eb->len);
10201
10202         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10203         nritems = btrfs_header_nritems(eb);
10204         for (i = 0; i < nritems; i++) {
10205                 if (level == 0) {
10206                         btrfs_item_key_to_cpu(eb, &key, i);
10207                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10208                                 continue;
10209                         /* Skip the extent root and reloc roots */
10210                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10211                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10212                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10213                                 continue;
10214                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10215                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10216
10217                         /*
10218                          * If at any point we start needing the real root we
10219                          * will have to build a stump root for the root we are
10220                          * in, but for now this doesn't actually use the root so
10221                          * just pass in extent_root.
10222                          */
10223                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10224                                               nodesize, 0);
10225                         if (!extent_buffer_uptodate(tmp)) {
10226                                 fprintf(stderr, "Error reading root block\n");
10227                                 return -EIO;
10228                         }
10229                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10230                         free_extent_buffer(tmp);
10231                         if (ret)
10232                                 return ret;
10233                 } else {
10234                         bytenr = btrfs_node_blockptr(eb, i);
10235
10236                         /* If we aren't the tree root don't read the block */
10237                         if (level == 1 && !tree_root) {
10238                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10239                                 continue;
10240                         }
10241
10242                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10243                                               nodesize, 0);
10244                         if (!extent_buffer_uptodate(tmp)) {
10245                                 fprintf(stderr, "Error reading tree block\n");
10246                                 return -EIO;
10247                         }
10248                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10249                         free_extent_buffer(tmp);
10250                         if (ret)
10251                                 return ret;
10252                 }
10253         }
10254
10255         return 0;
10256 }
10257
10258 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10259 {
10260         int ret;
10261
10262         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10263         if (ret)
10264                 return ret;
10265
10266         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10267 }
10268
10269 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10270 {
10271         struct btrfs_block_group_cache *cache;
10272         struct btrfs_path *path;
10273         struct extent_buffer *leaf;
10274         struct btrfs_chunk *chunk;
10275         struct btrfs_key key;
10276         int ret;
10277         u64 start;
10278
10279         path = btrfs_alloc_path();
10280         if (!path)
10281                 return -ENOMEM;
10282
10283         key.objectid = 0;
10284         key.type = BTRFS_CHUNK_ITEM_KEY;
10285         key.offset = 0;
10286
10287         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10288         if (ret < 0) {
10289                 btrfs_free_path(path);
10290                 return ret;
10291         }
10292
10293         /*
10294          * We do this in case the block groups were screwed up and had alloc
10295          * bits that aren't actually set on the chunks.  This happens with
10296          * restored images every time and could happen in real life I guess.
10297          */
10298         fs_info->avail_data_alloc_bits = 0;
10299         fs_info->avail_metadata_alloc_bits = 0;
10300         fs_info->avail_system_alloc_bits = 0;
10301
10302         /* First we need to create the in-memory block groups */
10303         while (1) {
10304                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10305                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10306                         if (ret < 0) {
10307                                 btrfs_free_path(path);
10308                                 return ret;
10309                         }
10310                         if (ret) {
10311                                 ret = 0;
10312                                 break;
10313                         }
10314                 }
10315                 leaf = path->nodes[0];
10316                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10317                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10318                         path->slots[0]++;
10319                         continue;
10320                 }
10321
10322                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10323                                        struct btrfs_chunk);
10324                 btrfs_add_block_group(fs_info, 0,
10325                                       btrfs_chunk_type(leaf, chunk),
10326                                       key.objectid, key.offset,
10327                                       btrfs_chunk_length(leaf, chunk));
10328                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10329                                  key.offset + btrfs_chunk_length(leaf, chunk),
10330                                  GFP_NOFS);
10331                 path->slots[0]++;
10332         }
10333         start = 0;
10334         while (1) {
10335                 cache = btrfs_lookup_first_block_group(fs_info, start);
10336                 if (!cache)
10337                         break;
10338                 cache->cached = 1;
10339                 start = cache->key.objectid + cache->key.offset;
10340         }
10341
10342         btrfs_free_path(path);
10343         return 0;
10344 }
10345
10346 static int reset_balance(struct btrfs_trans_handle *trans,
10347                          struct btrfs_fs_info *fs_info)
10348 {
10349         struct btrfs_root *root = fs_info->tree_root;
10350         struct btrfs_path *path;
10351         struct extent_buffer *leaf;
10352         struct btrfs_key key;
10353         int del_slot, del_nr = 0;
10354         int ret;
10355         int found = 0;
10356
10357         path = btrfs_alloc_path();
10358         if (!path)
10359                 return -ENOMEM;
10360
10361         key.objectid = BTRFS_BALANCE_OBJECTID;
10362         key.type = BTRFS_BALANCE_ITEM_KEY;
10363         key.offset = 0;
10364
10365         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10366         if (ret) {
10367                 if (ret > 0)
10368                         ret = 0;
10369                 if (!ret)
10370                         goto reinit_data_reloc;
10371                 else
10372                         goto out;
10373         }
10374
10375         ret = btrfs_del_item(trans, root, path);
10376         if (ret)
10377                 goto out;
10378         btrfs_release_path(path);
10379
10380         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10381         key.type = BTRFS_ROOT_ITEM_KEY;
10382         key.offset = 0;
10383
10384         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10385         if (ret < 0)
10386                 goto out;
10387         while (1) {
10388                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10389                         if (!found)
10390                                 break;
10391
10392                         if (del_nr) {
10393                                 ret = btrfs_del_items(trans, root, path,
10394                                                       del_slot, del_nr);
10395                                 del_nr = 0;
10396                                 if (ret)
10397                                         goto out;
10398                         }
10399                         key.offset++;
10400                         btrfs_release_path(path);
10401
10402                         found = 0;
10403                         ret = btrfs_search_slot(trans, root, &key, path,
10404                                                 -1, 1);
10405                         if (ret < 0)
10406                                 goto out;
10407                         continue;
10408                 }
10409                 found = 1;
10410                 leaf = path->nodes[0];
10411                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10412                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10413                         break;
10414                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10415                         path->slots[0]++;
10416                         continue;
10417                 }
10418                 if (!del_nr) {
10419                         del_slot = path->slots[0];
10420                         del_nr = 1;
10421                 } else {
10422                         del_nr++;
10423                 }
10424                 path->slots[0]++;
10425         }
10426
10427         if (del_nr) {
10428                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10429                 if (ret)
10430                         goto out;
10431         }
10432         btrfs_release_path(path);
10433
10434 reinit_data_reloc:
10435         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10436         key.type = BTRFS_ROOT_ITEM_KEY;
10437         key.offset = (u64)-1;
10438         root = btrfs_read_fs_root(fs_info, &key);
10439         if (IS_ERR(root)) {
10440                 fprintf(stderr, "Error reading data reloc tree\n");
10441                 ret = PTR_ERR(root);
10442                 goto out;
10443         }
10444         record_root_in_trans(trans, root);
10445         ret = btrfs_fsck_reinit_root(trans, root, 0);
10446         if (ret)
10447                 goto out;
10448         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10449 out:
10450         btrfs_free_path(path);
10451         return ret;
10452 }
10453
10454 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10455                               struct btrfs_fs_info *fs_info)
10456 {
10457         u64 start = 0;
10458         int ret;
10459
10460         /*
10461          * The only reason we don't do this is because right now we're just
10462          * walking the trees we find and pinning down their bytes, we don't look
10463          * at any of the leaves.  In order to do mixed groups we'd have to check
10464          * the leaves of any fs roots and pin down the bytes for any file
10465          * extents we find.  Not hard but why do it if we don't have to?
10466          */
10467         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10468                 fprintf(stderr, "We don't support re-initing the extent tree "
10469                         "for mixed block groups yet, please notify a btrfs "
10470                         "developer you want to do this so they can add this "
10471                         "functionality.\n");
10472                 return -EINVAL;
10473         }
10474
10475         /*
10476          * first we need to walk all of the trees except the extent tree and pin
10477          * down the bytes that are in use so we don't overwrite any existing
10478          * metadata.
10479          */
10480         ret = pin_metadata_blocks(fs_info);
10481         if (ret) {
10482                 fprintf(stderr, "error pinning down used bytes\n");
10483                 return ret;
10484         }
10485
10486         /*
10487          * Need to drop all the block groups since we're going to recreate all
10488          * of them again.
10489          */
10490         btrfs_free_block_groups(fs_info);
10491         ret = reset_block_groups(fs_info);
10492         if (ret) {
10493                 fprintf(stderr, "error resetting the block groups\n");
10494                 return ret;
10495         }
10496
10497         /* Ok we can allocate now, reinit the extent root */
10498         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10499         if (ret) {
10500                 fprintf(stderr, "extent root initialization failed\n");
10501                 /*
10502                  * When the transaction code is updated we should end the
10503                  * transaction, but for now progs only knows about commit so
10504                  * just return an error.
10505                  */
10506                 return ret;
10507         }
10508
10509         /*
10510          * Now we have all the in-memory block groups setup so we can make
10511          * allocations properly, and the metadata we care about is safe since we
10512          * pinned all of it above.
10513          */
10514         while (1) {
10515                 struct btrfs_block_group_cache *cache;
10516
10517                 cache = btrfs_lookup_first_block_group(fs_info, start);
10518                 if (!cache)
10519                         break;
10520                 start = cache->key.objectid + cache->key.offset;
10521                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10522                                         &cache->key, &cache->item,
10523                                         sizeof(cache->item));
10524                 if (ret) {
10525                         fprintf(stderr, "Error adding block group\n");
10526                         return ret;
10527                 }
10528                 btrfs_extent_post_op(trans, fs_info->extent_root);
10529         }
10530
10531         ret = reset_balance(trans, fs_info);
10532         if (ret)
10533                 fprintf(stderr, "error resetting the pending balance\n");
10534
10535         return ret;
10536 }
10537
10538 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10539 {
10540         struct btrfs_path *path;
10541         struct btrfs_trans_handle *trans;
10542         struct btrfs_key key;
10543         int ret;
10544
10545         printf("Recowing metadata block %llu\n", eb->start);
10546         key.objectid = btrfs_header_owner(eb);
10547         key.type = BTRFS_ROOT_ITEM_KEY;
10548         key.offset = (u64)-1;
10549
10550         root = btrfs_read_fs_root(root->fs_info, &key);
10551         if (IS_ERR(root)) {
10552                 fprintf(stderr, "Couldn't find owner root %llu\n",
10553                         key.objectid);
10554                 return PTR_ERR(root);
10555         }
10556
10557         path = btrfs_alloc_path();
10558         if (!path)
10559                 return -ENOMEM;
10560
10561         trans = btrfs_start_transaction(root, 1);
10562         if (IS_ERR(trans)) {
10563                 btrfs_free_path(path);
10564                 return PTR_ERR(trans);
10565         }
10566
10567         path->lowest_level = btrfs_header_level(eb);
10568         if (path->lowest_level)
10569                 btrfs_node_key_to_cpu(eb, &key, 0);
10570         else
10571                 btrfs_item_key_to_cpu(eb, &key, 0);
10572
10573         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10574         btrfs_commit_transaction(trans, root);
10575         btrfs_free_path(path);
10576         return ret;
10577 }
10578
10579 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10580 {
10581         struct btrfs_path *path;
10582         struct btrfs_trans_handle *trans;
10583         struct btrfs_key key;
10584         int ret;
10585
10586         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10587                bad->key.type, bad->key.offset);
10588         key.objectid = bad->root_id;
10589         key.type = BTRFS_ROOT_ITEM_KEY;
10590         key.offset = (u64)-1;
10591
10592         root = btrfs_read_fs_root(root->fs_info, &key);
10593         if (IS_ERR(root)) {
10594                 fprintf(stderr, "Couldn't find owner root %llu\n",
10595                         key.objectid);
10596                 return PTR_ERR(root);
10597         }
10598
10599         path = btrfs_alloc_path();
10600         if (!path)
10601                 return -ENOMEM;
10602
10603         trans = btrfs_start_transaction(root, 1);
10604         if (IS_ERR(trans)) {
10605                 btrfs_free_path(path);
10606                 return PTR_ERR(trans);
10607         }
10608
10609         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10610         if (ret) {
10611                 if (ret > 0)
10612                         ret = 0;
10613                 goto out;
10614         }
10615         ret = btrfs_del_item(trans, root, path);
10616 out:
10617         btrfs_commit_transaction(trans, root);
10618         btrfs_free_path(path);
10619         return ret;
10620 }
10621
10622 static int zero_log_tree(struct btrfs_root *root)
10623 {
10624         struct btrfs_trans_handle *trans;
10625         int ret;
10626
10627         trans = btrfs_start_transaction(root, 1);
10628         if (IS_ERR(trans)) {
10629                 ret = PTR_ERR(trans);
10630                 return ret;
10631         }
10632         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10633         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10634         ret = btrfs_commit_transaction(trans, root);
10635         return ret;
10636 }
10637
10638 static int populate_csum(struct btrfs_trans_handle *trans,
10639                          struct btrfs_root *csum_root, char *buf, u64 start,
10640                          u64 len)
10641 {
10642         u64 offset = 0;
10643         u64 sectorsize;
10644         int ret = 0;
10645
10646         while (offset < len) {
10647                 sectorsize = csum_root->sectorsize;
10648                 ret = read_extent_data(csum_root, buf, start + offset,
10649                                        &sectorsize, 0);
10650                 if (ret)
10651                         break;
10652                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10653                                             start + offset, buf, sectorsize);
10654                 if (ret)
10655                         break;
10656                 offset += sectorsize;
10657         }
10658         return ret;
10659 }
10660
10661 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10662                                       struct btrfs_root *csum_root,
10663                                       struct btrfs_root *cur_root)
10664 {
10665         struct btrfs_path *path;
10666         struct btrfs_key key;
10667         struct extent_buffer *node;
10668         struct btrfs_file_extent_item *fi;
10669         char *buf = NULL;
10670         u64 start = 0;
10671         u64 len = 0;
10672         int slot = 0;
10673         int ret = 0;
10674
10675         path = btrfs_alloc_path();
10676         if (!path)
10677                 return -ENOMEM;
10678         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10679         if (!buf) {
10680                 ret = -ENOMEM;
10681                 goto out;
10682         }
10683
10684         key.objectid = 0;
10685         key.offset = 0;
10686         key.type = 0;
10687
10688         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10689         if (ret < 0)
10690                 goto out;
10691         /* Iterate all regular file extents and fill its csum */
10692         while (1) {
10693                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10694
10695                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10696                         goto next;
10697                 node = path->nodes[0];
10698                 slot = path->slots[0];
10699                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10700                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10701                         goto next;
10702                 start = btrfs_file_extent_disk_bytenr(node, fi);
10703                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10704
10705                 ret = populate_csum(trans, csum_root, buf, start, len);
10706                 if (ret == -EEXIST)
10707                         ret = 0;
10708                 if (ret < 0)
10709                         goto out;
10710 next:
10711                 /*
10712                  * TODO: if next leaf is corrupted, jump to nearest next valid
10713                  * leaf.
10714                  */
10715                 ret = btrfs_next_item(cur_root, path);
10716                 if (ret < 0)
10717                         goto out;
10718                 if (ret > 0) {
10719                         ret = 0;
10720                         goto out;
10721                 }
10722         }
10723
10724 out:
10725         btrfs_free_path(path);
10726         free(buf);
10727         return ret;
10728 }
10729
10730 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10731                                   struct btrfs_root *csum_root)
10732 {
10733         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10734         struct btrfs_path *path;
10735         struct btrfs_root *tree_root = fs_info->tree_root;
10736         struct btrfs_root *cur_root;
10737         struct extent_buffer *node;
10738         struct btrfs_key key;
10739         int slot = 0;
10740         int ret = 0;
10741
10742         path = btrfs_alloc_path();
10743         if (!path)
10744                 return -ENOMEM;
10745
10746         key.objectid = BTRFS_FS_TREE_OBJECTID;
10747         key.offset = 0;
10748         key.type = BTRFS_ROOT_ITEM_KEY;
10749
10750         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10751         if (ret < 0)
10752                 goto out;
10753         if (ret > 0) {
10754                 ret = -ENOENT;
10755                 goto out;
10756         }
10757
10758         while (1) {
10759                 node = path->nodes[0];
10760                 slot = path->slots[0];
10761                 btrfs_item_key_to_cpu(node, &key, slot);
10762                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10763                         goto out;
10764                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10765                         goto next;
10766                 if (!is_fstree(key.objectid))
10767                         goto next;
10768                 key.offset = (u64)-1;
10769
10770                 cur_root = btrfs_read_fs_root(fs_info, &key);
10771                 if (IS_ERR(cur_root) || !cur_root) {
10772                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10773                                 key.objectid);
10774                         goto out;
10775                 }
10776                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10777                                 cur_root);
10778                 if (ret < 0)
10779                         goto out;
10780 next:
10781                 ret = btrfs_next_item(tree_root, path);
10782                 if (ret > 0) {
10783                         ret = 0;
10784                         goto out;
10785                 }
10786                 if (ret < 0)
10787                         goto out;
10788         }
10789
10790 out:
10791         btrfs_free_path(path);
10792         return ret;
10793 }
10794
10795 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10796                                       struct btrfs_root *csum_root)
10797 {
10798         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10799         struct btrfs_path *path;
10800         struct btrfs_extent_item *ei;
10801         struct extent_buffer *leaf;
10802         char *buf;
10803         struct btrfs_key key;
10804         int ret;
10805
10806         path = btrfs_alloc_path();
10807         if (!path)
10808                 return -ENOMEM;
10809
10810         key.objectid = 0;
10811         key.type = BTRFS_EXTENT_ITEM_KEY;
10812         key.offset = 0;
10813
10814         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10815         if (ret < 0) {
10816                 btrfs_free_path(path);
10817                 return ret;
10818         }
10819
10820         buf = malloc(csum_root->sectorsize);
10821         if (!buf) {
10822                 btrfs_free_path(path);
10823                 return -ENOMEM;
10824         }
10825
10826         while (1) {
10827                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10828                         ret = btrfs_next_leaf(extent_root, path);
10829                         if (ret < 0)
10830                                 break;
10831                         if (ret) {
10832                                 ret = 0;
10833                                 break;
10834                         }
10835                 }
10836                 leaf = path->nodes[0];
10837
10838                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10839                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10840                         path->slots[0]++;
10841                         continue;
10842                 }
10843
10844                 ei = btrfs_item_ptr(leaf, path->slots[0],
10845                                     struct btrfs_extent_item);
10846                 if (!(btrfs_extent_flags(leaf, ei) &
10847                       BTRFS_EXTENT_FLAG_DATA)) {
10848                         path->slots[0]++;
10849                         continue;
10850                 }
10851
10852                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10853                                     key.offset);
10854                 if (ret)
10855                         break;
10856                 path->slots[0]++;
10857         }
10858
10859         btrfs_free_path(path);
10860         free(buf);
10861         return ret;
10862 }
10863
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * The extent tree is the default source of data extents.  After an extent
 * tree init it no longer holds any extent info, so callers set
 * @search_fs_tree to have the fs/subvolume trees walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
10880
10881 static void free_roots_info_cache(void)
10882 {
10883         if (!roots_info_cache)
10884                 return;
10885
10886         while (!cache_tree_empty(roots_info_cache)) {
10887                 struct cache_extent *entry;
10888                 struct root_item_info *rii;
10889
10890                 entry = first_cache_extent(roots_info_cache);
10891                 if (!entry)
10892                         break;
10893                 remove_cache_extent(roots_info_cache, entry);
10894                 rii = container_of(entry, struct root_item_info, cache_extent);
10895                 free(rii);
10896         }
10897
10898         free(roots_info_cache);
10899         roots_info_cache = NULL;
10900 }
10901
10902 static int build_roots_info_cache(struct btrfs_fs_info *info)
10903 {
10904         int ret = 0;
10905         struct btrfs_key key;
10906         struct extent_buffer *leaf;
10907         struct btrfs_path *path;
10908
10909         if (!roots_info_cache) {
10910                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10911                 if (!roots_info_cache)
10912                         return -ENOMEM;
10913                 cache_tree_init(roots_info_cache);
10914         }
10915
10916         path = btrfs_alloc_path();
10917         if (!path)
10918                 return -ENOMEM;
10919
10920         key.objectid = 0;
10921         key.type = BTRFS_EXTENT_ITEM_KEY;
10922         key.offset = 0;
10923
10924         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10925         if (ret < 0)
10926                 goto out;
10927         leaf = path->nodes[0];
10928
10929         while (1) {
10930                 struct btrfs_key found_key;
10931                 struct btrfs_extent_item *ei;
10932                 struct btrfs_extent_inline_ref *iref;
10933                 int slot = path->slots[0];
10934                 int type;
10935                 u64 flags;
10936                 u64 root_id;
10937                 u8 level;
10938                 struct cache_extent *entry;
10939                 struct root_item_info *rii;
10940
10941                 if (slot >= btrfs_header_nritems(leaf)) {
10942                         ret = btrfs_next_leaf(info->extent_root, path);
10943                         if (ret < 0) {
10944                                 break;
10945                         } else if (ret) {
10946                                 ret = 0;
10947                                 break;
10948                         }
10949                         leaf = path->nodes[0];
10950                         slot = path->slots[0];
10951                 }
10952
10953                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10954
10955                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10956                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10957                         goto next;
10958
10959                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10960                 flags = btrfs_extent_flags(leaf, ei);
10961
10962                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10963                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10964                         goto next;
10965
10966                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10967                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10968                         level = found_key.offset;
10969                 } else {
10970                         struct btrfs_tree_block_info *binfo;
10971
10972                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10973                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10974                         level = btrfs_tree_block_level(leaf, binfo);
10975                 }
10976
10977                 /*
10978                  * For a root extent, it must be of the following type and the
10979                  * first (and only one) iref in the item.
10980                  */
10981                 type = btrfs_extent_inline_ref_type(leaf, iref);
10982                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10983                         goto next;
10984
10985                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10986                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10987                 if (!entry) {
10988                         rii = malloc(sizeof(struct root_item_info));
10989                         if (!rii) {
10990                                 ret = -ENOMEM;
10991                                 goto out;
10992                         }
10993                         rii->cache_extent.start = root_id;
10994                         rii->cache_extent.size = 1;
10995                         rii->level = (u8)-1;
10996                         entry = &rii->cache_extent;
10997                         ret = insert_cache_extent(roots_info_cache, entry);
10998                         ASSERT(ret == 0);
10999                 } else {
11000                         rii = container_of(entry, struct root_item_info,
11001                                            cache_extent);
11002                 }
11003
11004                 ASSERT(rii->cache_extent.start == root_id);
11005                 ASSERT(rii->cache_extent.size == 1);
11006
11007                 if (level > rii->level || rii->level == (u8)-1) {
11008                         rii->level = level;
11009                         rii->bytenr = found_key.objectid;
11010                         rii->gen = btrfs_extent_generation(leaf, ei);
11011                         rii->node_count = 1;
11012                 } else if (level == rii->level) {
11013                         rii->node_count++;
11014                 }
11015 next:
11016                 path->slots[0]++;
11017         }
11018
11019 out:
11020         btrfs_free_path(path);
11021
11022         return ret;
11023 }
11024
11025 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11026                                   struct btrfs_path *path,
11027                                   const struct btrfs_key *root_key,
11028                                   const int read_only_mode)
11029 {
11030         const u64 root_id = root_key->objectid;
11031         struct cache_extent *entry;
11032         struct root_item_info *rii;
11033         struct btrfs_root_item ri;
11034         unsigned long offset;
11035
11036         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11037         if (!entry) {
11038                 fprintf(stderr,
11039                         "Error: could not find extent items for root %llu\n",
11040                         root_key->objectid);
11041                 return -ENOENT;
11042         }
11043
11044         rii = container_of(entry, struct root_item_info, cache_extent);
11045         ASSERT(rii->cache_extent.start == root_id);
11046         ASSERT(rii->cache_extent.size == 1);
11047
11048         if (rii->node_count != 1) {
11049                 fprintf(stderr,
11050                         "Error: could not find btree root extent for root %llu\n",
11051                         root_id);
11052                 return -ENOENT;
11053         }
11054
11055         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11056         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11057
11058         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11059             btrfs_root_level(&ri) != rii->level ||
11060             btrfs_root_generation(&ri) != rii->gen) {
11061
11062                 /*
11063                  * If we're in repair mode but our caller told us to not update
11064                  * the root item, i.e. just check if it needs to be updated, don't
11065                  * print this message, since the caller will call us again shortly
11066                  * for the same root item without read only mode (the caller will
11067                  * open a transaction first).
11068                  */
11069                 if (!(read_only_mode && repair))
11070                         fprintf(stderr,
11071                                 "%sroot item for root %llu,"
11072                                 " current bytenr %llu, current gen %llu, current level %u,"
11073                                 " new bytenr %llu, new gen %llu, new level %u\n",
11074                                 (read_only_mode ? "" : "fixing "),
11075                                 root_id,
11076                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11077                                 btrfs_root_level(&ri),
11078                                 rii->bytenr, rii->gen, rii->level);
11079
11080                 if (btrfs_root_generation(&ri) > rii->gen) {
11081                         fprintf(stderr,
11082                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11083                                 root_id, btrfs_root_generation(&ri), rii->gen);
11084                         return -EINVAL;
11085                 }
11086
11087                 if (!read_only_mode) {
11088                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11089                         btrfs_set_root_level(&ri, rii->level);
11090                         btrfs_set_root_generation(&ri, rii->gen);
11091                         write_extent_buffer(path->nodes[0], &ri,
11092                                             offset, sizeof(ri));
11093                 }
11094
11095                 return 1;
11096         }
11097
11098         return 0;
11099 }
11100
11101 /*
11102  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11103  * caused read-only snapshots to be corrupted if they were created at a moment
11104  * when the source subvolume/snapshot had orphan items. The issue was that the
11105  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11106  * node instead of the post orphan cleanup root node.
11107  * So this function, and its callees, just detects and fixes those cases. Even
11108  * though the regression was for read-only snapshots, this function applies to
11109  * any snapshot/subvolume root.
11110  * This must be run before any other repair code - not doing it so, makes other
11111  * repair code delete or modify backrefs in the extent tree for example, which
11112  * will result in an inconsistent fs after repairing the root items.
11113  */
11114 static int repair_root_items(struct btrfs_fs_info *info)
11115 {
11116         struct btrfs_path *path = NULL;
11117         struct btrfs_key key;
11118         struct extent_buffer *leaf;
11119         struct btrfs_trans_handle *trans = NULL;
11120         int ret = 0;
11121         int bad_roots = 0;
11122         int need_trans = 0;
11123
11124         ret = build_roots_info_cache(info);
11125         if (ret)
11126                 goto out;
11127
11128         path = btrfs_alloc_path();
11129         if (!path) {
11130                 ret = -ENOMEM;
11131                 goto out;
11132         }
11133
11134         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11135         key.type = BTRFS_ROOT_ITEM_KEY;
11136         key.offset = 0;
11137
11138 again:
11139         /*
11140          * Avoid opening and committing transactions if a leaf doesn't have
11141          * any root items that need to be fixed, so that we avoid rotating
11142          * backup roots unnecessarily.
11143          */
11144         if (need_trans) {
11145                 trans = btrfs_start_transaction(info->tree_root, 1);
11146                 if (IS_ERR(trans)) {
11147                         ret = PTR_ERR(trans);
11148                         goto out;
11149                 }
11150         }
11151
11152         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11153                                 0, trans ? 1 : 0);
11154         if (ret < 0)
11155                 goto out;
11156         leaf = path->nodes[0];
11157
11158         while (1) {
11159                 struct btrfs_key found_key;
11160
11161                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11162                         int no_more_keys = find_next_key(path, &key);
11163
11164                         btrfs_release_path(path);
11165                         if (trans) {
11166                                 ret = btrfs_commit_transaction(trans,
11167                                                                info->tree_root);
11168                                 trans = NULL;
11169                                 if (ret < 0)
11170                                         goto out;
11171                         }
11172                         need_trans = 0;
11173                         if (no_more_keys)
11174                                 break;
11175                         goto again;
11176                 }
11177
11178                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11179
11180                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11181                         goto next;
11182                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11183                         goto next;
11184
11185                 ret = maybe_repair_root_item(info, path, &found_key,
11186                                              trans ? 0 : 1);
11187                 if (ret < 0)
11188                         goto out;
11189                 if (ret) {
11190                         if (!trans && repair) {
11191                                 need_trans = 1;
11192                                 key = found_key;
11193                                 btrfs_release_path(path);
11194                                 goto again;
11195                         }
11196                         bad_roots++;
11197                 }
11198 next:
11199                 path->slots[0]++;
11200         }
11201         ret = 0;
11202 out:
11203         free_roots_info_cache();
11204         btrfs_free_path(path);
11205         if (trans)
11206                 btrfs_commit_transaction(trans, info->tree_root);
11207         if (ret < 0)
11208                 return ret;
11209
11210         return bad_roots;
11211 }
11212
/*
 * Help text of "btrfs check", as a NULL terminated list of lines: the
 * synopsis, a one line summary, the long description, an empty line and then
 * the option list.
 * NOTE(review): presumably consumed by the generic command usage printer,
 * confirm the expected layout against it before reordering lines.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
11242
11243 int cmd_check(int argc, char **argv)
11244 {
11245         struct cache_tree root_cache;
11246         struct btrfs_root *root;
11247         struct btrfs_fs_info *info;
11248         u64 bytenr = 0;
11249         u64 subvolid = 0;
11250         u64 tree_root_bytenr = 0;
11251         u64 chunk_root_bytenr = 0;
11252         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11253         int ret;
11254         u64 num;
11255         int init_csum_tree = 0;
11256         int readonly = 0;
11257         int qgroup_report = 0;
11258         int qgroups_repaired = 0;
11259         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11260
11261         while(1) {
11262                 int c;
11263                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11264                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11265                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11266                         GETOPT_VAL_MODE };
11267                 static const struct option long_options[] = {
11268                         { "super", required_argument, NULL, 's' },
11269                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11270                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11271                         { "init-csum-tree", no_argument, NULL,
11272                                 GETOPT_VAL_INIT_CSUM },
11273                         { "init-extent-tree", no_argument, NULL,
11274                                 GETOPT_VAL_INIT_EXTENT },
11275                         { "check-data-csum", no_argument, NULL,
11276                                 GETOPT_VAL_CHECK_CSUM },
11277                         { "backup", no_argument, NULL, 'b' },
11278                         { "subvol-extents", required_argument, NULL, 'E' },
11279                         { "qgroup-report", no_argument, NULL, 'Q' },
11280                         { "tree-root", required_argument, NULL, 'r' },
11281                         { "chunk-root", required_argument, NULL,
11282                                 GETOPT_VAL_CHUNK_TREE },
11283                         { "progress", no_argument, NULL, 'p' },
11284                         { "mode", required_argument, NULL,
11285                                 GETOPT_VAL_MODE },
11286                         { NULL, 0, NULL, 0}
11287                 };
11288
11289                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11290                 if (c < 0)
11291                         break;
11292                 switch(c) {
11293                         case 'a': /* ignored */ break;
11294                         case 'b':
11295                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11296                                 break;
11297                         case 's':
11298                                 num = arg_strtou64(optarg);
11299                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11300                                         fprintf(stderr,
11301                                                 "ERROR: super mirror should be less than: %d\n",
11302                                                 BTRFS_SUPER_MIRROR_MAX);
11303                                         exit(1);
11304                                 }
11305                                 bytenr = btrfs_sb_offset(((int)num));
11306                                 printf("using SB copy %llu, bytenr %llu\n", num,
11307                                        (unsigned long long)bytenr);
11308                                 break;
11309                         case 'Q':
11310                                 qgroup_report = 1;
11311                                 break;
11312                         case 'E':
11313                                 subvolid = arg_strtou64(optarg);
11314                                 break;
11315                         case 'r':
11316                                 tree_root_bytenr = arg_strtou64(optarg);
11317                                 break;
11318                         case GETOPT_VAL_CHUNK_TREE:
11319                                 chunk_root_bytenr = arg_strtou64(optarg);
11320                                 break;
11321                         case 'p':
11322                                 ctx.progress_enabled = true;
11323                                 break;
11324                         case '?':
11325                         case 'h':
11326                                 usage(cmd_check_usage);
11327                         case GETOPT_VAL_REPAIR:
11328                                 printf("enabling repair mode\n");
11329                                 repair = 1;
11330                                 ctree_flags |= OPEN_CTREE_WRITES;
11331                                 break;
11332                         case GETOPT_VAL_READONLY:
11333                                 readonly = 1;
11334                                 break;
11335                         case GETOPT_VAL_INIT_CSUM:
11336                                 printf("Creating a new CRC tree\n");
11337                                 init_csum_tree = 1;
11338                                 repair = 1;
11339                                 ctree_flags |= OPEN_CTREE_WRITES;
11340                                 break;
11341                         case GETOPT_VAL_INIT_EXTENT:
11342                                 init_extent_tree = 1;
11343                                 ctree_flags |= (OPEN_CTREE_WRITES |
11344                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11345                                 repair = 1;
11346                                 break;
11347                         case GETOPT_VAL_CHECK_CSUM:
11348                                 check_data_csum = 1;
11349                                 break;
11350                         case GETOPT_VAL_MODE:
11351                                 check_mode = parse_check_mode(optarg);
11352                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11353                                         error("unknown mode: %s", optarg);
11354                                         exit(1);
11355                                 }
11356                                 break;
11357                 }
11358         }
11359
11360         if (check_argc_exact(argc - optind, 1))
11361                 usage(cmd_check_usage);
11362
11363         if (ctx.progress_enabled) {
11364                 ctx.tp = TASK_NOTHING;
11365                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11366         }
11367
11368         /* This check is the only reason for --readonly to exist */
11369         if (readonly && repair) {
11370                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11371                 exit(1);
11372         }
11373
11374         /*
11375          * Not supported yet
11376          */
11377         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11378                 error("Low memory mode doesn't support repair yet");
11379                 exit(1);
11380         }
11381
11382         radix_tree_init();
11383         cache_tree_init(&root_cache);
11384
11385         if((ret = check_mounted(argv[optind])) < 0) {
11386                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11387                 goto err_out;
11388         } else if(ret) {
11389                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11390                 ret = -EBUSY;
11391                 goto err_out;
11392         }
11393
11394         /* only allow partial opening under repair mode */
11395         if (repair)
11396                 ctree_flags |= OPEN_CTREE_PARTIAL;
11397
11398         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11399                                   chunk_root_bytenr, ctree_flags);
11400         if (!info) {
11401                 fprintf(stderr, "Couldn't open file system\n");
11402                 ret = -EIO;
11403                 goto err_out;
11404         }
11405
11406         global_info = info;
11407         root = info->fs_root;
11408
11409         /*
11410          * repair mode will force us to commit transaction which
11411          * will make us fail to load log tree when mounting.
11412          */
11413         if (repair && btrfs_super_log_root(info->super_copy)) {
11414                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11415                 if (!ret) {
11416                         ret = 1;
11417                         goto close_out;
11418                 }
11419                 ret = zero_log_tree(root);
11420                 if (ret) {
11421                         fprintf(stderr, "fail to zero log tree\n");
11422                         goto close_out;
11423                 }
11424         }
11425
11426         uuid_unparse(info->super_copy->fsid, uuidbuf);
11427         if (qgroup_report) {
11428                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11429                        uuidbuf);
11430                 ret = qgroup_verify_all(info);
11431                 if (ret == 0)
11432                         report_qgroups(1);
11433                 goto close_out;
11434         }
11435         if (subvolid) {
11436                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11437                        subvolid, argv[optind], uuidbuf);
11438                 ret = print_extent_state(info, subvolid);
11439                 goto close_out;
11440         }
11441         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11442
11443         if (!extent_buffer_uptodate(info->tree_root->node) ||
11444             !extent_buffer_uptodate(info->dev_root->node) ||
11445             !extent_buffer_uptodate(info->chunk_root->node)) {
11446                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11447                 ret = -EIO;
11448                 goto close_out;
11449         }
11450
11451         if (init_extent_tree || init_csum_tree) {
11452                 struct btrfs_trans_handle *trans;
11453
11454                 trans = btrfs_start_transaction(info->extent_root, 0);
11455                 if (IS_ERR(trans)) {
11456                         fprintf(stderr, "Error starting transaction\n");
11457                         ret = PTR_ERR(trans);
11458                         goto close_out;
11459                 }
11460
11461                 if (init_extent_tree) {
11462                         printf("Creating a new extent tree\n");
11463                         ret = reinit_extent_tree(trans, info);
11464                         if (ret)
11465                                 goto close_out;
11466                 }
11467
11468                 if (init_csum_tree) {
11469                         fprintf(stderr, "Reinit crc root\n");
11470                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11471                         if (ret) {
11472                                 fprintf(stderr, "crc root initialization failed\n");
11473                                 ret = -EIO;
11474                                 goto close_out;
11475                         }
11476
11477                         ret = fill_csum_tree(trans, info->csum_root,
11478                                              init_extent_tree);
11479                         if (ret) {
11480                                 fprintf(stderr, "crc refilling failed\n");
11481                                 return -EIO;
11482                         }
11483                 }
11484                 /*
11485                  * Ok now we commit and run the normal fsck, which will add
11486                  * extent entries for all of the items it finds.
11487                  */
11488                 ret = btrfs_commit_transaction(trans, info->extent_root);
11489                 if (ret)
11490                         goto close_out;
11491         }
11492         if (!extent_buffer_uptodate(info->extent_root->node)) {
11493                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11494                 ret = -EIO;
11495                 goto close_out;
11496         }
11497         if (!extent_buffer_uptodate(info->csum_root->node)) {
11498                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11499                 ret = -EIO;
11500                 goto close_out;
11501         }
11502
11503         if (!ctx.progress_enabled)
11504                 fprintf(stderr, "checking extents\n");
11505         if (check_mode == CHECK_MODE_LOWMEM)
11506                 ret = check_chunks_and_extents_v2(root);
11507         else
11508                 ret = check_chunks_and_extents(root);
11509         if (ret)
11510                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11511
11512         ret = repair_root_items(info);
11513         if (ret < 0)
11514                 goto close_out;
11515         if (repair) {
11516                 fprintf(stderr, "Fixed %d roots.\n", ret);
11517                 ret = 0;
11518         } else if (ret > 0) {
11519                 fprintf(stderr,
11520                        "Found %d roots with an outdated root item.\n",
11521                        ret);
11522                 fprintf(stderr,
11523                         "Please run a filesystem check with the option --repair to fix them.\n");
11524                 ret = 1;
11525                 goto close_out;
11526         }
11527
11528         if (!ctx.progress_enabled) {
11529                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11530                         fprintf(stderr, "checking free space tree\n");
11531                 else
11532                         fprintf(stderr, "checking free space cache\n");
11533         }
11534         ret = check_space_cache(root);
11535         if (ret)
11536                 goto out;
11537
11538         /*
11539          * We used to have to have these hole extents in between our real
11540          * extents so if we don't have this flag set we need to make sure there
11541          * are no gaps in the file extents for inodes, otherwise we can just
11542          * ignore it when this happens.
11543          */
11544         no_holes = btrfs_fs_incompat(root->fs_info,
11545                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11546         if (!ctx.progress_enabled)
11547                 fprintf(stderr, "checking fs roots\n");
11548         ret = check_fs_roots(root, &root_cache);
11549         if (ret)
11550                 goto out;
11551
11552         fprintf(stderr, "checking csums\n");
11553         ret = check_csums(root);
11554         if (ret)
11555                 goto out;
11556
11557         fprintf(stderr, "checking root refs\n");
11558         ret = check_root_refs(root, &root_cache);
11559         if (ret)
11560                 goto out;
11561
11562         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11563                 struct extent_buffer *eb;
11564
11565                 eb = list_first_entry(&root->fs_info->recow_ebs,
11566                                       struct extent_buffer, recow);
11567                 list_del_init(&eb->recow);
11568                 ret = recow_extent_buffer(root, eb);
11569                 if (ret)
11570                         break;
11571         }
11572
11573         while (!list_empty(&delete_items)) {
11574                 struct bad_item *bad;
11575
11576                 bad = list_first_entry(&delete_items, struct bad_item, list);
11577                 list_del_init(&bad->list);
11578                 if (repair)
11579                         ret = delete_bad_item(root, bad);
11580                 free(bad);
11581         }
11582
11583         if (info->quota_enabled) {
11584                 int err;
11585                 fprintf(stderr, "checking quota groups\n");
11586                 err = qgroup_verify_all(info);
11587                 if (err)
11588                         goto out;
11589                 report_qgroups(0);
11590                 err = repair_qgroups(info, &qgroups_repaired);
11591                 if (err)
11592                         goto out;
11593         }
11594
11595         if (!list_empty(&root->fs_info->recow_ebs)) {
11596                 fprintf(stderr, "Transid errors in file system\n");
11597                 ret = 1;
11598         }
11599 out:
11600         /* Don't override original ret */
11601         if (!ret && qgroups_repaired)
11602                 ret = qgroups_repaired;
11603
11604         if (found_old_backref) { /*
11605                  * there was a disk format change when mixed
11606                  * backref was in testing tree. The old format
11607                  * existed about one week.
11608                  */
11609                 printf("\n * Found old mixed backref format. "
11610                        "The old format is not supported! *"
11611                        "\n * Please mount the FS in readonly mode, "
11612                        "backup data and re-format the FS. *\n\n");
11613                 ret = 1;
11614         }
11615         printf("found %llu bytes used err is %d\n",
11616                (unsigned long long)bytes_used, ret);
11617         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11618         printf("total tree bytes: %llu\n",
11619                (unsigned long long)total_btree_bytes);
11620         printf("total fs tree bytes: %llu\n",
11621                (unsigned long long)total_fs_tree_bytes);
11622         printf("total extent tree bytes: %llu\n",
11623                (unsigned long long)total_extent_tree_bytes);
11624         printf("btree space waste bytes: %llu\n",
11625                (unsigned long long)btree_space_waste);
11626         printf("file data blocks allocated: %llu\n referenced %llu\n",
11627                 (unsigned long long)data_bytes_allocated,
11628                 (unsigned long long)data_bytes_referenced);
11629
11630         free_qgroup_counts();
11631         free_root_recs_tree(&root_cache);
11632 close_out:
11633         close_ctree(root);
11634 err_out:
11635         if (ctx.progress_enabled)
11636                 task_deinit(ctx.info);
11637
11638         return ret;
11639 }