617b867f965679e02500ad0c821634acfa4d26bc
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases reported by the progress indicator.
 *
 * The value is used directly as an index into task_position_string[] in
 * print_status_check(), so the order here must match that table.
 * TASK_NOTHING has no string there; print_status_check() returns without
 * printing anything when it sees it.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/*
 * Context handed to print_status_check() through its void * argument.
 */
struct task_ctx {
        /* NOTE(review): not read in the code visible here; presumably gates the progress output - confirm */
        int progress_enabled;
        /* phase whose message print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope state of the checker.  Everything here is static, so it is
 * private to this translation unit.
 * NOTE(review): the code reading and updating these variables is outside
 * this excerpt.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* context type consumed by print_status_check() */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation selector.
 *
 * parse_check_mode() maps "lowmem" to CHECK_MODE_LOWMEM, "orig" and
 * "original" to CHECK_MODE_ORIGINAL, and every other string to
 * CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL, not a separate value.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common header of a backref record.
 *
 * It is embedded as the member "node" of both struct data_backref and
 * struct tree_backref; to_data_backref() and to_tree_backref() recover the
 * containing record from it.  Records are compared through their rb_node
 * by compare_extent_backref().
 */
struct extent_backref {
        struct rb_node node;
        /* 1: container is a data_backref, 0: a tree_backref (see the WARN_ONs in the compare helpers) */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* compared right after is_data by compare_extent_backref() */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
96 {
97         return rb_entry(node, struct extent_backref, node);
98 }
99
/*
 * Backref record for a data extent.
 *
 * "node" is the embedded common header that to_data_backref() uses to
 * recover this structure.  compare_data_backref() orders records by
 * parent/root, owner, offset and bytes, and additionally by disk_bytenr and
 * found_ref when both records have a non-zero found_ref.
 */
struct data_backref {
        struct extent_backref node;
        /* one u64 of storage; the compare helpers read it through "parent" */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* a counter, distinct from the one-bit node.found_ref flag */
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
121 {
122         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124         struct data_backref *back1 = to_data_backref(ext1);
125         struct data_backref *back2 = to_data_backref(ext2);
126
127         WARN_ON(!ext1->is_data);
128         WARN_ON(!ext2->is_data);
129
130         /* parent and root are a union, so this covers both */
131         if (back1->parent > back2->parent)
132                 return 1;
133         if (back1->parent < back2->parent)
134                 return -1;
135
136         /* This is a full backref and the parents match. */
137         if (back1->node.full_backref)
138                 return 0;
139
140         if (back1->owner > back2->owner)
141                 return 1;
142         if (back1->owner < back2->owner)
143                 return -1;
144
145         if (back1->offset > back2->offset)
146                 return 1;
147         if (back1->offset < back2->offset)
148                 return -1;
149
150         if (back1->bytes > back2->bytes)
151                 return 1;
152         if (back1->bytes < back2->bytes)
153                 return -1;
154
155         if (back1->found_ref && back2->found_ref) {
156                 if (back1->disk_bytenr > back2->disk_bytenr)
157                         return 1;
158                 if (back1->disk_bytenr < back2->disk_bytenr)
159                         return -1;
160
161                 if (back1->found_ref > back2->found_ref)
162                         return 1;
163                 if (back1->found_ref < back2->found_ref)
164                         return -1;
165         }
166
167         return 0;
168 }
169
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the inode record's
 * orphan_extents list (struct inode_record).
 *
 * clone_inode_rec() duplicates these entries and
 * print_orphan_data_extents() prints objectid, offset, disk_bytenr and
 * disk_len of each one.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
184
/*
 * Backref record for a tree block.
 *
 * "node" is the embedded common header used by to_tree_backref().
 * compare_tree_backref() orders records by the parent/root value only.
 */
struct tree_backref {
        struct extent_backref node;
        /* one u64 of storage; compare_tree_backref() reads it through "parent" */
        union {
                u64 parent;
                u64 root;
        };
};
192
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
194 {
195         return container_of(back, struct tree_backref, node);
196 }
197
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
199 {
200         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202         struct tree_backref *back1 = to_tree_backref(ext1);
203         struct tree_backref *back2 = to_tree_backref(ext2);
204
205         WARN_ON(ext1->is_data);
206         WARN_ON(ext2->is_data);
207
208         /* parent and root are a union, so this covers both */
209         if (back1->parent > back2->parent)
210                 return 1;
211         if (back1->parent < back2->parent)
212                 return -1;
213
214         return 0;
215 }
216
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
218 {
219         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
221
222         if (ext1->is_data > ext2->is_data)
223                 return 1;
224
225         if (ext1->is_data < ext2->is_data)
226                 return -1;
227
228         if (ext1->full_backref > ext2->full_backref)
229                 return 1;
230         if (ext1->full_backref < ext2->full_backref)
231                 return -1;
232
233         if (ext1->is_data)
234                 return compare_data_backref(node1, node2);
235         else
236                 return compare_tree_backref(node1, node2);
237 }
238
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a two-bit field, so it can hold this value in addition to
 * 0 and 1.
 */
enum { FLAG_UNSET = 2 };
241
/*
 * Record for one extent.
 *
 * "list" is the link that to_extent_record() converts back into the record.
 * NOTE(review): the code that fills in and consumes the other members is
 * outside this excerpt.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* presumably holds extent_backref nodes ordered by compare_extent_backref() - confirm */
        struct rb_root backref_tree;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
269
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
271 {
272         return container_of(entry, struct extent_record, list);
273 }
274
275 struct inode_backref {
276         struct list_head list;
277         unsigned int found_dir_item:1;
278         unsigned int found_dir_index:1;
279         unsigned int found_inode_ref:1;
280         unsigned int filetype:8;
281         int errors;
282         unsigned int ref_type;
283         u64 dir;
284         u64 index;
285         u16 namelen;
286         char name[0];
287 };
288
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
290 {
291         return list_entry(entry, struct inode_backref, list);
292 }
293
/*
 * Snapshot of selected root item values, linkable through "list".
 * NOTE(review): producers and consumers are outside this excerpt; the
 * member names suggest they mirror fields of the on-disk root item - confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
304
/*
 * Backref error bits.  print_ref_error() decodes a mask of these into a
 * comma separated list of messages.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
318
/*
 * One hole in a file's extent coverage, kept in an rbtree ordered by
 * compare_hole().  compare_hole_range() treats it as the half-open range
 * [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
324
/*
 * Everything collected about one inode.
 *
 * Records are looked up by inode number through get_inode_rec(), which
 * clones a record (clone_inode_rec()) before modification when refs > 1.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initializes this to (u64)-1 for a new record */
        u64 extent_start;
        u64 extent_end;
        /* rbtree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* number of holders of this record; a clone starts at 1 */
        u32 refs;
};
352
/*
 * Inode error bits, stored in inode_record::errors and decoded by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
368
369 struct root_backref {
370         struct list_head list;
371         unsigned int found_dir_item:1;
372         unsigned int found_dir_index:1;
373         unsigned int found_back_ref:1;
374         unsigned int found_forward_ref:1;
375         unsigned int reachable:1;
376         int errors;
377         u64 ref_root;
378         u64 dir;
379         u64 index;
380         u16 namelen;
381         char name[0];
382 };
383
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
385 {
386         return list_entry(entry, struct root_backref, list);
387 }
388
/*
 * Per-root bookkeeping, indexable through the embedded cache_extent.
 * NOTE(review): users are outside this excerpt.
 */
struct root_record {
        /* presumably a list of struct root_backref (see to_root_backref()) - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
396
/*
 * Cache tree entry carrying an opaque pointer.  get_inode_rec() looks one
 * up by inode number and keeps a struct inode_record pointer in "data".
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
401
/*
 * Node referenced from walk_control::nodes[], carrying its own root and
 * inode cache trees.
 * NOTE(review): users are outside this excerpt.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        /* same type as the inode cache that get_inode_rec() searches */
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
409
/*
 * A (start, size) pair describing one block.
 * NOTE(review): users are outside this excerpt; units are presumably
 * bytes - confirm.
 */
struct block_info {
        u64 start;
        u32 size;
};
414
/*
 * State of a tree walk.
 * NOTE(review): users are outside this excerpt.
 */
struct walk_control {
        struct cache_tree shared;
        /* one slot per tree level, BTRFS_MAX_LEVEL in total */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* presumably an index into nodes[] - confirm */
        int active_node;
        int root_level;
};
421
/*
 * A key together with the id of the root it belongs to, linkable through
 * "list".
 * NOTE(review): users are outside this excerpt.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
427
/*
 * A (bytenr, bytes) candidate with counters, linkable through "list".
 * NOTE(review): users are outside this excerpt.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
435
/*
 * Information gathered about one root, indexable through the embedded
 * cache_extent.
 * NOTE(review): users are outside this excerpt.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
445
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Every flag occupies its own
 * bit so that several errors can be OR-ed together without aliasing.
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH
 * and has been moved to a free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
462
463 static void *print_status_check(void *p)
464 {
465         struct task_ctx *priv = p;
466         const char work_indicator[] = { '.', 'o', 'O', 'o' };
467         uint32_t count = 0;
468         static char *task_position_string[] = {
469                 "checking extents",
470                 "checking free space cache",
471                 "checking fs roots",
472         };
473
474         task_period_start(priv->info, 1000 /* 1s */);
475
476         if (priv->tp == TASK_NOTHING)
477                 return NULL;
478
479         while (1) {
480                 printf("%s [%c]\r", task_position_string[priv->tp],
481                                 work_indicator[count % 4]);
482                 count++;
483                 fflush(stdout);
484                 task_period_wait(priv->info);
485         }
486         return NULL;
487 }
488
/*
 * Finish the progress line: write a newline to stdout and flush it.
 * The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
496
497 static enum btrfs_check_mode parse_check_mode(const char *str)
498 {
499         if (strcmp(str, "lowmem") == 0)
500                 return CHECK_MODE_LOWMEM;
501         if (strcmp(str, "orig") == 0)
502                 return CHECK_MODE_ORIGINAL;
503         if (strcmp(str, "original") == 0)
504                 return CHECK_MODE_ORIGINAL;
505
506         return CHECK_MODE_UNKNOWN;
507 }
508
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
511 {
512         struct file_extent_hole *hole;
513
514         if (RB_EMPTY_ROOT(holes))
515                 return (u64)-1;
516
517         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
518         return hole->start;
519 }
520
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
522 {
523         struct file_extent_hole *hole1;
524         struct file_extent_hole *hole2;
525
526         hole1 = rb_entry(node1, struct file_extent_hole, node);
527         hole2 = rb_entry(node2, struct file_extent_hole, node);
528
529         if (hole1->start > hole2->start)
530                 return -1;
531         if (hole1->start < hole2->start)
532                 return 1;
533         /* Now hole1->start == hole2->start */
534         if (hole1->len >= hole2->len)
535                 /*
536                  * Hole 1 will be merge center
537                  * Same hole will be merged later
538                  */
539                 return -1;
540         /* Hole 2 will be merge center */
541         return 1;
542 }
543
544 /*
545  * Add a hole to the record
546  *
547  * This will do hole merge for copy_file_extent_holes(),
548  * which will ensure there won't be continuous holes.
549  */
550 static int add_file_extent_hole(struct rb_root *holes,
551                                 u64 start, u64 len)
552 {
553         struct file_extent_hole *hole;
554         struct file_extent_hole *prev = NULL;
555         struct file_extent_hole *next = NULL;
556
557         hole = malloc(sizeof(*hole));
558         if (!hole)
559                 return -ENOMEM;
560         hole->start = start;
561         hole->len = len;
562         /* Since compare will not return 0, no -EEXIST will happen */
563         rb_insert(holes, &hole->node, compare_hole);
564
565         /* simple merge with previous hole */
566         if (rb_prev(&hole->node))
567                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
568                                 node);
569         if (prev && prev->start + prev->len >= hole->start) {
570                 hole->len = hole->start + hole->len - prev->start;
571                 hole->start = prev->start;
572                 rb_erase(&prev->node, holes);
573                 free(prev);
574                 prev = NULL;
575         }
576
577         /* iterate merge with next holes */
578         while (1) {
579                 if (!rb_next(&hole->node))
580                         break;
581                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
582                                         node);
583                 if (hole->start + hole->len >= next->start) {
584                         if (hole->start + hole->len <= next->start + next->len)
585                                 hole->len = next->start + next->len -
586                                             hole->start;
587                         rb_erase(&next->node, holes);
588                         free(next);
589                         next = NULL;
590                 } else
591                         break;
592         }
593         return 0;
594 }
595
596 static int compare_hole_range(struct rb_node *node, void *data)
597 {
598         struct file_extent_hole *hole;
599         u64 start;
600
601         hole = (struct file_extent_hole *)data;
602         start = hole->start;
603
604         hole = rb_entry(node, struct file_extent_hole, node);
605         if (start < hole->start)
606                 return -1;
607         if (start >= hole->start && start < hole->start + hole->len)
608                 return 0;
609         return 1;
610 }
611
612 /*
613  * Delete a hole in the record
614  *
615  * This will do the hole split and is much restrict than add.
616  */
617 static int del_file_extent_hole(struct rb_root *holes,
618                                 u64 start, u64 len)
619 {
620         struct file_extent_hole *hole;
621         struct file_extent_hole tmp;
622         u64 prev_start = 0;
623         u64 prev_len = 0;
624         u64 next_start = 0;
625         u64 next_len = 0;
626         struct rb_node *node;
627         int have_prev = 0;
628         int have_next = 0;
629         int ret = 0;
630
631         tmp.start = start;
632         tmp.len = len;
633         node = rb_search(holes, &tmp, compare_hole_range, NULL);
634         if (!node)
635                 return -EEXIST;
636         hole = rb_entry(node, struct file_extent_hole, node);
637         if (start + len > hole->start + hole->len)
638                 return -EEXIST;
639
640         /*
641          * Now there will be no overlap, delete the hole and re-add the
642          * split(s) if they exists.
643          */
644         if (start > hole->start) {
645                 prev_start = hole->start;
646                 prev_len = start - hole->start;
647                 have_prev = 1;
648         }
649         if (hole->start + hole->len > start + len) {
650                 next_start = start + len;
651                 next_len = hole->start + hole->len - start - len;
652                 have_next = 1;
653         }
654         rb_erase(node, holes);
655         free(hole);
656         if (have_prev) {
657                 ret = add_file_extent_hole(holes, prev_start, prev_len);
658                 if (ret < 0)
659                         return ret;
660         }
661         if (have_next) {
662                 ret = add_file_extent_hole(holes, next_start, next_len);
663                 if (ret < 0)
664                         return ret;
665         }
666         return 0;
667 }
668
669 static int copy_file_extent_holes(struct rb_root *dst,
670                                   struct rb_root *src)
671 {
672         struct file_extent_hole *hole;
673         struct rb_node *node;
674         int ret = 0;
675
676         node = rb_first(src);
677         while (node) {
678                 hole = rb_entry(node, struct file_extent_hole, node);
679                 ret = add_file_extent_hole(dst, hole->start, hole->len);
680                 if (ret)
681                         break;
682                 node = rb_next(node);
683         }
684         return ret;
685 }
686
687 static void free_file_extent_holes(struct rb_root *holes)
688 {
689         struct rb_node *node;
690         struct file_extent_hole *hole;
691
692         node = rb_first(holes);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 rb_erase(node, holes);
696                 free(hole);
697                 node = rb_first(holes);
698         }
699 }
700
/*
 * Forward declaration.  The function is static, so its definition has to
 * appear elsewhere in this file (not visible in this excerpt).
 */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
702
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704                                  struct btrfs_root *root)
705 {
706         if (root->last_trans != trans->transid) {
707                 root->track_dirty = 1;
708                 root->last_trans = trans->transid;
709                 root->commit_root = root->node;
710                 extent_buffer_get(root->node);
711         }
712 }
713
714 static u8 imode_to_type(u32 imode)
715 {
716 #define S_SHIFT 12
717         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
719                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
720                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
721                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
722                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
723                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
724                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
725         };
726
727         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
728 #undef S_SHIFT
729 }
730
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
732 {
733         struct device_record *rec1;
734         struct device_record *rec2;
735
736         rec1 = rb_entry(node1, struct device_record, node);
737         rec2 = rb_entry(node2, struct device_record, node);
738         if (rec1->devid > rec2->devid)
739                 return -1;
740         else if (rec1->devid < rec2->devid)
741                 return 1;
742         else
743                 return 0;
744 }
745
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
747 {
748         struct inode_record *rec;
749         struct inode_backref *backref;
750         struct inode_backref *orig;
751         struct inode_backref *tmp;
752         struct orphan_data_extent *src_orphan;
753         struct orphan_data_extent *dst_orphan;
754         size_t size;
755         int ret;
756
757         rec = malloc(sizeof(*rec));
758         if (!rec)
759                 return ERR_PTR(-ENOMEM);
760         memcpy(rec, orig_rec, sizeof(*rec));
761         rec->refs = 1;
762         INIT_LIST_HEAD(&rec->backrefs);
763         INIT_LIST_HEAD(&rec->orphan_extents);
764         rec->holes = RB_ROOT;
765
766         list_for_each_entry(orig, &orig_rec->backrefs, list) {
767                 size = sizeof(*orig) + orig->namelen + 1;
768                 backref = malloc(size);
769                 if (!backref) {
770                         ret = -ENOMEM;
771                         goto cleanup;
772                 }
773                 memcpy(backref, orig, size);
774                 list_add_tail(&backref->list, &rec->backrefs);
775         }
776         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777                 dst_orphan = malloc(sizeof(*dst_orphan));
778                 if (!dst_orphan) {
779                         ret = -ENOMEM;
780                         goto cleanup;
781                 }
782                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
784         }
785         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
786         BUG_ON(ret < 0);
787
788         return rec;
789
790 cleanup:
791         if (!list_empty(&rec->backrefs))
792                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793                         list_del(&orig->list);
794                         free(orig);
795                 }
796
797         if (!list_empty(&rec->orphan_extents))
798                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799                         list_del(&orig->list);
800                         free(orig);
801                 }
802
803         free(rec);
804
805         return ERR_PTR(ret);
806 }
807
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
809                                       u64 objectid)
810 {
811         struct orphan_data_extent *orphan;
812
813         if (list_empty(orphan_extents))
814                 return;
815         printf("The following data extent is lost in tree %llu:\n",
816                objectid);
817         list_for_each_entry(orphan, orphan_extents, list) {
818                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
820                        orphan->disk_len);
821         }
822 }
823
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
825 {
826         u64 root_objectid = root->root_key.objectid;
827         int errors = rec->errors;
828
829         if (!errors)
830                 return;
831         /* reloc root errors, we print its corresponding fs root objectid*/
832         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833                 root_objectid = root->root_key.offset;
834                 fprintf(stderr, "reloc");
835         }
836         fprintf(stderr, "root %llu inode %llu errors %x",
837                 (unsigned long long) root_objectid,
838                 (unsigned long long) rec->ino, rec->errors);
839
840         if (errors & I_ERR_NO_INODE_ITEM)
841                 fprintf(stderr, ", no inode item");
842         if (errors & I_ERR_NO_ORPHAN_ITEM)
843                 fprintf(stderr, ", no orphan item");
844         if (errors & I_ERR_DUP_INODE_ITEM)
845                 fprintf(stderr, ", dup inode item");
846         if (errors & I_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & I_ERR_ODD_DIR_ITEM)
849                 fprintf(stderr, ", odd dir item");
850         if (errors & I_ERR_ODD_FILE_EXTENT)
851                 fprintf(stderr, ", odd file extent");
852         if (errors & I_ERR_BAD_FILE_EXTENT)
853                 fprintf(stderr, ", bad file extent");
854         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855                 fprintf(stderr, ", file extent overlap");
856         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857                 fprintf(stderr, ", file extent discount");
858         if (errors & I_ERR_DIR_ISIZE_WRONG)
859                 fprintf(stderr, ", dir isize wrong");
860         if (errors & I_ERR_FILE_NBYTES_WRONG)
861                 fprintf(stderr, ", nbytes wrong");
862         if (errors & I_ERR_ODD_CSUM_ITEM)
863                 fprintf(stderr, ", odd csum item");
864         if (errors & I_ERR_SOME_CSUM_MISSING)
865                 fprintf(stderr, ", some csum missing");
866         if (errors & I_ERR_LINK_COUNT_WRONG)
867                 fprintf(stderr, ", link count wrong");
868         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869                 fprintf(stderr, ", orphan file extent");
870         fprintf(stderr, "\n");
871         /* Print the orphan extents if needed */
872         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
874
875         /* Print the holes if needed */
876         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877                 struct file_extent_hole *hole;
878                 struct rb_node *node;
879                 int found = 0;
880
881                 node = rb_first(&rec->holes);
882                 fprintf(stderr, "Found file extent holes:\n");
883                 while (node) {
884                         found = 1;
885                         hole = rb_entry(node, struct file_extent_hole, node);
886                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
887                                 hole->start, hole->len);
888                         node = rb_next(node);
889                 }
890                 if (!found)
891                         fprintf(stderr, "\tstart: 0, len: %llu\n",
892                                 round_up(rec->isize, root->sectorsize));
893         }
894 }
895
896 static void print_ref_error(int errors)
897 {
898         if (errors & REF_ERR_NO_DIR_ITEM)
899                 fprintf(stderr, ", no dir item");
900         if (errors & REF_ERR_NO_DIR_INDEX)
901                 fprintf(stderr, ", no dir index");
902         if (errors & REF_ERR_NO_INODE_REF)
903                 fprintf(stderr, ", no inode ref");
904         if (errors & REF_ERR_DUP_DIR_ITEM)
905                 fprintf(stderr, ", dup dir item");
906         if (errors & REF_ERR_DUP_DIR_INDEX)
907                 fprintf(stderr, ", dup dir index");
908         if (errors & REF_ERR_DUP_INODE_REF)
909                 fprintf(stderr, ", dup inode ref");
910         if (errors & REF_ERR_INDEX_UNMATCH)
911                 fprintf(stderr, ", index mismatch");
912         if (errors & REF_ERR_FILETYPE_UNMATCH)
913                 fprintf(stderr, ", filetype mismatch");
914         if (errors & REF_ERR_NAME_TOO_LONG)
915                 fprintf(stderr, ", name too long");
916         if (errors & REF_ERR_NO_ROOT_REF)
917                 fprintf(stderr, ", no root ref");
918         if (errors & REF_ERR_NO_ROOT_BACKREF)
919                 fprintf(stderr, ", no root backref");
920         if (errors & REF_ERR_DUP_ROOT_REF)
921                 fprintf(stderr, ", dup root ref");
922         if (errors & REF_ERR_DUP_ROOT_BACKREF)
923                 fprintf(stderr, ", dup root backref");
924         fprintf(stderr, "\n");
925 }
926
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
928                                           u64 ino, int mod)
929 {
930         struct ptr_node *node;
931         struct cache_extent *cache;
932         struct inode_record *rec = NULL;
933         int ret;
934
935         cache = lookup_cache_extent(inode_cache, ino, 1);
936         if (cache) {
937                 node = container_of(cache, struct ptr_node, cache);
938                 rec = node->data;
939                 if (mod && rec->refs > 1) {
940                         node->data = clone_inode_rec(rec);
941                         if (IS_ERR(node->data))
942                                 return node->data;
943                         rec->refs--;
944                         rec = node->data;
945                 }
946         } else if (mod) {
947                 rec = calloc(1, sizeof(*rec));
948                 if (!rec)
949                         return ERR_PTR(-ENOMEM);
950                 rec->ino = ino;
951                 rec->extent_start = (u64)-1;
952                 rec->refs = 1;
953                 INIT_LIST_HEAD(&rec->backrefs);
954                 INIT_LIST_HEAD(&rec->orphan_extents);
955                 rec->holes = RB_ROOT;
956
957                 node = malloc(sizeof(*node));
958                 if (!node) {
959                         free(rec);
960                         return ERR_PTR(-ENOMEM);
961                 }
962                 node->cache.start = ino;
963                 node->cache.size = 1;
964                 node->data = rec;
965
966                 if (ino == BTRFS_FREE_INO_OBJECTID)
967                         rec->found_link = 1;
968
969                 ret = insert_cache_extent(inode_cache, &node->cache);
970                 if (ret)
971                         return ERR_PTR(-EEXIST);
972         }
973         return rec;
974 }
975
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
977 {
978         struct orphan_data_extent *orphan;
979
980         while (!list_empty(orphan_extents)) {
981                 orphan = list_entry(orphan_extents->next,
982                                     struct orphan_data_extent, list);
983                 list_del(&orphan->list);
984                 free(orphan);
985         }
986 }
987
988 static void free_inode_rec(struct inode_record *rec)
989 {
990         struct inode_backref *backref;
991
992         if (--rec->refs > 0)
993                 return;
994
995         while (!list_empty(&rec->backrefs)) {
996                 backref = to_inode_backref(rec->backrefs.next);
997                 list_del(&backref->list);
998                 free(backref);
999         }
1000         free_orphan_data_extents(&rec->orphan_extents);
1001         free_file_extent_holes(&rec->holes);
1002         free(rec);
1003 }
1004
1005 static int can_free_inode_rec(struct inode_record *rec)
1006 {
1007         if (!rec->errors && rec->checked && rec->found_inode_item &&
1008             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1009                 return 1;
1010         return 0;
1011 }
1012
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014                                  struct inode_record *rec)
1015 {
1016         struct cache_extent *cache;
1017         struct inode_backref *tmp, *backref;
1018         struct ptr_node *node;
1019         unsigned char filetype;
1020
1021         if (!rec->found_inode_item)
1022                 return;
1023
1024         filetype = imode_to_type(rec->imode);
1025         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026                 if (backref->found_dir_item && backref->found_dir_index) {
1027                         if (backref->filetype != filetype)
1028                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029                         if (!backref->errors && backref->found_inode_ref &&
1030                             rec->nlink == rec->found_link) {
1031                                 list_del(&backref->list);
1032                                 free(backref);
1033                         }
1034                 }
1035         }
1036
1037         if (!rec->checked || rec->merging)
1038                 return;
1039
1040         if (S_ISDIR(rec->imode)) {
1041                 if (rec->found_size != rec->isize)
1042                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043                 if (rec->found_file_extent)
1044                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046                 if (rec->found_dir_item)
1047                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1048                 if (rec->found_size != rec->nbytes)
1049                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050                 if (rec->nlink > 0 && !no_holes &&
1051                     (rec->extent_end < rec->isize ||
1052                      first_extent_gap(&rec->holes) < rec->isize))
1053                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1054         }
1055
1056         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057                 if (rec->found_csum_item && rec->nodatasum)
1058                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059                 if (rec->some_csum_missing && !rec->nodatasum)
1060                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1061         }
1062
1063         BUG_ON(rec->refs != 1);
1064         if (can_free_inode_rec(rec)) {
1065                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066                 node = container_of(cache, struct ptr_node, cache);
1067                 BUG_ON(node->data != rec);
1068                 remove_cache_extent(inode_cache, &node->cache);
1069                 free(node);
1070                 free_inode_rec(rec);
1071         }
1072 }
1073
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1075 {
1076         struct btrfs_path path;
1077         struct btrfs_key key;
1078         int ret;
1079
1080         key.objectid = BTRFS_ORPHAN_OBJECTID;
1081         key.type = BTRFS_ORPHAN_ITEM_KEY;
1082         key.offset = ino;
1083
1084         btrfs_init_path(&path);
1085         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086         btrfs_release_path(&path);
1087         if (ret > 0)
1088                 ret = -ENOENT;
1089         return ret;
1090 }
1091
1092 static int process_inode_item(struct extent_buffer *eb,
1093                               int slot, struct btrfs_key *key,
1094                               struct shared_node *active_node)
1095 {
1096         struct inode_record *rec;
1097         struct btrfs_inode_item *item;
1098
1099         rec = active_node->current;
1100         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101         if (rec->found_inode_item) {
1102                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1103                 return 1;
1104         }
1105         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106         rec->nlink = btrfs_inode_nlink(eb, item);
1107         rec->isize = btrfs_inode_size(eb, item);
1108         rec->nbytes = btrfs_inode_nbytes(eb, item);
1109         rec->imode = btrfs_inode_mode(eb, item);
1110         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1111                 rec->nodatasum = 1;
1112         rec->found_inode_item = 1;
1113         if (rec->nlink == 0)
1114                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115         maybe_free_inode_rec(&active_node->inode_cache, rec);
1116         return 0;
1117 }
1118
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1120                                                 const char *name,
1121                                                 int namelen, u64 dir)
1122 {
1123         struct inode_backref *backref;
1124
1125         list_for_each_entry(backref, &rec->backrefs, list) {
1126                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1127                         break;
1128                 if (backref->dir != dir || backref->namelen != namelen)
1129                         continue;
1130                 if (memcmp(name, backref->name, namelen))
1131                         continue;
1132                 return backref;
1133         }
1134
1135         backref = malloc(sizeof(*backref) + namelen + 1);
1136         if (!backref)
1137                 return NULL;
1138         memset(backref, 0, sizeof(*backref));
1139         backref->dir = dir;
1140         backref->namelen = namelen;
1141         memcpy(backref->name, name, namelen);
1142         backref->name[namelen] = '\0';
1143         list_add_tail(&backref->list, &rec->backrefs);
1144         return backref;
1145 }
1146
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148                              u64 ino, u64 dir, u64 index,
1149                              const char *name, int namelen,
1150                              int filetype, int itemtype, int errors)
1151 {
1152         struct inode_record *rec;
1153         struct inode_backref *backref;
1154
1155         rec = get_inode_rec(inode_cache, ino, 1);
1156         BUG_ON(IS_ERR(rec));
1157         backref = get_inode_backref(rec, name, namelen, dir);
1158         BUG_ON(!backref);
1159         if (errors)
1160                 backref->errors |= errors;
1161         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162                 if (backref->found_dir_index)
1163                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164                 if (backref->found_inode_ref && backref->index != index)
1165                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1166                 if (backref->found_dir_item && backref->filetype != filetype)
1167                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1168
1169                 backref->index = index;
1170                 backref->filetype = filetype;
1171                 backref->found_dir_index = 1;
1172         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1173                 rec->found_link++;
1174                 if (backref->found_dir_item)
1175                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176                 if (backref->found_dir_index && backref->filetype != filetype)
1177                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1178
1179                 backref->filetype = filetype;
1180                 backref->found_dir_item = 1;
1181         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183                 if (backref->found_inode_ref)
1184                         backref->errors |= REF_ERR_DUP_INODE_REF;
1185                 if (backref->found_dir_index && backref->index != index)
1186                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1187                 else
1188                         backref->index = index;
1189
1190                 backref->ref_type = itemtype;
1191                 backref->found_inode_ref = 1;
1192         } else {
1193                 BUG_ON(1);
1194         }
1195
1196         maybe_free_inode_rec(inode_cache, rec);
1197         return 0;
1198 }
1199
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201                             struct cache_tree *dst_cache)
1202 {
1203         struct inode_backref *backref;
1204         u32 dir_count = 0;
1205         int ret = 0;
1206
1207         dst->merging = 1;
1208         list_for_each_entry(backref, &src->backrefs, list) {
1209                 if (backref->found_dir_index) {
1210                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1211                                         backref->index, backref->name,
1212                                         backref->namelen, backref->filetype,
1213                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1214                 }
1215                 if (backref->found_dir_item) {
1216                         dir_count++;
1217                         add_inode_backref(dst_cache, dst->ino,
1218                                         backref->dir, 0, backref->name,
1219                                         backref->namelen, backref->filetype,
1220                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1221                 }
1222                 if (backref->found_inode_ref) {
1223                         add_inode_backref(dst_cache, dst->ino,
1224                                         backref->dir, backref->index,
1225                                         backref->name, backref->namelen, 0,
1226                                         backref->ref_type, backref->errors);
1227                 }
1228         }
1229
1230         if (src->found_dir_item)
1231                 dst->found_dir_item = 1;
1232         if (src->found_file_extent)
1233                 dst->found_file_extent = 1;
1234         if (src->found_csum_item)
1235                 dst->found_csum_item = 1;
1236         if (src->some_csum_missing)
1237                 dst->some_csum_missing = 1;
1238         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1240                 if (ret < 0)
1241                         return ret;
1242         }
1243
1244         BUG_ON(src->found_link < dir_count);
1245         dst->found_link += src->found_link - dir_count;
1246         dst->found_size += src->found_size;
1247         if (src->extent_start != (u64)-1) {
1248                 if (dst->extent_start == (u64)-1) {
1249                         dst->extent_start = src->extent_start;
1250                         dst->extent_end = src->extent_end;
1251                 } else {
1252                         if (dst->extent_end > src->extent_start)
1253                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254                         else if (dst->extent_end < src->extent_start) {
1255                                 ret = add_file_extent_hole(&dst->holes,
1256                                         dst->extent_end,
1257                                         src->extent_start - dst->extent_end);
1258                         }
1259                         if (dst->extent_end < src->extent_end)
1260                                 dst->extent_end = src->extent_end;
1261                 }
1262         }
1263
1264         dst->errors |= src->errors;
1265         if (src->found_inode_item) {
1266                 if (!dst->found_inode_item) {
1267                         dst->nlink = src->nlink;
1268                         dst->isize = src->isize;
1269                         dst->nbytes = src->nbytes;
1270                         dst->imode = src->imode;
1271                         dst->nodatasum = src->nodatasum;
1272                         dst->found_inode_item = 1;
1273                 } else {
1274                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1275                 }
1276         }
1277         dst->merging = 0;
1278
1279         return 0;
1280 }
1281
1282 static int splice_shared_node(struct shared_node *src_node,
1283                               struct shared_node *dst_node)
1284 {
1285         struct cache_extent *cache;
1286         struct ptr_node *node, *ins;
1287         struct cache_tree *src, *dst;
1288         struct inode_record *rec, *conflict;
1289         u64 current_ino = 0;
1290         int splice = 0;
1291         int ret;
1292
1293         if (--src_node->refs == 0)
1294                 splice = 1;
1295         if (src_node->current)
1296                 current_ino = src_node->current->ino;
1297
1298         src = &src_node->root_cache;
1299         dst = &dst_node->root_cache;
1300 again:
1301         cache = search_cache_extent(src, 0);
1302         while (cache) {
1303                 node = container_of(cache, struct ptr_node, cache);
1304                 rec = node->data;
1305                 cache = next_cache_extent(cache);
1306
1307                 if (splice) {
1308                         remove_cache_extent(src, &node->cache);
1309                         ins = node;
1310                 } else {
1311                         ins = malloc(sizeof(*ins));
1312                         BUG_ON(!ins);
1313                         ins->cache.start = node->cache.start;
1314                         ins->cache.size = node->cache.size;
1315                         ins->data = rec;
1316                         rec->refs++;
1317                 }
1318                 ret = insert_cache_extent(dst, &ins->cache);
1319                 if (ret == -EEXIST) {
1320                         conflict = get_inode_rec(dst, rec->ino, 1);
1321                         BUG_ON(IS_ERR(conflict));
1322                         merge_inode_recs(rec, conflict, dst);
1323                         if (rec->checked) {
1324                                 conflict->checked = 1;
1325                                 if (dst_node->current == conflict)
1326                                         dst_node->current = NULL;
1327                         }
1328                         maybe_free_inode_rec(dst, conflict);
1329                         free_inode_rec(rec);
1330                         free(ins);
1331                 } else {
1332                         BUG_ON(ret);
1333                 }
1334         }
1335
1336         if (src == &src_node->root_cache) {
1337                 src = &src_node->inode_cache;
1338                 dst = &dst_node->inode_cache;
1339                 goto again;
1340         }
1341
1342         if (current_ino > 0 && (!dst_node->current ||
1343             current_ino > dst_node->current->ino)) {
1344                 if (dst_node->current) {
1345                         dst_node->current->checked = 1;
1346                         maybe_free_inode_rec(dst, dst_node->current);
1347                 }
1348                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349                 BUG_ON(IS_ERR(dst_node->current));
1350         }
1351         return 0;
1352 }
1353
1354 static void free_inode_ptr(struct cache_extent *cache)
1355 {
1356         struct ptr_node *node;
1357         struct inode_record *rec;
1358
1359         node = container_of(cache, struct ptr_node, cache);
1360         rec = node->data;
1361         free_inode_rec(rec);
1362         free(node);
1363 }
1364
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1366
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1368                                             u64 bytenr)
1369 {
1370         struct cache_extent *cache;
1371         struct shared_node *node;
1372
1373         cache = lookup_cache_extent(shared, bytenr, 1);
1374         if (cache) {
1375                 node = container_of(cache, struct shared_node, cache);
1376                 return node;
1377         }
1378         return NULL;
1379 }
1380
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1382 {
1383         int ret;
1384         struct shared_node *node;
1385
1386         node = calloc(1, sizeof(*node));
1387         if (!node)
1388                 return -ENOMEM;
1389         node->cache.start = bytenr;
1390         node->cache.size = 1;
1391         cache_tree_init(&node->root_cache);
1392         cache_tree_init(&node->inode_cache);
1393         node->refs = refs;
1394
1395         ret = insert_cache_extent(shared, &node->cache);
1396
1397         return ret;
1398 }
1399
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401                              struct walk_control *wc, int level)
1402 {
1403         struct shared_node *node;
1404         struct shared_node *dest;
1405         int ret;
1406
1407         if (level == wc->active_node)
1408                 return 0;
1409
1410         BUG_ON(wc->active_node <= level);
1411         node = find_shared_node(&wc->shared, bytenr);
1412         if (!node) {
1413                 ret = add_shared_node(&wc->shared, bytenr, refs);
1414                 BUG_ON(ret);
1415                 node = find_shared_node(&wc->shared, bytenr);
1416                 wc->nodes[level] = node;
1417                 wc->active_node = level;
1418                 return 0;
1419         }
1420
1421         if (wc->root_level == wc->active_node &&
1422             btrfs_root_refs(&root->root_item) == 0) {
1423                 if (--node->refs == 0) {
1424                         free_inode_recs_tree(&node->root_cache);
1425                         free_inode_recs_tree(&node->inode_cache);
1426                         remove_cache_extent(&wc->shared, &node->cache);
1427                         free(node);
1428                 }
1429                 return 1;
1430         }
1431
1432         dest = wc->nodes[wc->active_node];
1433         splice_shared_node(node, dest);
1434         if (node->refs == 0) {
1435                 remove_cache_extent(&wc->shared, &node->cache);
1436                 free(node);
1437         }
1438         return 1;
1439 }
1440
1441 static int leave_shared_node(struct btrfs_root *root,
1442                              struct walk_control *wc, int level)
1443 {
1444         struct shared_node *node;
1445         struct shared_node *dest;
1446         int i;
1447
1448         if (level == wc->root_level)
1449                 return 0;
1450
1451         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1452                 if (wc->nodes[i])
1453                         break;
1454         }
1455         BUG_ON(i >= BTRFS_MAX_LEVEL);
1456
1457         node = wc->nodes[wc->active_node];
1458         wc->nodes[wc->active_node] = NULL;
1459         wc->active_node = i;
1460
1461         dest = wc->nodes[wc->active_node];
1462         if (wc->active_node < wc->root_level ||
1463             btrfs_root_refs(&root->root_item) > 0) {
1464                 BUG_ON(node->refs <= 1);
1465                 splice_shared_node(node, dest);
1466         } else {
1467                 BUG_ON(node->refs < 2);
1468                 node->refs--;
1469         }
1470         return 0;
1471 }
1472
1473 /*
1474  * Returns:
1475  * < 0 - on error
1476  * 1   - if the root with id child_root_id is a child of root parent_root_id
1477  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1478  *       has other root(s) as parent(s)
1479  * 2   - if the root child_root_id doesn't have any parent roots
1480  */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1482                          u64 child_root_id)
1483 {
1484         struct btrfs_path path;
1485         struct btrfs_key key;
1486         struct extent_buffer *leaf;
1487         int has_parent = 0;
1488         int ret;
1489
1490         btrfs_init_path(&path);
1491
1492         key.objectid = parent_root_id;
1493         key.type = BTRFS_ROOT_REF_KEY;
1494         key.offset = child_root_id;
1495         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1496                                 0, 0);
1497         if (ret < 0)
1498                 return ret;
1499         btrfs_release_path(&path);
1500         if (!ret)
1501                 return 1;
1502
1503         key.objectid = child_root_id;
1504         key.type = BTRFS_ROOT_BACKREF_KEY;
1505         key.offset = 0;
1506         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1507                                 0, 0);
1508         if (ret < 0)
1509                 goto out;
1510
1511         while (1) {
1512                 leaf = path.nodes[0];
1513                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1515                         if (ret)
1516                                 break;
1517                         leaf = path.nodes[0];
1518                 }
1519
1520                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521                 if (key.objectid != child_root_id ||
1522                     key.type != BTRFS_ROOT_BACKREF_KEY)
1523                         break;
1524
1525                 has_parent = 1;
1526
1527                 if (key.offset == parent_root_id) {
1528                         btrfs_release_path(&path);
1529                         return 1;
1530                 }
1531
1532                 path.slots[0]++;
1533         }
1534 out:
1535         btrfs_release_path(&path);
1536         if (ret < 0)
1537                 return ret;
1538         return has_parent ? 0 : 2;
1539 }
1540
1541 static int process_dir_item(struct btrfs_root *root,
1542                             struct extent_buffer *eb,
1543                             int slot, struct btrfs_key *key,
1544                             struct shared_node *active_node)
1545 {
1546         u32 total;
1547         u32 cur = 0;
1548         u32 len;
1549         u32 name_len;
1550         u32 data_len;
1551         int error;
1552         int nritems = 0;
1553         int filetype;
1554         struct btrfs_dir_item *di;
1555         struct inode_record *rec;
1556         struct cache_tree *root_cache;
1557         struct cache_tree *inode_cache;
1558         struct btrfs_key location;
1559         char namebuf[BTRFS_NAME_LEN];
1560
1561         root_cache = &active_node->root_cache;
1562         inode_cache = &active_node->inode_cache;
1563         rec = active_node->current;
1564         rec->found_dir_item = 1;
1565
1566         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567         total = btrfs_item_size_nr(eb, slot);
1568         while (cur < total) {
1569                 nritems++;
1570                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571                 name_len = btrfs_dir_name_len(eb, di);
1572                 data_len = btrfs_dir_data_len(eb, di);
1573                 filetype = btrfs_dir_type(eb, di);
1574
1575                 rec->found_size += name_len;
1576                 if (name_len <= BTRFS_NAME_LEN) {
1577                         len = name_len;
1578                         error = 0;
1579                 } else {
1580                         len = BTRFS_NAME_LEN;
1581                         error = REF_ERR_NAME_TOO_LONG;
1582                 }
1583                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1584
1585                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586                         add_inode_backref(inode_cache, location.objectid,
1587                                           key->objectid, key->offset, namebuf,
1588                                           len, filetype, key->type, error);
1589                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590                         add_inode_backref(root_cache, location.objectid,
1591                                           key->objectid, key->offset,
1592                                           namebuf, len, filetype,
1593                                           key->type, error);
1594                 } else {
1595                         fprintf(stderr, "invalid location in dir item %u\n",
1596                                 location.type);
1597                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598                                           key->objectid, key->offset, namebuf,
1599                                           len, filetype, key->type, error);
1600                 }
1601
1602                 len = sizeof(*di) + name_len + data_len;
1603                 di = (struct btrfs_dir_item *)((char *)di + len);
1604                 cur += len;
1605         }
1606         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1608
1609         return 0;
1610 }
1611
1612 static int process_inode_ref(struct extent_buffer *eb,
1613                              int slot, struct btrfs_key *key,
1614                              struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         int error;
1622         struct cache_tree *inode_cache;
1623         struct btrfs_inode_ref *ref;
1624         char namebuf[BTRFS_NAME_LEN];
1625
1626         inode_cache = &active_node->inode_cache;
1627
1628         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629         total = btrfs_item_size_nr(eb, slot);
1630         while (cur < total) {
1631                 name_len = btrfs_inode_ref_name_len(eb, ref);
1632                 index = btrfs_inode_ref_index(eb, ref);
1633                 if (name_len <= BTRFS_NAME_LEN) {
1634                         len = name_len;
1635                         error = 0;
1636                 } else {
1637                         len = BTRFS_NAME_LEN;
1638                         error = REF_ERR_NAME_TOO_LONG;
1639                 }
1640                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641                 add_inode_backref(inode_cache, key->objectid, key->offset,
1642                                   index, namebuf, len, 0, key->type, error);
1643
1644                 len = sizeof(*ref) + name_len;
1645                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1646                 cur += len;
1647         }
1648         return 0;
1649 }
1650
1651 static int process_inode_extref(struct extent_buffer *eb,
1652                                 int slot, struct btrfs_key *key,
1653                                 struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         u64 parent;
1661         int error;
1662         struct cache_tree *inode_cache;
1663         struct btrfs_inode_extref *extref;
1664         char namebuf[BTRFS_NAME_LEN];
1665
1666         inode_cache = &active_node->inode_cache;
1667
1668         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669         total = btrfs_item_size_nr(eb, slot);
1670         while (cur < total) {
1671                 name_len = btrfs_inode_extref_name_len(eb, extref);
1672                 index = btrfs_inode_extref_index(eb, extref);
1673                 parent = btrfs_inode_extref_parent(eb, extref);
1674                 if (name_len <= BTRFS_NAME_LEN) {
1675                         len = name_len;
1676                         error = 0;
1677                 } else {
1678                         len = BTRFS_NAME_LEN;
1679                         error = REF_ERR_NAME_TOO_LONG;
1680                 }
1681                 read_extent_buffer(eb, namebuf,
1682                                    (unsigned long)(extref + 1), len);
1683                 add_inode_backref(inode_cache, key->objectid, parent,
1684                                   index, namebuf, len, 0, key->type, error);
1685
1686                 len = sizeof(*extref) + name_len;
1687                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1688                 cur += len;
1689         }
1690         return 0;
1691
1692 }
1693
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695                             u64 len, u64 *found)
1696 {
1697         struct btrfs_key key;
1698         struct btrfs_path path;
1699         struct extent_buffer *leaf;
1700         int ret;
1701         size_t size;
1702         *found = 0;
1703         u64 csum_end;
1704         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1705
1706         btrfs_init_path(&path);
1707
1708         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1709         key.offset = start;
1710         key.type = BTRFS_EXTENT_CSUM_KEY;
1711
1712         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1713                                 &key, &path, 0, 0);
1714         if (ret < 0)
1715                 goto out;
1716         if (ret > 0 && path.slots[0] > 0) {
1717                 leaf = path.nodes[0];
1718                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720                     key.type == BTRFS_EXTENT_CSUM_KEY)
1721                         path.slots[0]--;
1722         }
1723
1724         while (len > 0) {
1725                 leaf = path.nodes[0];
1726                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1728                         if (ret > 0)
1729                                 break;
1730                         else if (ret < 0)
1731                                 goto out;
1732                         leaf = path.nodes[0];
1733                 }
1734
1735                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737                     key.type != BTRFS_EXTENT_CSUM_KEY)
1738                         break;
1739
1740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741                 if (key.offset >= start + len)
1742                         break;
1743
1744                 if (key.offset > start)
1745                         start = key.offset;
1746
1747                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749                 if (csum_end > start) {
1750                         size = min(csum_end - start, len);
1751                         len -= size;
1752                         start += size;
1753                         *found += size;
1754                 }
1755
1756                 path.slots[0]++;
1757         }
1758 out:
1759         btrfs_release_path(&path);
1760         if (ret < 0)
1761                 return ret;
1762         return 0;
1763 }
1764
1765 static int process_file_extent(struct btrfs_root *root,
1766                                 struct extent_buffer *eb,
1767                                 int slot, struct btrfs_key *key,
1768                                 struct shared_node *active_node)
1769 {
1770         struct inode_record *rec;
1771         struct btrfs_file_extent_item *fi;
1772         u64 num_bytes = 0;
1773         u64 disk_bytenr = 0;
1774         u64 extent_offset = 0;
1775         u64 mask = root->sectorsize - 1;
1776         int extent_type;
1777         int ret;
1778
1779         rec = active_node->current;
1780         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781         rec->found_file_extent = 1;
1782
1783         if (rec->extent_start == (u64)-1) {
1784                 rec->extent_start = key->offset;
1785                 rec->extent_end = key->offset;
1786         }
1787
1788         if (rec->extent_end > key->offset)
1789                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790         else if (rec->extent_end < key->offset) {
1791                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792                                            key->offset - rec->extent_end);
1793                 if (ret < 0)
1794                         return ret;
1795         }
1796
1797         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798         extent_type = btrfs_file_extent_type(eb, fi);
1799
1800         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1802                 if (num_bytes == 0)
1803                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804                 rec->found_size += num_bytes;
1805                 num_bytes = (num_bytes + mask) & ~mask;
1806         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810                 extent_offset = btrfs_file_extent_offset(eb, fi);
1811                 if (num_bytes == 0 || (num_bytes & mask))
1812                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813                 if (num_bytes + extent_offset >
1814                     btrfs_file_extent_ram_bytes(eb, fi))
1815                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817                     (btrfs_file_extent_compression(eb, fi) ||
1818                      btrfs_file_extent_encryption(eb, fi) ||
1819                      btrfs_file_extent_other_encoding(eb, fi)))
1820                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821                 if (disk_bytenr > 0)
1822                         rec->found_size += num_bytes;
1823         } else {
1824                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1825         }
1826         rec->extent_end = key->offset + num_bytes;
1827
1828         /*
1829          * The data reloc tree will copy full extents into its inode and then
1830          * copy the corresponding csums.  Because the extent it copied could be
1831          * a preallocated extent that hasn't been written to yet there may be no
1832          * csums to copy, ergo we won't have csums for our file extent.  This is
1833          * ok so just don't bother checking csums if the inode belongs to the
1834          * data reloc tree.
1835          */
1836         if (disk_bytenr > 0 &&
1837             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1838                 u64 found;
1839                 if (btrfs_file_extent_compression(eb, fi))
1840                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1841                 else
1842                         disk_bytenr += extent_offset;
1843
1844                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1845                 if (ret < 0)
1846                         return ret;
1847                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1848                         if (found > 0)
1849                                 rec->found_csum_item = 1;
1850                         if (found < num_bytes)
1851                                 rec->some_csum_missing = 1;
1852                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1853                         if (found > 0)
1854                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1855                 }
1856         }
1857         return 0;
1858 }
1859
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861                             struct walk_control *wc)
1862 {
1863         struct btrfs_key key;
1864         u32 nritems;
1865         int i;
1866         int ret = 0;
1867         struct cache_tree *inode_cache;
1868         struct shared_node *active_node;
1869
1870         if (wc->root_level == wc->active_node &&
1871             btrfs_root_refs(&root->root_item) == 0)
1872                 return 0;
1873
1874         active_node = wc->nodes[wc->active_node];
1875         inode_cache = &active_node->inode_cache;
1876         nritems = btrfs_header_nritems(eb);
1877         for (i = 0; i < nritems; i++) {
1878                 btrfs_item_key_to_cpu(eb, &key, i);
1879
1880                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1881                         continue;
1882                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1883                         continue;
1884
1885                 if (active_node->current == NULL ||
1886                     active_node->current->ino < key.objectid) {
1887                         if (active_node->current) {
1888                                 active_node->current->checked = 1;
1889                                 maybe_free_inode_rec(inode_cache,
1890                                                      active_node->current);
1891                         }
1892                         active_node->current = get_inode_rec(inode_cache,
1893                                                              key.objectid, 1);
1894                         BUG_ON(IS_ERR(active_node->current));
1895                 }
1896                 switch (key.type) {
1897                 case BTRFS_DIR_ITEM_KEY:
1898                 case BTRFS_DIR_INDEX_KEY:
1899                         ret = process_dir_item(root, eb, i, &key, active_node);
1900                         break;
1901                 case BTRFS_INODE_REF_KEY:
1902                         ret = process_inode_ref(eb, i, &key, active_node);
1903                         break;
1904                 case BTRFS_INODE_EXTREF_KEY:
1905                         ret = process_inode_extref(eb, i, &key, active_node);
1906                         break;
1907                 case BTRFS_INODE_ITEM_KEY:
1908                         ret = process_inode_item(eb, i, &key, active_node);
1909                         break;
1910                 case BTRFS_EXTENT_DATA_KEY:
1911                         ret = process_file_extent(root, eb, i, &key,
1912                                                   active_node);
1913                         break;
1914                 default:
1915                         break;
1916                 };
1917         }
1918         return ret;
1919 }
1920
1921 static void reada_walk_down(struct btrfs_root *root,
1922                             struct extent_buffer *node, int slot)
1923 {
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         u32 nritems;
1927         u32 blocksize;
1928         int i;
1929         int level;
1930
1931         level = btrfs_header_level(node);
1932         if (level != 1)
1933                 return;
1934
1935         nritems = btrfs_header_nritems(node);
1936         blocksize = root->nodesize;
1937         for (i = slot; i < nritems; i++) {
1938                 bytenr = btrfs_node_blockptr(node, i);
1939                 ptr_gen = btrfs_node_ptr_generation(node, i);
1940                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1941         }
1942 }
1943
1944 /*
1945  * Check the child node/leaf by the following condition:
1946  * 1. the first item key of the node/leaf should be the same with the one
1947  *    in parent.
1948  * 2. block in parent node should match the child node/leaf.
1949  * 3. generation of parent node and child's header should be consistent.
1950  *
1951  * Or the child node/leaf pointed by the key in parent is not valid.
1952  *
1953  * We hope to check leaf owner too, but since subvol may share leaves,
1954  * which makes leaf owner check not so strong, key check should be
1955  * sufficient enough for that case.
1956  */
1957 static int check_child_node(struct btrfs_root *root,
1958                             struct extent_buffer *parent, int slot,
1959                             struct extent_buffer *child)
1960 {
1961         struct btrfs_key parent_key;
1962         struct btrfs_key child_key;
1963         int ret = 0;
1964
1965         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966         if (btrfs_header_level(child) == 0)
1967                 btrfs_item_key_to_cpu(child, &child_key, 0);
1968         else
1969                 btrfs_node_key_to_cpu(child, &child_key, 0);
1970
1971         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1972                 ret = -EINVAL;
1973                 fprintf(stderr,
1974                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975                         parent_key.objectid, parent_key.type, parent_key.offset,
1976                         child_key.objectid, child_key.type, child_key.offset);
1977         }
1978         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1979                 ret = -EINVAL;
1980                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981                         btrfs_node_blockptr(parent, slot),
1982                         btrfs_header_bytenr(child));
1983         }
1984         if (btrfs_node_ptr_generation(parent, slot) !=
1985             btrfs_header_generation(child)) {
1986                 ret = -EINVAL;
1987                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988                         btrfs_header_generation(child),
1989                         btrfs_node_ptr_generation(parent, slot));
1990         }
1991         return ret;
1992 }
1993
1994 struct node_refs {
1995         u64 bytenr[BTRFS_MAX_LEVEL];
1996         u64 refs[BTRFS_MAX_LEVEL];
1997 };
1998
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000                           struct walk_control *wc, int *level,
2001                           struct node_refs *nrefs)
2002 {
2003         enum btrfs_tree_block_status status;
2004         u64 bytenr;
2005         u64 ptr_gen;
2006         struct extent_buffer *next;
2007         struct extent_buffer *cur;
2008         u32 blocksize;
2009         int ret, err = 0;
2010         u64 refs;
2011
2012         WARN_ON(*level < 0);
2013         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014
2015         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016                 refs = nrefs->refs[*level];
2017                 ret = 0;
2018         } else {
2019                 ret = btrfs_lookup_extent_info(NULL, root,
2020                                        path->nodes[*level]->start,
2021                                        *level, 1, &refs, NULL);
2022                 if (ret < 0) {
2023                         err = ret;
2024                         goto out;
2025                 }
2026                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027                 nrefs->refs[*level] = refs;
2028         }
2029
2030         if (refs > 1) {
2031                 ret = enter_shared_node(root, path->nodes[*level]->start,
2032                                         refs, wc, *level);
2033                 if (ret > 0) {
2034                         err = ret;
2035                         goto out;
2036                 }
2037         }
2038
2039         while (*level >= 0) {
2040                 WARN_ON(*level < 0);
2041                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042                 cur = path->nodes[*level];
2043
2044                 if (btrfs_header_level(cur) != *level)
2045                         WARN_ON(1);
2046
2047                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2048                         break;
2049                 if (*level == 0) {
2050                         ret = process_one_leaf(root, cur, wc);
2051                         if (ret < 0)
2052                                 err = ret;
2053                         break;
2054                 }
2055                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057                 blocksize = root->nodesize;
2058
2059                 if (bytenr == nrefs->bytenr[*level - 1]) {
2060                         refs = nrefs->refs[*level - 1];
2061                 } else {
2062                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063                                         *level - 1, 1, &refs, NULL);
2064                         if (ret < 0) {
2065                                 refs = 0;
2066                         } else {
2067                                 nrefs->bytenr[*level - 1] = bytenr;
2068                                 nrefs->refs[*level - 1] = refs;
2069                         }
2070                 }
2071
2072                 if (refs > 1) {
2073                         ret = enter_shared_node(root, bytenr, refs,
2074                                                 wc, *level - 1);
2075                         if (ret > 0) {
2076                                 path->slots[*level]++;
2077                                 continue;
2078                         }
2079                 }
2080
2081                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083                         free_extent_buffer(next);
2084                         reada_walk_down(root, cur, path->slots[*level]);
2085                         next = read_tree_block(root, bytenr, blocksize,
2086                                                ptr_gen);
2087                         if (!extent_buffer_uptodate(next)) {
2088                                 struct btrfs_key node_key;
2089
2090                                 btrfs_node_key_to_cpu(path->nodes[*level],
2091                                                       &node_key,
2092                                                       path->slots[*level]);
2093                                 btrfs_add_corrupt_extent_record(root->fs_info,
2094                                                 &node_key,
2095                                                 path->nodes[*level]->start,
2096                                                 root->nodesize, *level);
2097                                 err = -EIO;
2098                                 goto out;
2099                         }
2100                 }
2101
2102                 ret = check_child_node(root, cur, path->slots[*level], next);
2103                 if (ret) {
2104                         err = ret;
2105                         goto out;
2106                 }
2107
2108                 if (btrfs_is_leaf(next))
2109                         status = btrfs_check_leaf(root, NULL, next);
2110                 else
2111                         status = btrfs_check_node(root, NULL, next);
2112                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113                         free_extent_buffer(next);
2114                         err = -EIO;
2115                         goto out;
2116                 }
2117
2118                 *level = *level - 1;
2119                 free_extent_buffer(path->nodes[*level]);
2120                 path->nodes[*level] = next;
2121                 path->slots[*level] = 0;
2122         }
2123 out:
2124         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2125         return err;
2126 }
2127
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129                         struct walk_control *wc, int *level)
2130 {
2131         int i;
2132         struct extent_buffer *leaf;
2133
2134         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135                 leaf = path->nodes[i];
2136                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2137                         path->slots[i]++;
2138                         *level = i;
2139                         return 0;
2140                 } else {
2141                         free_extent_buffer(path->nodes[*level]);
2142                         path->nodes[*level] = NULL;
2143                         BUG_ON(*level > wc->active_node);
2144                         if (*level == wc->active_node)
2145                                 leave_shared_node(root, wc, *level);
2146                         *level = i + 1;
2147                 }
2148         }
2149         return 1;
2150 }
2151
2152 static int check_root_dir(struct inode_record *rec)
2153 {
2154         struct inode_backref *backref;
2155         int ret = -1;
2156
2157         if (!rec->found_inode_item || rec->errors)
2158                 goto out;
2159         if (rec->nlink != 1 || rec->found_link != 0)
2160                 goto out;
2161         if (list_empty(&rec->backrefs))
2162                 goto out;
2163         backref = to_inode_backref(rec->backrefs.next);
2164         if (!backref->found_inode_ref)
2165                 goto out;
2166         if (backref->index != 0 || backref->namelen != 2 ||
2167             memcmp(backref->name, "..", 2))
2168                 goto out;
2169         if (backref->found_dir_index || backref->found_dir_item)
2170                 goto out;
2171         ret = 0;
2172 out:
2173         return ret;
2174 }
2175
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177                               struct btrfs_root *root, struct btrfs_path *path,
2178                               struct inode_record *rec)
2179 {
2180         struct btrfs_inode_item *ei;
2181         struct btrfs_key key;
2182         int ret;
2183
2184         key.objectid = rec->ino;
2185         key.type = BTRFS_INODE_ITEM_KEY;
2186         key.offset = (u64)-1;
2187
2188         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2189         if (ret < 0)
2190                 goto out;
2191         if (ret) {
2192                 if (!path->slots[0]) {
2193                         ret = -ENOENT;
2194                         goto out;
2195                 }
2196                 path->slots[0]--;
2197                 ret = 0;
2198         }
2199         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200         if (key.objectid != rec->ino) {
2201                 ret = -ENOENT;
2202                 goto out;
2203         }
2204
2205         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206                             struct btrfs_inode_item);
2207         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208         btrfs_mark_buffer_dirty(path->nodes[0]);
2209         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211                root->root_key.objectid);
2212 out:
2213         btrfs_release_path(path);
2214         return ret;
2215 }
2216
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218                                     struct btrfs_root *root,
2219                                     struct btrfs_path *path,
2220                                     struct inode_record *rec)
2221 {
2222         int ret;
2223
2224         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225         btrfs_release_path(path);
2226         if (!ret)
2227                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2228         return ret;
2229 }
2230
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         struct btrfs_inode_item *ei;
2237         struct btrfs_key key;
2238         int ret = 0;
2239
2240         key.objectid = rec->ino;
2241         key.type = BTRFS_INODE_ITEM_KEY;
2242         key.offset = 0;
2243
2244         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2245         if (ret) {
2246                 if (ret > 0)
2247                         ret = -ENOENT;
2248                 goto out;
2249         }
2250
2251         /* Since ret == 0, no need to check anything */
2252         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253                             struct btrfs_inode_item);
2254         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255         btrfs_mark_buffer_dirty(path->nodes[0]);
2256         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257         printf("reset nbytes for ino %llu root %llu\n",
2258                rec->ino, root->root_key.objectid);
2259 out:
2260         btrfs_release_path(path);
2261         return ret;
2262 }
2263
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265                                  struct cache_tree *inode_cache,
2266                                  struct inode_record *rec,
2267                                  struct inode_backref *backref)
2268 {
2269         struct btrfs_path *path;
2270         struct btrfs_trans_handle *trans;
2271         struct btrfs_dir_item *dir_item;
2272         struct extent_buffer *leaf;
2273         struct btrfs_key key;
2274         struct btrfs_disk_key disk_key;
2275         struct inode_record *dir_rec;
2276         unsigned long name_ptr;
2277         u32 data_size = sizeof(*dir_item) + backref->namelen;
2278         int ret;
2279
2280         path = btrfs_alloc_path();
2281         if (!path)
2282                 return -ENOMEM;
2283
2284         trans = btrfs_start_transaction(root, 1);
2285         if (IS_ERR(trans)) {
2286                 btrfs_free_path(path);
2287                 return PTR_ERR(trans);
2288         }
2289
2290         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291                 (unsigned long long)rec->ino);
2292         key.objectid = backref->dir;
2293         key.type = BTRFS_DIR_INDEX_KEY;
2294         key.offset = backref->index;
2295
2296         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2297         BUG_ON(ret);
2298
2299         leaf = path->nodes[0];
2300         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2301
2302         disk_key.objectid = cpu_to_le64(rec->ino);
2303         disk_key.type = BTRFS_INODE_ITEM_KEY;
2304         disk_key.offset = 0;
2305
2306         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308         btrfs_set_dir_data_len(leaf, dir_item, 0);
2309         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310         name_ptr = (unsigned long)(dir_item + 1);
2311         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312         btrfs_mark_buffer_dirty(leaf);
2313         btrfs_free_path(path);
2314         btrfs_commit_transaction(trans, root);
2315
2316         backref->found_dir_index = 1;
2317         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318         BUG_ON(IS_ERR(dir_rec));
2319         if (!dir_rec)
2320                 return 0;
2321         dir_rec->found_size += backref->namelen;
2322         if (dir_rec->found_size == dir_rec->isize &&
2323             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325         if (dir_rec->found_size != dir_rec->isize)
2326                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2327
2328         return 0;
2329 }
2330
2331 static int delete_dir_index(struct btrfs_root *root,
2332                             struct cache_tree *inode_cache,
2333                             struct inode_record *rec,
2334                             struct inode_backref *backref)
2335 {
2336         struct btrfs_trans_handle *trans;
2337         struct btrfs_dir_item *di;
2338         struct btrfs_path *path;
2339         int ret = 0;
2340
2341         path = btrfs_alloc_path();
2342         if (!path)
2343                 return -ENOMEM;
2344
2345         trans = btrfs_start_transaction(root, 1);
2346         if (IS_ERR(trans)) {
2347                 btrfs_free_path(path);
2348                 return PTR_ERR(trans);
2349         }
2350
2351
2352         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353                 (unsigned long long)backref->dir,
2354                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355                 (unsigned long long)root->objectid);
2356
2357         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358                                     backref->name, backref->namelen,
2359                                     backref->index, -1);
2360         if (IS_ERR(di)) {
2361                 ret = PTR_ERR(di);
2362                 btrfs_free_path(path);
2363                 btrfs_commit_transaction(trans, root);
2364                 if (ret == -ENOENT)
2365                         return 0;
2366                 return ret;
2367         }
2368
2369         if (!di)
2370                 ret = btrfs_del_item(trans, root, path);
2371         else
2372                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2373         BUG_ON(ret);
2374         btrfs_free_path(path);
2375         btrfs_commit_transaction(trans, root);
2376         return ret;
2377 }
2378
2379 static int create_inode_item(struct btrfs_root *root,
2380                              struct inode_record *rec,
2381                              struct inode_backref *backref, int root_dir)
2382 {
2383         struct btrfs_trans_handle *trans;
2384         struct btrfs_inode_item inode_item;
2385         time_t now = time(NULL);
2386         int ret;
2387
2388         trans = btrfs_start_transaction(root, 1);
2389         if (IS_ERR(trans)) {
2390                 ret = PTR_ERR(trans);
2391                 return ret;
2392         }
2393
2394         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395                 "be incomplete, please check permissions and content after "
2396                 "the fsck completes.\n", (unsigned long long)root->objectid,
2397                 (unsigned long long)rec->ino);
2398
2399         memset(&inode_item, 0, sizeof(inode_item));
2400         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2401         if (root_dir)
2402                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2403         else
2404                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406         if (rec->found_dir_item) {
2407                 if (rec->found_file_extent)
2408                         fprintf(stderr, "root %llu inode %llu has both a dir "
2409                                 "item and extents, unsure if it is a dir or a "
2410                                 "regular file so setting it as a directory\n",
2411                                 (unsigned long long)root->objectid,
2412                                 (unsigned long long)rec->ino);
2413                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415         } else if (!rec->found_dir_item) {
2416                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2418         }
2419         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2427
2428         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2429         BUG_ON(ret);
2430         btrfs_commit_transaction(trans, root);
2431         return 0;
2432 }
2433
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435                                  struct inode_record *rec,
2436                                  struct cache_tree *inode_cache,
2437                                  int delete)
2438 {
2439         struct inode_backref *tmp, *backref;
2440         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2441         int ret = 0;
2442         int repaired = 0;
2443
2444         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445                 if (!delete && rec->ino == root_dirid) {
2446                         if (!rec->found_inode_item) {
2447                                 ret = create_inode_item(root, rec, backref, 1);
2448                                 if (ret)
2449                                         break;
2450                                 repaired++;
2451                         }
2452                 }
2453
2454                 /* Index 0 for root dir's are special, don't mess with it */
2455                 if (rec->ino == root_dirid && backref->index == 0)
2456                         continue;
2457
2458                 if (delete &&
2459                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2460                      (backref->found_dir_index && backref->found_inode_ref &&
2461                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462                         ret = delete_dir_index(root, inode_cache, rec, backref);
2463                         if (ret)
2464                                 break;
2465                         repaired++;
2466                         list_del(&backref->list);
2467                         free(backref);
2468                 }
2469
2470                 if (!delete && !backref->found_dir_index &&
2471                     backref->found_dir_item && backref->found_inode_ref) {
2472                         ret = add_missing_dir_index(root, inode_cache, rec,
2473                                                     backref);
2474                         if (ret)
2475                                 break;
2476                         repaired++;
2477                         if (backref->found_dir_item &&
2478                             backref->found_dir_index &&
2479                             backref->found_dir_index) {
2480                                 if (!backref->errors &&
2481                                     backref->found_inode_ref) {
2482                                         list_del(&backref->list);
2483                                         free(backref);
2484                                 }
2485                         }
2486                 }
2487
2488                 if (!delete && (!backref->found_dir_index &&
2489                                 !backref->found_dir_item &&
2490                                 backref->found_inode_ref)) {
2491                         struct btrfs_trans_handle *trans;
2492                         struct btrfs_key location;
2493
2494                         ret = check_dir_conflict(root, backref->name,
2495                                                  backref->namelen,
2496                                                  backref->dir,
2497                                                  backref->index);
2498                         if (ret) {
2499                                 /*
2500                                  * let nlink fixing routine to handle it,
2501                                  * which can do it better.
2502                                  */
2503                                 ret = 0;
2504                                 break;
2505                         }
2506                         location.objectid = rec->ino;
2507                         location.type = BTRFS_INODE_ITEM_KEY;
2508                         location.offset = 0;
2509
2510                         trans = btrfs_start_transaction(root, 1);
2511                         if (IS_ERR(trans)) {
2512                                 ret = PTR_ERR(trans);
2513                                 break;
2514                         }
2515                         fprintf(stderr, "adding missing dir index/item pair "
2516                                 "for inode %llu\n",
2517                                 (unsigned long long)rec->ino);
2518                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2519                                                     backref->namelen,
2520                                                     backref->dir, &location,
2521                                                     imode_to_type(rec->imode),
2522                                                     backref->index);
2523                         BUG_ON(ret);
2524                         btrfs_commit_transaction(trans, root);
2525                         repaired++;
2526                 }
2527
2528                 if (!delete && (backref->found_inode_ref &&
2529                                 backref->found_dir_index &&
2530                                 backref->found_dir_item &&
2531                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532                                 !rec->found_inode_item)) {
2533                         ret = create_inode_item(root, rec, backref, 0);
2534                         if (ret)
2535                                 break;
2536                         repaired++;
2537                 }
2538
2539         }
2540         return ret ? ret : repaired;
2541 }
2542
2543 /*
2544  * To determine the file type for nlink/inode_item repair
2545  *
2546  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547  * Return -ENOENT if file type is not found.
2548  */
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2550 {
2551         struct inode_backref *backref;
2552
2553         /* For inode item recovered case */
2554         if (rec->found_inode_item) {
2555                 *type = imode_to_type(rec->imode);
2556                 return 0;
2557         }
2558
2559         list_for_each_entry(backref, &rec->backrefs, list) {
2560                 if (backref->found_dir_index || backref->found_dir_item) {
2561                         *type = backref->filetype;
2562                         return 0;
2563                 }
2564         }
2565         return -ENOENT;
2566 }
2567
2568 /*
2569  * To determine the file name for nlink repair
2570  *
2571  * Return 0 if file name is found, set name and namelen.
2572  * Return -ENOENT if file name is not found.
2573  */
2574 static int find_file_name(struct inode_record *rec,
2575                           char *name, int *namelen)
2576 {
2577         struct inode_backref *backref;
2578
2579         list_for_each_entry(backref, &rec->backrefs, list) {
2580                 if (backref->found_dir_index || backref->found_dir_item ||
2581                     backref->found_inode_ref) {
2582                         memcpy(name, backref->name, backref->namelen);
2583                         *namelen = backref->namelen;
2584                         return 0;
2585                 }
2586         }
2587         return -ENOENT;
2588 }
2589
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592                        struct btrfs_root *root,
2593                        struct btrfs_path *path,
2594                        struct inode_record *rec)
2595 {
2596         struct inode_backref *backref;
2597         struct inode_backref *tmp;
2598         struct btrfs_key key;
2599         struct btrfs_inode_item *inode_item;
2600         int ret = 0;
2601
2602         /* We don't believe this either, reset it and iterate backref */
2603         rec->found_link = 0;
2604
2605         /* Remove all backref including the valid ones */
2606         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608                                    backref->index, backref->name,
2609                                    backref->namelen, 0);
2610                 if (ret < 0)
2611                         goto out;
2612
2613                 /* remove invalid backref, so it won't be added back */
2614                 if (!(backref->found_dir_index &&
2615                       backref->found_dir_item &&
2616                       backref->found_inode_ref)) {
2617                         list_del(&backref->list);
2618                         free(backref);
2619                 } else {
2620                         rec->found_link++;
2621                 }
2622         }
2623
2624         /* Set nlink to 0 */
2625         key.objectid = rec->ino;
2626         key.type = BTRFS_INODE_ITEM_KEY;
2627         key.offset = 0;
2628         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2629         if (ret < 0)
2630                 goto out;
2631         if (ret > 0) {
2632                 ret = -ENOENT;
2633                 goto out;
2634         }
2635         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636                                     struct btrfs_inode_item);
2637         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638         btrfs_mark_buffer_dirty(path->nodes[0]);
2639         btrfs_release_path(path);
2640
2641         /*
2642          * Add back valid inode_ref/dir_item/dir_index,
2643          * add_link() will handle the nlink inc, so new nlink must be correct
2644          */
2645         list_for_each_entry(backref, &rec->backrefs, list) {
2646                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647                                      backref->name, backref->namelen,
2648                                      backref->filetype, &backref->index, 1);
2649                 if (ret < 0)
2650                         goto out;
2651         }
2652 out:
2653         btrfs_release_path(path);
2654         return ret;
2655 }
2656
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658                                struct btrfs_root *root,
2659                                struct btrfs_path *path,
2660                                struct inode_record *rec)
2661 {
2662         char *dir_name = "lost+found";
2663         char namebuf[BTRFS_NAME_LEN] = {0};
2664         u64 lost_found_ino;
2665         u32 mode = 0700;
2666         u8 type = 0;
2667         int namelen = 0;
2668         int name_recovered = 0;
2669         int type_recovered = 0;
2670         int ret = 0;
2671
2672         /*
2673          * Get file name and type first before these invalid inode ref
2674          * are deleted by remove_all_invalid_backref()
2675          */
2676         name_recovered = !find_file_name(rec, namebuf, &namelen);
2677         type_recovered = !find_file_type(rec, &type);
2678
2679         if (!name_recovered) {
2680                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681                        rec->ino, rec->ino);
2682                 namelen = count_digits(rec->ino);
2683                 sprintf(namebuf, "%llu", rec->ino);
2684                 name_recovered = 1;
2685         }
2686         if (!type_recovered) {
2687                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2688                        rec->ino);
2689                 type = BTRFS_FT_REG_FILE;
2690                 type_recovered = 1;
2691         }
2692
2693         ret = reset_nlink(trans, root, path, rec);
2694         if (ret < 0) {
2695                 fprintf(stderr,
2696                         "Failed to reset nlink for inode %llu: %s\n",
2697                         rec->ino, strerror(-ret));
2698                 goto out;
2699         }
2700
2701         if (rec->found_link == 0) {
2702                 lost_found_ino = root->highest_inode;
2703                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2704                         ret = -EOVERFLOW;
2705                         goto out;
2706                 }
2707                 lost_found_ino++;
2708                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2710                                   mode);
2711                 if (ret < 0) {
2712                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713                                 dir_name, strerror(-ret));
2714                         goto out;
2715                 }
2716                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717                                      namebuf, namelen, type, NULL, 1);
2718                 /*
2719                  * Add ".INO" suffix several times to handle case where
2720                  * "FILENAME.INO" is already taken by another file.
2721                  */
2722                 while (ret == -EEXIST) {
2723                         /*
2724                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2725                          */
2726                         if (namelen + count_digits(rec->ino) + 1 >
2727                             BTRFS_NAME_LEN) {
2728                                 ret = -EFBIG;
2729                                 goto out;
2730                         }
2731                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2732                                  ".%llu", rec->ino);
2733                         namelen += count_digits(rec->ino) + 1;
2734                         ret = btrfs_add_link(trans, root, rec->ino,
2735                                              lost_found_ino, namebuf,
2736                                              namelen, type, NULL, 1);
2737                 }
2738                 if (ret < 0) {
2739                         fprintf(stderr,
2740                                 "Failed to link the inode %llu to %s dir: %s\n",
2741                                 rec->ino, dir_name, strerror(-ret));
2742                         goto out;
2743                 }
2744                 /*
2745                  * Just increase the found_link, don't actually add the
2746                  * backref. This will make things easier and this inode
2747                  * record will be freed after the repair is done.
2748                  * So fsck will not report problem about this inode.
2749                  */
2750                 rec->found_link++;
2751                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752                        namelen, namebuf, dir_name);
2753         }
2754         printf("Fixed the nlink of inode %llu\n", rec->ino);
2755 out:
2756         /*
2757          * Clear the flag anyway, or we will loop forever for the same inode
2758          * as it will not be removed from the bad inode list and the dead loop
2759          * happens.
2760          */
2761         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762         btrfs_release_path(path);
2763         return ret;
2764 }
2765
2766 /*
2767  * Check if there is any normal(reg or prealloc) file extent for given
2768  * ino.
2769  * This is used to determine the file type when neither its dir_index/item or
2770  * inode_item exists.
2771  *
2772  * This will *NOT* report error, if any error happens, just consider it does
2773  * not have any normal file extent.
2774  */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2776 {
2777         struct btrfs_path *path;
2778         struct btrfs_key key;
2779         struct btrfs_key found_key;
2780         struct btrfs_file_extent_item *fi;
2781         u8 type;
2782         int ret = 0;
2783
2784         path = btrfs_alloc_path();
2785         if (!path)
2786                 goto out;
2787         key.objectid = ino;
2788         key.type = BTRFS_EXTENT_DATA_KEY;
2789         key.offset = 0;
2790
2791         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2792         if (ret < 0) {
2793                 ret = 0;
2794                 goto out;
2795         }
2796         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797                 ret = btrfs_next_leaf(root, path);
2798                 if (ret) {
2799                         ret = 0;
2800                         goto out;
2801                 }
2802         }
2803         while (1) {
2804                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2805                                       path->slots[0]);
2806                 if (found_key.objectid != ino ||
2807                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2808                         break;
2809                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810                                     struct btrfs_file_extent_item);
2811                 type = btrfs_file_extent_type(path->nodes[0], fi);
2812                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2813                         ret = 1;
2814                         goto out;
2815                 }
2816         }
2817 out:
2818         btrfs_free_path(path);
2819         return ret;
2820 }
2821
2822 static u32 btrfs_type_to_imode(u8 type)
2823 {
2824         static u32 imode_by_btrfs_type[] = {
2825                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2826                 [BTRFS_FT_DIR]          = S_IFDIR,
2827                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2828                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2829                 [BTRFS_FT_FIFO]         = S_IFIFO,
2830                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2831                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2832         };
2833
2834         return imode_by_btrfs_type[(type)];
2835 }
2836
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838                                 struct btrfs_root *root,
2839                                 struct btrfs_path *path,
2840                                 struct inode_record *rec)
2841 {
2842         u8 filetype;
2843         u32 mode = 0700;
2844         int type_recovered = 0;
2845         int ret = 0;
2846
2847         printf("Trying to rebuild inode:%llu\n", rec->ino);
2848
2849         type_recovered = !find_file_type(rec, &filetype);
2850
2851         /*
2852          * Try to determine inode type if type not found.
2853          *
2854          * For found regular file extent, it must be FILE.
2855          * For found dir_item/index, it must be DIR.
2856          *
2857          * For undetermined one, use FILE as fallback.
2858          *
2859          * TODO:
2860          * 1. If found backref(inode_index/item is already handled) to it,
2861          *    it must be DIR.
2862          *    Need new inode-inode ref structure to allow search for that.
2863          */
2864         if (!type_recovered) {
2865                 if (rec->found_file_extent &&
2866                     find_normal_file_extent(root, rec->ino)) {
2867                         type_recovered = 1;
2868                         filetype = BTRFS_FT_REG_FILE;
2869                 } else if (rec->found_dir_item) {
2870                         type_recovered = 1;
2871                         filetype = BTRFS_FT_DIR;
2872                 } else if (!list_empty(&rec->orphan_extents)) {
2873                         type_recovered = 1;
2874                         filetype = BTRFS_FT_REG_FILE;
2875                 } else{
2876                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2877                                rec->ino);
2878                         type_recovered = 1;
2879                         filetype = BTRFS_FT_REG_FILE;
2880                 }
2881         }
2882
2883         ret = btrfs_new_inode(trans, root, rec->ino,
2884                               mode | btrfs_type_to_imode(filetype));
2885         if (ret < 0)
2886                 goto out;
2887
2888         /*
2889          * Here inode rebuild is done, we only rebuild the inode item,
2890          * don't repair the nlink(like move to lost+found).
2891          * That is the job of nlink repair.
2892          *
2893          * We just fill the record and return
2894          */
2895         rec->found_dir_item = 1;
2896         rec->imode = mode | btrfs_type_to_imode(filetype);
2897         rec->nlink = 0;
2898         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899         /* Ensure the inode_nlinks repair function will be called */
2900         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2901 out:
2902         return ret;
2903 }
2904
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906                                       struct btrfs_root *root,
2907                                       struct btrfs_path *path,
2908                                       struct inode_record *rec)
2909 {
2910         struct orphan_data_extent *orphan;
2911         struct orphan_data_extent *tmp;
2912         int ret = 0;
2913
2914         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2915                 /*
2916                  * Check for conflicting file extents
2917                  *
2918                  * Here we don't know whether the extents is compressed or not,
2919                  * so we can only assume it not compressed nor data offset,
2920                  * and use its disk_len as extent length.
2921                  */
2922                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923                                        orphan->offset, orphan->disk_len, 0);
2924                 btrfs_release_path(path);
2925                 if (ret < 0)
2926                         goto out;
2927                 if (!ret) {
2928                         fprintf(stderr,
2929                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930                                 orphan->disk_bytenr, orphan->disk_len);
2931                         ret = btrfs_free_extent(trans,
2932                                         root->fs_info->extent_root,
2933                                         orphan->disk_bytenr, orphan->disk_len,
2934                                         0, root->objectid, orphan->objectid,
2935                                         orphan->offset);
2936                         if (ret < 0)
2937                                 goto out;
2938                 }
2939                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940                                 orphan->offset, orphan->disk_bytenr,
2941                                 orphan->disk_len, orphan->disk_len);
2942                 if (ret < 0)
2943                         goto out;
2944
2945                 /* Update file size info */
2946                 rec->found_size += orphan->disk_len;
2947                 if (rec->found_size == rec->nbytes)
2948                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2949
2950                 /* Update the file extent hole info too */
2951                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2952                                            orphan->disk_len);
2953                 if (ret < 0)
2954                         goto out;
2955                 if (RB_EMPTY_ROOT(&rec->holes))
2956                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2957
2958                 list_del(&orphan->list);
2959                 free(orphan);
2960         }
2961         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2962 out:
2963         return ret;
2964 }
2965
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967                                         struct btrfs_root *root,
2968                                         struct btrfs_path *path,
2969                                         struct inode_record *rec)
2970 {
2971         struct rb_node *node;
2972         struct file_extent_hole *hole;
2973         int found = 0;
2974         int ret = 0;
2975
2976         node = rb_first(&rec->holes);
2977
2978         while (node) {
2979                 found = 1;
2980                 hole = rb_entry(node, struct file_extent_hole, node);
2981                 ret = btrfs_punch_hole(trans, root, rec->ino,
2982                                        hole->start, hole->len);
2983                 if (ret < 0)
2984                         goto out;
2985                 ret = del_file_extent_hole(&rec->holes, hole->start,
2986                                            hole->len);
2987                 if (ret < 0)
2988                         goto out;
2989                 if (RB_EMPTY_ROOT(&rec->holes))
2990                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991                 node = rb_first(&rec->holes);
2992         }
2993         /* special case for a file losing all its file extent */
2994         if (!found) {
2995                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996                                        round_up(rec->isize, root->sectorsize));
2997                 if (ret < 0)
2998                         goto out;
2999         }
3000         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001                rec->ino, root->objectid);
3002 out:
3003         return ret;
3004 }
3005
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3007 {
3008         struct btrfs_trans_handle *trans;
3009         struct btrfs_path *path;
3010         int ret = 0;
3011
3012         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013                              I_ERR_NO_ORPHAN_ITEM |
3014                              I_ERR_LINK_COUNT_WRONG |
3015                              I_ERR_NO_INODE_ITEM |
3016                              I_ERR_FILE_EXTENT_ORPHAN |
3017                              I_ERR_FILE_EXTENT_DISCOUNT|
3018                              I_ERR_FILE_NBYTES_WRONG)))
3019                 return rec->errors;
3020
3021         path = btrfs_alloc_path();
3022         if (!path)
3023                 return -ENOMEM;
3024
3025         /*
3026          * For nlink repair, it may create a dir and add link, so
3027          * 2 for parent(256)'s dir_index and dir_item
3028          * 2 for lost+found dir's inode_item and inode_ref
3029          * 1 for the new inode_ref of the file
3030          * 2 for lost+found dir's dir_index and dir_item for the file
3031          */
3032         trans = btrfs_start_transaction(root, 7);
3033         if (IS_ERR(trans)) {
3034                 btrfs_free_path(path);
3035                 return PTR_ERR(trans);
3036         }
3037
3038         if (rec->errors & I_ERR_NO_INODE_ITEM)
3039                 ret = repair_inode_no_item(trans, root, path, rec);
3040         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043                 ret = repair_inode_discount_extent(trans, root, path, rec);
3044         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045                 ret = repair_inode_isize(trans, root, path, rec);
3046         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047                 ret = repair_inode_orphan_item(trans, root, path, rec);
3048         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049                 ret = repair_inode_nlinks(trans, root, path, rec);
3050         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051                 ret = repair_inode_nbytes(trans, root, path, rec);
3052         btrfs_commit_transaction(trans, root);
3053         btrfs_free_path(path);
3054         return ret;
3055 }
3056
3057 static int check_inode_recs(struct btrfs_root *root,
3058                             struct cache_tree *inode_cache)
3059 {
3060         struct cache_extent *cache;
3061         struct ptr_node *node;
3062         struct inode_record *rec;
3063         struct inode_backref *backref;
3064         int stage = 0;
3065         int ret = 0;
3066         int err = 0;
3067         u64 error = 0;
3068         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3069
3070         if (btrfs_root_refs(&root->root_item) == 0) {
3071                 if (!cache_tree_empty(inode_cache))
3072                         fprintf(stderr, "warning line %d\n", __LINE__);
3073                 return 0;
3074         }
3075
3076         /*
3077          * We need to record the highest inode number for later 'lost+found'
3078          * dir creation.
3079          * We must select an ino not used/referred by any existing inode, or
3080          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081          * this may cause 'lost+found' dir has wrong nlinks.
3082          */
3083         cache = last_cache_extent(inode_cache);
3084         if (cache) {
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 if (rec->ino > root->highest_inode)
3088                         root->highest_inode = rec->ino;
3089         }
3090
3091         /*
3092          * We need to repair backrefs first because we could change some of the
3093          * errors in the inode recs.
3094          *
3095          * We also need to go through and delete invalid backrefs first and then
3096          * add the correct ones second.  We do this because we may get EEXIST
3097          * when adding back the correct index because we hadn't yet deleted the
3098          * invalid index.
3099          *
3100          * For example, if we were missing a dir index then the directories
3101          * isize would be wrong, so if we fixed the isize to what we thought it
3102          * would be and then fixed the backref we'd still have a invalid fs, so
3103          * we need to add back the dir index and then check to see if the isize
3104          * is still wrong.
3105          */
3106         while (stage < 3) {
3107                 stage++;
3108                 if (stage == 3 && !err)
3109                         break;
3110
3111                 cache = search_cache_extent(inode_cache, 0);
3112                 while (repair && cache) {
3113                         node = container_of(cache, struct ptr_node, cache);
3114                         rec = node->data;
3115                         cache = next_cache_extent(cache);
3116
3117                         /* Need to free everything up and rescan */
3118                         if (stage == 3) {
3119                                 remove_cache_extent(inode_cache, &node->cache);
3120                                 free(node);
3121                                 free_inode_rec(rec);
3122                                 continue;
3123                         }
3124
3125                         if (list_empty(&rec->backrefs))
3126                                 continue;
3127
3128                         ret = repair_inode_backrefs(root, rec, inode_cache,
3129                                                     stage == 1);
3130                         if (ret < 0) {
3131                                 err = ret;
3132                                 stage = 2;
3133                                 break;
3134                         } if (ret > 0) {
3135                                 err = -EAGAIN;
3136                         }
3137                 }
3138         }
3139         if (err)
3140                 return err;
3141
3142         rec = get_inode_rec(inode_cache, root_dirid, 0);
3143         BUG_ON(IS_ERR(rec));
3144         if (rec) {
3145                 ret = check_root_dir(rec);
3146                 if (ret) {
3147                         fprintf(stderr, "root %llu root dir %llu error\n",
3148                                 (unsigned long long)root->root_key.objectid,
3149                                 (unsigned long long)root_dirid);
3150                         print_inode_error(root, rec);
3151                         error++;
3152                 }
3153         } else {
3154                 if (repair) {
3155                         struct btrfs_trans_handle *trans;
3156
3157                         trans = btrfs_start_transaction(root, 1);
3158                         if (IS_ERR(trans)) {
3159                                 err = PTR_ERR(trans);
3160                                 return err;
3161                         }
3162
3163                         fprintf(stderr,
3164                                 "root %llu missing its root dir, recreating\n",
3165                                 (unsigned long long)root->objectid);
3166
3167                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3168                         BUG_ON(ret);
3169
3170                         btrfs_commit_transaction(trans, root);
3171                         return -EAGAIN;
3172                 }
3173
3174                 fprintf(stderr, "root %llu root dir %llu not found\n",
3175                         (unsigned long long)root->root_key.objectid,
3176                         (unsigned long long)root_dirid);
3177         }
3178
3179         while (1) {
3180                 cache = search_cache_extent(inode_cache, 0);
3181                 if (!cache)
3182                         break;
3183                 node = container_of(cache, struct ptr_node, cache);
3184                 rec = node->data;
3185                 remove_cache_extent(inode_cache, &node->cache);
3186                 free(node);
3187                 if (rec->ino == root_dirid ||
3188                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189                         free_inode_rec(rec);
3190                         continue;
3191                 }
3192
3193                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194                         ret = check_orphan_item(root, rec->ino);
3195                         if (ret == 0)
3196                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197                         if (can_free_inode_rec(rec)) {
3198                                 free_inode_rec(rec);
3199                                 continue;
3200                         }
3201                 }
3202
3203                 if (!rec->found_inode_item)
3204                         rec->errors |= I_ERR_NO_INODE_ITEM;
3205                 if (rec->found_link != rec->nlink)
3206                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3207                 if (repair) {
3208                         ret = try_repair_inode(root, rec);
3209                         if (ret == 0 && can_free_inode_rec(rec)) {
3210                                 free_inode_rec(rec);
3211                                 continue;
3212                         }
3213                         ret = 0;
3214                 }
3215
3216                 if (!(repair && ret == 0))
3217                         error++;
3218                 print_inode_error(root, rec);
3219                 list_for_each_entry(backref, &rec->backrefs, list) {
3220                         if (!backref->found_dir_item)
3221                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222                         if (!backref->found_dir_index)
3223                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224                         if (!backref->found_inode_ref)
3225                                 backref->errors |= REF_ERR_NO_INODE_REF;
3226                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227                                 " namelen %u name %s filetype %d errors %x",
3228                                 (unsigned long long)backref->dir,
3229                                 (unsigned long long)backref->index,
3230                                 backref->namelen, backref->name,
3231                                 backref->filetype, backref->errors);
3232                         print_ref_error(backref->errors);
3233                 }
3234                 free_inode_rec(rec);
3235         }
3236         return (error > 0) ? -1 : 0;
3237 }
3238
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3240                                         u64 objectid)
3241 {
3242         struct cache_extent *cache;
3243         struct root_record *rec = NULL;
3244         int ret;
3245
3246         cache = lookup_cache_extent(root_cache, objectid, 1);
3247         if (cache) {
3248                 rec = container_of(cache, struct root_record, cache);
3249         } else {
3250                 rec = calloc(1, sizeof(*rec));
3251                 if (!rec)
3252                         return ERR_PTR(-ENOMEM);
3253                 rec->objectid = objectid;
3254                 INIT_LIST_HEAD(&rec->backrefs);
3255                 rec->cache.start = objectid;
3256                 rec->cache.size = 1;
3257
3258                 ret = insert_cache_extent(root_cache, &rec->cache);
3259                 if (ret)
3260                         return ERR_PTR(-EEXIST);
3261         }
3262         return rec;
3263 }
3264
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266                                              u64 ref_root, u64 dir, u64 index,
3267                                              const char *name, int namelen)
3268 {
3269         struct root_backref *backref;
3270
3271         list_for_each_entry(backref, &rec->backrefs, list) {
3272                 if (backref->ref_root != ref_root || backref->dir != dir ||
3273                     backref->namelen != namelen)
3274                         continue;
3275                 if (memcmp(name, backref->name, namelen))
3276                         continue;
3277                 return backref;
3278         }
3279
3280         backref = calloc(1, sizeof(*backref) + namelen + 1);
3281         if (!backref)
3282                 return NULL;
3283         backref->ref_root = ref_root;
3284         backref->dir = dir;
3285         backref->index = index;
3286         backref->namelen = namelen;
3287         memcpy(backref->name, name, namelen);
3288         backref->name[namelen] = '\0';
3289         list_add_tail(&backref->list, &rec->backrefs);
3290         return backref;
3291 }
3292
3293 static void free_root_record(struct cache_extent *cache)
3294 {
3295         struct root_record *rec;
3296         struct root_backref *backref;
3297
3298         rec = container_of(cache, struct root_record, cache);
3299         while (!list_empty(&rec->backrefs)) {
3300                 backref = to_root_backref(rec->backrefs.next);
3301                 list_del(&backref->list);
3302                 free(backref);
3303         }
3304
3305         kfree(rec);
3306 }
3307
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3309
3310 static int add_root_backref(struct cache_tree *root_cache,
3311                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3312                             const char *name, int namelen,
3313                             int item_type, int errors)
3314 {
3315         struct root_record *rec;
3316         struct root_backref *backref;
3317
3318         rec = get_root_rec(root_cache, root_id);
3319         BUG_ON(IS_ERR(rec));
3320         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3321         BUG_ON(!backref);
3322
3323         backref->errors |= errors;
3324
3325         if (item_type != BTRFS_DIR_ITEM_KEY) {
3326                 if (backref->found_dir_index || backref->found_back_ref ||
3327                     backref->found_forward_ref) {
3328                         if (backref->index != index)
3329                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3330                 } else {
3331                         backref->index = index;
3332                 }
3333         }
3334
3335         if (item_type == BTRFS_DIR_ITEM_KEY) {
3336                 if (backref->found_forward_ref)
3337                         rec->found_ref++;
3338                 backref->found_dir_item = 1;
3339         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340                 backref->found_dir_index = 1;
3341         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342                 if (backref->found_forward_ref)
3343                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3344                 else if (backref->found_dir_item)
3345                         rec->found_ref++;
3346                 backref->found_forward_ref = 1;
3347         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348                 if (backref->found_back_ref)
3349                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350                 backref->found_back_ref = 1;
3351         } else {
3352                 BUG_ON(1);
3353         }
3354
3355         if (backref->found_forward_ref && backref->found_dir_item)
3356                 backref->reachable = 1;
3357         return 0;
3358 }
3359
3360 static int merge_root_recs(struct btrfs_root *root,
3361                            struct cache_tree *src_cache,
3362                            struct cache_tree *dst_cache)
3363 {
3364         struct cache_extent *cache;
3365         struct ptr_node *node;
3366         struct inode_record *rec;
3367         struct inode_backref *backref;
3368         int ret = 0;
3369
3370         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371                 free_inode_recs_tree(src_cache);
3372                 return 0;
3373         }
3374
3375         while (1) {
3376                 cache = search_cache_extent(src_cache, 0);
3377                 if (!cache)
3378                         break;
3379                 node = container_of(cache, struct ptr_node, cache);
3380                 rec = node->data;
3381                 remove_cache_extent(src_cache, &node->cache);
3382                 free(node);
3383
3384                 ret = is_child_root(root, root->objectid, rec->ino);
3385                 if (ret < 0)
3386                         break;
3387                 else if (ret == 0)
3388                         goto skip;
3389
3390                 list_for_each_entry(backref, &rec->backrefs, list) {
3391                         BUG_ON(backref->found_inode_ref);
3392                         if (backref->found_dir_item)
3393                                 add_root_backref(dst_cache, rec->ino,
3394                                         root->root_key.objectid, backref->dir,
3395                                         backref->index, backref->name,
3396                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3397                                         backref->errors);
3398                         if (backref->found_dir_index)
3399                                 add_root_backref(dst_cache, rec->ino,
3400                                         root->root_key.objectid, backref->dir,
3401                                         backref->index, backref->name,
3402                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3403                                         backref->errors);
3404                 }
3405 skip:
3406                 free_inode_rec(rec);
3407         }
3408         if (ret < 0)
3409                 return ret;
3410         return 0;
3411 }
3412
3413 static int check_root_refs(struct btrfs_root *root,
3414                            struct cache_tree *root_cache)
3415 {
3416         struct root_record *rec;
3417         struct root_record *ref_root;
3418         struct root_backref *backref;
3419         struct cache_extent *cache;
3420         int loop = 1;
3421         int ret;
3422         int error;
3423         int errors = 0;
3424
3425         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426         BUG_ON(IS_ERR(rec));
3427         rec->found_ref = 1;
3428
3429         /* fixme: this can not detect circular references */
3430         while (loop) {
3431                 loop = 0;
3432                 cache = search_cache_extent(root_cache, 0);
3433                 while (1) {
3434                         if (!cache)
3435                                 break;
3436                         rec = container_of(cache, struct root_record, cache);
3437                         cache = next_cache_extent(cache);
3438
3439                         if (rec->found_ref == 0)
3440                                 continue;
3441
3442                         list_for_each_entry(backref, &rec->backrefs, list) {
3443                                 if (!backref->reachable)
3444                                         continue;
3445
3446                                 ref_root = get_root_rec(root_cache,
3447                                                         backref->ref_root);
3448                                 BUG_ON(IS_ERR(ref_root));
3449                                 if (ref_root->found_ref > 0)
3450                                         continue;
3451
3452                                 backref->reachable = 0;
3453                                 rec->found_ref--;
3454                                 if (rec->found_ref == 0)
3455                                         loop = 1;
3456                         }
3457                 }
3458         }
3459
3460         cache = search_cache_extent(root_cache, 0);
3461         while (1) {
3462                 if (!cache)
3463                         break;
3464                 rec = container_of(cache, struct root_record, cache);
3465                 cache = next_cache_extent(cache);
3466
3467                 if (rec->found_ref == 0 &&
3468                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470                         ret = check_orphan_item(root->fs_info->tree_root,
3471                                                 rec->objectid);
3472                         if (ret == 0)
3473                                 continue;
3474
3475                         /*
3476                          * If we don't have a root item then we likely just have
3477                          * a dir item in a snapshot for this root but no actual
3478                          * ref key or anything so it's meaningless.
3479                          */
3480                         if (!rec->found_root_item)
3481                                 continue;
3482                         errors++;
3483                         fprintf(stderr, "fs tree %llu not referenced\n",
3484                                 (unsigned long long)rec->objectid);
3485                 }
3486
3487                 error = 0;
3488                 if (rec->found_ref > 0 && !rec->found_root_item)
3489                         error = 1;
3490                 list_for_each_entry(backref, &rec->backrefs, list) {
3491                         if (!backref->found_dir_item)
3492                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493                         if (!backref->found_dir_index)
3494                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495                         if (!backref->found_back_ref)
3496                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497                         if (!backref->found_forward_ref)
3498                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3499                         if (backref->reachable && backref->errors)
3500                                 error = 1;
3501                 }
3502                 if (!error)
3503                         continue;
3504
3505                 errors++;
3506                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507                         (unsigned long long)rec->objectid, rec->found_ref,
3508                          rec->found_root_item ? "" : "not found");
3509
3510                 list_for_each_entry(backref, &rec->backrefs, list) {
3511                         if (!backref->reachable)
3512                                 continue;
3513                         if (!backref->errors && rec->found_root_item)
3514                                 continue;
3515                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516                                 " index %llu namelen %u name %s errors %x\n",
3517                                 (unsigned long long)backref->ref_root,
3518                                 (unsigned long long)backref->dir,
3519                                 (unsigned long long)backref->index,
3520                                 backref->namelen, backref->name,
3521                                 backref->errors);
3522                         print_ref_error(backref->errors);
3523                 }
3524         }
3525         return errors > 0 ? 1 : 0;
3526 }
3527
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529                             struct btrfs_key *key,
3530                             struct cache_tree *root_cache)
3531 {
3532         u64 dirid;
3533         u64 index;
3534         u32 len;
3535         u32 name_len;
3536         struct btrfs_root_ref *ref;
3537         char namebuf[BTRFS_NAME_LEN];
3538         int error;
3539
3540         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3541
3542         dirid = btrfs_root_ref_dirid(eb, ref);
3543         index = btrfs_root_ref_sequence(eb, ref);
3544         name_len = btrfs_root_ref_name_len(eb, ref);
3545
3546         if (name_len <= BTRFS_NAME_LEN) {
3547                 len = name_len;
3548                 error = 0;
3549         } else {
3550                 len = BTRFS_NAME_LEN;
3551                 error = REF_ERR_NAME_TOO_LONG;
3552         }
3553         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3554
3555         if (key->type == BTRFS_ROOT_REF_KEY) {
3556                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557                                  index, namebuf, len, key->type, error);
3558         } else {
3559                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560                                  index, namebuf, len, key->type, error);
3561         }
3562         return 0;
3563 }
3564
3565 static void free_corrupt_block(struct cache_extent *cache)
3566 {
3567         struct btrfs_corrupt_block *corrupt;
3568
3569         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3570         free(corrupt);
3571 }
3572
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3574
3575 /*
3576  * Repair the btree of the given root.
3577  *
3578  * The fix is to remove the node key in corrupt_blocks cache_tree.
3579  * and rebalance the tree.
3580  * After the fix, the btree should be writeable.
3581  */
3582 static int repair_btree(struct btrfs_root *root,
3583                         struct cache_tree *corrupt_blocks)
3584 {
3585         struct btrfs_trans_handle *trans;
3586         struct btrfs_path *path;
3587         struct btrfs_corrupt_block *corrupt;
3588         struct cache_extent *cache;
3589         struct btrfs_key key;
3590         u64 offset;
3591         int level;
3592         int ret = 0;
3593
3594         if (cache_tree_empty(corrupt_blocks))
3595                 return 0;
3596
3597         path = btrfs_alloc_path();
3598         if (!path)
3599                 return -ENOMEM;
3600
3601         trans = btrfs_start_transaction(root, 1);
3602         if (IS_ERR(trans)) {
3603                 ret = PTR_ERR(trans);
3604                 fprintf(stderr, "Error starting transaction: %s\n",
3605                         strerror(-ret));
3606                 goto out_free_path;
3607         }
3608         cache = first_cache_extent(corrupt_blocks);
3609         while (cache) {
3610                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3611                                        cache);
3612                 level = corrupt->level;
3613                 path->lowest_level = level;
3614                 key.objectid = corrupt->key.objectid;
3615                 key.type = corrupt->key.type;
3616                 key.offset = corrupt->key.offset;
3617
3618                 /*
3619                  * Here we don't want to do any tree balance, since it may
3620                  * cause a balance with corrupted brother leaf/node,
3621                  * so ins_len set to 0 here.
3622                  * Balance will be done after all corrupt node/leaf is deleted.
3623                  */
3624                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3625                 if (ret < 0)
3626                         goto out;
3627                 offset = btrfs_node_blockptr(path->nodes[level],
3628                                              path->slots[level]);
3629
3630                 /* Remove the ptr */
3631                 ret = btrfs_del_ptr(trans, root, path, level,
3632                                     path->slots[level]);
3633                 if (ret < 0)
3634                         goto out;
3635                 /*
3636                  * Remove the corresponding extent
3637                  * return value is not concerned.
3638                  */
3639                 btrfs_release_path(path);
3640                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641                                         0, root->root_key.objectid,
3642                                         level - 1, 0);
3643                 cache = next_cache_extent(cache);
3644         }
3645
3646         /* Balance the btree using btrfs_search_slot() */
3647         cache = first_cache_extent(corrupt_blocks);
3648         while (cache) {
3649                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3650                                        cache);
3651                 memcpy(&key, &corrupt->key, sizeof(key));
3652                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3653                 if (ret < 0)
3654                         goto out;
3655                 /* return will always >0 since it won't find the item */
3656                 ret = 0;
3657                 btrfs_release_path(path);
3658                 cache = next_cache_extent(cache);
3659         }
3660 out:
3661         btrfs_commit_transaction(trans, root);
3662 out_free_path:
3663         btrfs_free_path(path);
3664         return ret;
3665 }
3666
3667 static int check_fs_root(struct btrfs_root *root,
3668                          struct cache_tree *root_cache,
3669                          struct walk_control *wc)
3670 {
3671         int ret = 0;
3672         int err = 0;
3673         int wret;
3674         int level;
3675         struct btrfs_path path;
3676         struct shared_node root_node;
3677         struct root_record *rec;
3678         struct btrfs_root_item *root_item = &root->root_item;
3679         struct cache_tree corrupt_blocks;
3680         struct orphan_data_extent *orphan;
3681         struct orphan_data_extent *tmp;
3682         enum btrfs_tree_block_status status;
3683         struct node_refs nrefs;
3684
3685         /*
3686          * Reuse the corrupt_block cache tree to record corrupted tree block
3687          *
3688          * Unlike the usage in extent tree check, here we do it in a per
3689          * fs/subvol tree base.
3690          */
3691         cache_tree_init(&corrupt_blocks);
3692         root->fs_info->corrupt_blocks = &corrupt_blocks;
3693
3694         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695                 rec = get_root_rec(root_cache, root->root_key.objectid);
3696                 BUG_ON(IS_ERR(rec));
3697                 if (btrfs_root_refs(root_item) > 0)
3698                         rec->found_root_item = 1;
3699         }
3700
3701         btrfs_init_path(&path);
3702         memset(&root_node, 0, sizeof(root_node));
3703         cache_tree_init(&root_node.root_cache);
3704         cache_tree_init(&root_node.inode_cache);
3705         memset(&nrefs, 0, sizeof(nrefs));
3706
3707         /* Move the orphan extent record to corresponding inode_record */
3708         list_for_each_entry_safe(orphan, tmp,
3709                                  &root->orphan_data_extents, list) {
3710                 struct inode_record *inode;
3711
3712                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3713                                       1);
3714                 BUG_ON(IS_ERR(inode));
3715                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716                 list_move(&orphan->list, &inode->orphan_extents);
3717         }
3718
3719         level = btrfs_header_level(root->node);
3720         memset(wc->nodes, 0, sizeof(wc->nodes));
3721         wc->nodes[level] = &root_node;
3722         wc->active_node = level;
3723         wc->root_level = level;
3724
3725         /* We may not have checked the root block, lets do that now */
3726         if (btrfs_is_leaf(root->node))
3727                 status = btrfs_check_leaf(root, NULL, root->node);
3728         else
3729                 status = btrfs_check_node(root, NULL, root->node);
3730         if (status != BTRFS_TREE_BLOCK_CLEAN)
3731                 return -EIO;
3732
3733         if (btrfs_root_refs(root_item) > 0 ||
3734             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735                 path.nodes[level] = root->node;
3736                 extent_buffer_get(root->node);
3737                 path.slots[level] = 0;
3738         } else {
3739                 struct btrfs_key key;
3740                 struct btrfs_disk_key found_key;
3741
3742                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743                 level = root_item->drop_level;
3744                 path.lowest_level = level;
3745                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3746                 if (wret < 0)
3747                         goto skip_walking;
3748                 btrfs_node_key(path.nodes[level], &found_key,
3749                                 path.slots[level]);
3750                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3751                                         sizeof(found_key)));
3752         }
3753
3754         while (1) {
3755                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3756                 if (wret < 0)
3757                         ret = wret;
3758                 if (wret != 0)
3759                         break;
3760
3761                 wret = walk_up_tree(root, &path, wc, &level);
3762                 if (wret < 0)
3763                         ret = wret;
3764                 if (wret != 0)
3765                         break;
3766         }
3767 skip_walking:
3768         btrfs_release_path(&path);
3769
3770         if (!cache_tree_empty(&corrupt_blocks)) {
3771                 struct cache_extent *cache;
3772                 struct btrfs_corrupt_block *corrupt;
3773
3774                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3775                        root->root_key.objectid);
3776                 cache = first_cache_extent(&corrupt_blocks);
3777                 while (cache) {
3778                         corrupt = container_of(cache,
3779                                                struct btrfs_corrupt_block,
3780                                                cache);
3781                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3782                                cache->start, corrupt->level,
3783                                corrupt->key.objectid, corrupt->key.type,
3784                                corrupt->key.offset);
3785                         cache = next_cache_extent(cache);
3786                 }
3787                 if (repair) {
3788                         printf("Try to repair the btree for root %llu\n",
3789                                root->root_key.objectid);
3790                         ret = repair_btree(root, &corrupt_blocks);
3791                         if (ret < 0)
3792                                 fprintf(stderr, "Failed to repair btree: %s\n",
3793                                         strerror(-ret));
3794                         if (!ret)
3795                                 printf("Btree for root %llu is fixed\n",
3796                                        root->root_key.objectid);
3797                 }
3798         }
3799
3800         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3801         if (err < 0)
3802                 ret = err;
3803
3804         if (root_node.current) {
3805                 root_node.current->checked = 1;
3806                 maybe_free_inode_rec(&root_node.inode_cache,
3807                                 root_node.current);
3808         }
3809
3810         err = check_inode_recs(root, &root_node.inode_cache);
3811         if (!ret)
3812                 ret = err;
3813
3814         free_corrupt_blocks_tree(&corrupt_blocks);
3815         root->fs_info->corrupt_blocks = NULL;
3816         free_orphan_data_extents(&root->orphan_data_extents);
3817         return ret;
3818 }
3819
3820 static int fs_root_objectid(u64 objectid)
3821 {
3822         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3823             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3824                 return 1;
3825         return is_fstree(objectid);
3826 }
3827
3828 static int check_fs_roots(struct btrfs_root *root,
3829                           struct cache_tree *root_cache)
3830 {
3831         struct btrfs_path path;
3832         struct btrfs_key key;
3833         struct walk_control wc;
3834         struct extent_buffer *leaf, *tree_node;
3835         struct btrfs_root *tmp_root;
3836         struct btrfs_root *tree_root = root->fs_info->tree_root;
3837         int ret;
3838         int err = 0;
3839
3840         if (ctx.progress_enabled) {
3841                 ctx.tp = TASK_FS_ROOTS;
3842                 task_start(ctx.info);
3843         }
3844
3845         /*
3846          * Just in case we made any changes to the extent tree that weren't
3847          * reflected into the free space cache yet.
3848          */
3849         if (repair)
3850                 reset_cached_block_groups(root->fs_info);
3851         memset(&wc, 0, sizeof(wc));
3852         cache_tree_init(&wc.shared);
3853         btrfs_init_path(&path);
3854
3855 again:
3856         key.offset = 0;
3857         key.objectid = 0;
3858         key.type = BTRFS_ROOT_ITEM_KEY;
3859         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3860         if (ret < 0) {
3861                 err = 1;
3862                 goto out;
3863         }
3864         tree_node = tree_root->node;
3865         while (1) {
3866                 if (tree_node != tree_root->node) {
3867                         free_root_recs_tree(root_cache);
3868                         btrfs_release_path(&path);
3869                         goto again;
3870                 }
3871                 leaf = path.nodes[0];
3872                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3873                         ret = btrfs_next_leaf(tree_root, &path);
3874                         if (ret) {
3875                                 if (ret < 0)
3876                                         err = 1;
3877                                 break;
3878                         }
3879                         leaf = path.nodes[0];
3880                 }
3881                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3882                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3883                     fs_root_objectid(key.objectid)) {
3884                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3885                                 tmp_root = btrfs_read_fs_root_no_cache(
3886                                                 root->fs_info, &key);
3887                         } else {
3888                                 key.offset = (u64)-1;
3889                                 tmp_root = btrfs_read_fs_root(
3890                                                 root->fs_info, &key);
3891                         }
3892                         if (IS_ERR(tmp_root)) {
3893                                 err = 1;
3894                                 goto next;
3895                         }
3896                         ret = check_fs_root(tmp_root, root_cache, &wc);
3897                         if (ret == -EAGAIN) {
3898                                 free_root_recs_tree(root_cache);
3899                                 btrfs_release_path(&path);
3900                                 goto again;
3901                         }
3902                         if (ret)
3903                                 err = 1;
3904                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3905                                 btrfs_free_fs_root(tmp_root);
3906                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3907                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3908                         process_root_ref(leaf, path.slots[0], &key,
3909                                          root_cache);
3910                 }
3911 next:
3912                 path.slots[0]++;
3913         }
3914 out:
3915         btrfs_release_path(&path);
3916         if (err)
3917                 free_extent_cache_tree(&wc.shared);
3918         if (!cache_tree_empty(&wc.shared))
3919                 fprintf(stderr, "warning line %d\n", __LINE__);
3920
3921         task_stop(ctx.info);
3922
3923         return err;
3924 }
3925
3926 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3927 {
3928         struct rb_node *n;
3929         struct extent_backref *back;
3930         struct tree_backref *tback;
3931         struct data_backref *dback;
3932         u64 found = 0;
3933         int err = 0;
3934
3935         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3936                 back = rb_node_to_extent_backref(n);
3937                 if (!back->found_extent_tree) {
3938                         err = 1;
3939                         if (!print_errs)
3940                                 goto out;
3941                         if (back->is_data) {
3942                                 dback = to_data_backref(back);
3943                                 fprintf(stderr, "Backref %llu %s %llu"
3944                                         " owner %llu offset %llu num_refs %lu"
3945                                         " not found in extent tree\n",
3946                                         (unsigned long long)rec->start,
3947                                         back->full_backref ?
3948                                         "parent" : "root",
3949                                         back->full_backref ?
3950                                         (unsigned long long)dback->parent:
3951                                         (unsigned long long)dback->root,
3952                                         (unsigned long long)dback->owner,
3953                                         (unsigned long long)dback->offset,
3954                                         (unsigned long)dback->num_refs);
3955                         } else {
3956                                 tback = to_tree_backref(back);
3957                                 fprintf(stderr, "Backref %llu parent %llu"
3958                                         " root %llu not found in extent tree\n",
3959                                         (unsigned long long)rec->start,
3960                                         (unsigned long long)tback->parent,
3961                                         (unsigned long long)tback->root);
3962                         }
3963                 }
3964                 if (!back->is_data && !back->found_ref) {
3965                         err = 1;
3966                         if (!print_errs)
3967                                 goto out;
3968                         tback = to_tree_backref(back);
3969                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3970                                 (unsigned long long)rec->start,
3971                                 back->full_backref ? "parent" : "root",
3972                                 back->full_backref ?
3973                                 (unsigned long long)tback->parent :
3974                                 (unsigned long long)tback->root, back);
3975                 }
3976                 if (back->is_data) {
3977                         dback = to_data_backref(back);
3978                         if (dback->found_ref != dback->num_refs) {
3979                                 err = 1;
3980                                 if (!print_errs)
3981                                         goto out;
3982                                 fprintf(stderr, "Incorrect local backref count"
3983                                         " on %llu %s %llu owner %llu"
3984                                         " offset %llu found %u wanted %u back %p\n",
3985                                         (unsigned long long)rec->start,
3986                                         back->full_backref ?
3987                                         "parent" : "root",
3988                                         back->full_backref ?
3989                                         (unsigned long long)dback->parent:
3990                                         (unsigned long long)dback->root,
3991                                         (unsigned long long)dback->owner,
3992                                         (unsigned long long)dback->offset,
3993                                         dback->found_ref, dback->num_refs, back);
3994                         }
3995                         if (dback->disk_bytenr != rec->start) {
3996                                 err = 1;
3997                                 if (!print_errs)
3998                                         goto out;
3999                                 fprintf(stderr, "Backref disk bytenr does not"
4000                                         " match extent record, bytenr=%llu, "
4001                                         "ref bytenr=%llu\n",
4002                                         (unsigned long long)rec->start,
4003                                         (unsigned long long)dback->disk_bytenr);
4004                         }
4005
4006                         if (dback->bytes != rec->nr) {
4007                                 err = 1;
4008                                 if (!print_errs)
4009                                         goto out;
4010                                 fprintf(stderr, "Backref bytes do not match "
4011                                         "extent backref, bytenr=%llu, ref "
4012                                         "bytes=%llu, backref bytes=%llu\n",
4013                                         (unsigned long long)rec->start,
4014                                         (unsigned long long)rec->nr,
4015                                         (unsigned long long)dback->bytes);
4016                         }
4017                 }
4018                 if (!back->is_data) {
4019                         found += 1;
4020                 } else {
4021                         dback = to_data_backref(back);
4022                         found += dback->found_ref;
4023                 }
4024         }
4025         if (found != rec->refs) {
4026                 err = 1;
4027                 if (!print_errs)
4028                         goto out;
4029                 fprintf(stderr, "Incorrect global backref count "
4030                         "on %llu found %llu wanted %llu\n",
4031                         (unsigned long long)rec->start,
4032                         (unsigned long long)found,
4033                         (unsigned long long)rec->refs);
4034         }
4035 out:
4036         return err;
4037 }
4038
/* rb_free_nodes() callback: release the backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
4045
/*
 * Free every backref stored in @rec's backref tree by handing each node to
 * __free_one_backref().
 */
static void free_all_extent_backrefs(struct extent_record *rec)
{
        rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
4050
4051 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4052                                      struct cache_tree *extent_cache)
4053 {
4054         struct cache_extent *cache;
4055         struct extent_record *rec;
4056
4057         while (1) {
4058                 cache = first_cache_extent(extent_cache);
4059                 if (!cache)
4060                         break;
4061                 rec = container_of(cache, struct extent_record, cache);
4062                 remove_cache_extent(extent_cache, cache);
4063                 free_all_extent_backrefs(rec);
4064                 free(rec);
4065         }
4066 }
4067
4068 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4069                                  struct extent_record *rec)
4070 {
4071         if (rec->content_checked && rec->owner_ref_checked &&
4072             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4073             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4074             !rec->bad_full_backref && !rec->crossing_stripes &&
4075             !rec->wrong_chunk_type) {
4076                 remove_cache_extent(extent_cache, &rec->cache);
4077                 free_all_extent_backrefs(rec);
4078                 list_del_init(&rec->list);
4079                 free(rec);
4080         }
4081         return 0;
4082 }
4083
4084 static int check_owner_ref(struct btrfs_root *root,
4085                             struct extent_record *rec,
4086                             struct extent_buffer *buf)
4087 {
4088         struct extent_backref *node, *tmp;
4089         struct tree_backref *back;
4090         struct btrfs_root *ref_root;
4091         struct btrfs_key key;
4092         struct btrfs_path path;
4093         struct extent_buffer *parent;
4094         int level;
4095         int found = 0;
4096         int ret;
4097
4098         rbtree_postorder_for_each_entry_safe(node, tmp,
4099                                              &rec->backref_tree, node) {
4100                 if (node->is_data)
4101                         continue;
4102                 if (!node->found_ref)
4103                         continue;
4104                 if (node->full_backref)
4105                         continue;
4106                 back = to_tree_backref(node);
4107                 if (btrfs_header_owner(buf) == back->root)
4108                         return 0;
4109         }
4110         BUG_ON(rec->is_root);
4111
4112         /* try to find the block by search corresponding fs tree */
4113         key.objectid = btrfs_header_owner(buf);
4114         key.type = BTRFS_ROOT_ITEM_KEY;
4115         key.offset = (u64)-1;
4116
4117         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4118         if (IS_ERR(ref_root))
4119                 return 1;
4120
4121         level = btrfs_header_level(buf);
4122         if (level == 0)
4123                 btrfs_item_key_to_cpu(buf, &key, 0);
4124         else
4125                 btrfs_node_key_to_cpu(buf, &key, 0);
4126
4127         btrfs_init_path(&path);
4128         path.lowest_level = level + 1;
4129         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4130         if (ret < 0)
4131                 return 0;
4132
4133         parent = path.nodes[level + 1];
4134         if (parent && buf->start == btrfs_node_blockptr(parent,
4135                                                         path.slots[level + 1]))
4136                 found = 1;
4137
4138         btrfs_release_path(&path);
4139         return found ? 0 : 1;
4140 }
4141
4142 static int is_extent_tree_record(struct extent_record *rec)
4143 {
4144         struct extent_backref *ref, *tmp;
4145         struct tree_backref *back;
4146         int is_extent = 0;
4147
4148         rbtree_postorder_for_each_entry_safe(ref, tmp,
4149                                              &rec->backref_tree, node) {
4150                 if (ref->is_data)
4151                         return 0;
4152                 back = to_tree_backref(ref);
4153                 if (ref->full_backref)
4154                         return 0;
4155                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4156                         is_extent = 1;
4157         }
4158         return is_extent;
4159 }
4160
4161
4162 static int record_bad_block_io(struct btrfs_fs_info *info,
4163                                struct cache_tree *extent_cache,
4164                                u64 start, u64 len)
4165 {
4166         struct extent_record *rec;
4167         struct cache_extent *cache;
4168         struct btrfs_key key;
4169
4170         cache = lookup_cache_extent(extent_cache, start, len);
4171         if (!cache)
4172                 return 0;
4173
4174         rec = container_of(cache, struct extent_record, cache);
4175         if (!is_extent_tree_record(rec))
4176                 return 0;
4177
4178         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4179         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4180 }
4181
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped, and if
 * @slot is 0 the new first key is pushed to the upper levels through
 * btrfs_fixup_low_keys().  For a leaf the item data, offsets, sizes and keys
 * of the two items are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Node: read both key pointers, then write them back crossed. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /* The first key changed, propagate it upwards. */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): fix_item_offset() adds btrfs_leaf_data(buf)
                 * to item offsets before touching the buffer; the raw item
                 * offsets are used here - confirm which one is intended.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write copies the *other* item's size out
                 * of a buffer allocated with its own size; when the two sizes
                 * differ this reads past the smaller allocation - confirm.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Cross the offsets and sizes stored in the item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Finally swap the keys; path->slots[0] selects the target. */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
4249
4250 static int fix_key_order(struct btrfs_trans_handle *trans,
4251                          struct btrfs_root *root,
4252                          struct btrfs_path *path)
4253 {
4254         struct extent_buffer *buf;
4255         struct btrfs_key k1, k2;
4256         int i;
4257         int level = path->lowest_level;
4258         int ret = -EIO;
4259
4260         buf = path->nodes[level];
4261         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4262                 if (level) {
4263                         btrfs_node_key_to_cpu(buf, &k1, i);
4264                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4265                 } else {
4266                         btrfs_item_key_to_cpu(buf, &k1, i);
4267                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4268                 }
4269                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4270                         continue;
4271                 ret = swap_values(root, path, buf, i);
4272                 if (ret)
4273                         break;
4274                 btrfs_mark_buffer_dirty(buf);
4275                 i = 0;
4276         }
4277         return ret;
4278 }
4279
4280 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4281                              struct btrfs_root *root,
4282                              struct btrfs_path *path,
4283                              struct extent_buffer *buf, int slot)
4284 {
4285         struct btrfs_key key;
4286         int nritems = btrfs_header_nritems(buf);
4287
4288         btrfs_item_key_to_cpu(buf, &key, slot);
4289
4290         /* These are all the keys we can deal with missing. */
4291         if (key.type != BTRFS_DIR_INDEX_KEY &&
4292             key.type != BTRFS_EXTENT_ITEM_KEY &&
4293             key.type != BTRFS_METADATA_ITEM_KEY &&
4294             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4295             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4296                 return -1;
4297
4298         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4299                (unsigned long long)key.objectid, key.type,
4300                (unsigned long long)key.offset, slot, buf->start);
4301         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4302                               btrfs_item_nr_offset(slot + 1),
4303                               sizeof(struct btrfs_item) *
4304                               (nritems - slot - 1));
4305         btrfs_set_header_nritems(buf, nritems - 1);
4306         if (slot == 0) {
4307                 struct btrfs_disk_key disk_key;
4308
4309                 btrfs_item_key(buf, &disk_key, 0);
4310                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4311         }
4312         btrfs_mark_buffer_dirty(buf);
4313         return 0;
4314 }
4315
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * The first item is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item to end where the previous item starts.  An item ending short of
 * that point is shifted up to close the gap.  An item ending beyond it is
 * handed to delete_bogus_item(); if that succeeds the scan restarts, and if
 * it fails the error is reported and ret becomes -EIO.
 *
 * Because of the final BUG_ON() the function only ever returns 0.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        /* First item: compare against the end of the leaf. */
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        /* Later items: compare against the previous item. */
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                if (!shift)
                        continue;

                /* Move the item data up by the gap and record the new offset. */
                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                offset = btrfs_item_offset_nr(buf, i);
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
4384
4385 /*
4386  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4387  * then just return -EIO.
4388  */
4389 static int try_to_fix_bad_block(struct btrfs_root *root,
4390                                 struct extent_buffer *buf,
4391                                 enum btrfs_tree_block_status status)
4392 {
4393         struct btrfs_trans_handle *trans;
4394         struct ulist *roots;
4395         struct ulist_node *node;
4396         struct btrfs_root *search_root;
4397         struct btrfs_path *path;
4398         struct ulist_iterator iter;
4399         struct btrfs_key root_key, key;
4400         int ret;
4401
4402         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4403             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4404                 return -EIO;
4405
4406         path = btrfs_alloc_path();
4407         if (!path)
4408                 return -EIO;
4409
4410         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4411                                    0, &roots);
4412         if (ret) {
4413                 btrfs_free_path(path);
4414                 return -EIO;
4415         }
4416
4417         ULIST_ITER_INIT(&iter);
4418         while ((node = ulist_next(roots, &iter))) {
4419                 root_key.objectid = node->val;
4420                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4421                 root_key.offset = (u64)-1;
4422
4423                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4424                 if (IS_ERR(root)) {
4425                         ret = -EIO;
4426                         break;
4427                 }
4428
4429
4430                 trans = btrfs_start_transaction(search_root, 0);
4431                 if (IS_ERR(trans)) {
4432                         ret = PTR_ERR(trans);
4433                         break;
4434                 }
4435
4436                 path->lowest_level = btrfs_header_level(buf);
4437                 path->skip_check_block = 1;
4438                 if (path->lowest_level)
4439                         btrfs_node_key_to_cpu(buf, &key, 0);
4440                 else
4441                         btrfs_item_key_to_cpu(buf, &key, 0);
4442                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4443                 if (ret) {
4444                         ret = -EIO;
4445                         btrfs_commit_transaction(trans, search_root);
4446                         break;
4447                 }
4448                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4449                         ret = fix_key_order(trans, search_root, path);
4450                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451                         ret = fix_item_offset(trans, search_root, path);
4452                 if (ret) {
4453                         btrfs_commit_transaction(trans, search_root);
4454                         break;
4455                 }
4456                 btrfs_release_path(path);
4457                 btrfs_commit_transaction(trans, search_root);
4458         }
4459         ulist_free(roots);
4460         btrfs_free_path(path);
4461         return ret;
4462 }
4463
4464 static int check_block(struct btrfs_root *root,
4465                        struct cache_tree *extent_cache,
4466                        struct extent_buffer *buf, u64 flags)
4467 {
4468         struct extent_record *rec;
4469         struct cache_extent *cache;
4470         struct btrfs_key key;
4471         enum btrfs_tree_block_status status;
4472         int ret = 0;
4473         int level;
4474
4475         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4476         if (!cache)
4477                 return 1;
4478         rec = container_of(cache, struct extent_record, cache);
4479         rec->generation = btrfs_header_generation(buf);
4480
4481         level = btrfs_header_level(buf);
4482         if (btrfs_header_nritems(buf) > 0) {
4483
4484                 if (level == 0)
4485                         btrfs_item_key_to_cpu(buf, &key, 0);
4486                 else
4487                         btrfs_node_key_to_cpu(buf, &key, 0);
4488
4489                 rec->info_objectid = key.objectid;
4490         }
4491         rec->info_level = level;
4492
4493         if (btrfs_is_leaf(buf))
4494                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4495         else
4496                 status = btrfs_check_node(root, &rec->parent_key, buf);
4497
4498         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4499                 if (repair)
4500                         status = try_to_fix_bad_block(root, buf, status);
4501                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4502                         ret = -EIO;
4503                         fprintf(stderr, "bad block %llu\n",
4504                                 (unsigned long long)buf->start);
4505                 } else {
4506                         /*
4507                          * Signal to callers we need to start the scan over
4508                          * again since we'll have cowed blocks.
4509                          */
4510                         ret = -EAGAIN;
4511                 }
4512         } else {
4513                 rec->content_checked = 1;
4514                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4515                         rec->owner_ref_checked = 1;
4516                 else {
4517                         ret = check_owner_ref(root, rec, buf);
4518                         if (!ret)
4519                                 rec->owner_ref_checked = 1;
4520                 }
4521         }
4522         if (!ret)
4523                 maybe_free_extent_rec(extent_cache, rec);
4524         return ret;
4525 }
4526
4527
4528 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root)
4530 {
4531         struct rb_node *node;
4532         struct tree_backref *back = NULL;
4533         struct tree_backref match = {
4534                 .node = {
4535                         .is_data = 0,
4536                 },
4537         };
4538
4539         if (parent) {
4540                 match.parent = parent;
4541                 match.node.full_backref = 1;
4542         } else {
4543                 match.root = root;
4544         }
4545
4546         node = rb_search(&rec->backref_tree, &match.node.node,
4547                          (rb_compare_keys)compare_extent_backref, NULL);
4548         if (node)
4549                 back = to_tree_backref(rb_node_to_extent_backref(node));
4550
4551         return back;
4552 }
4553
4554 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4555                                                 u64 parent, u64 root)
4556 {
4557         struct tree_backref *ref = malloc(sizeof(*ref));
4558
4559         if (!ref)
4560                 return NULL;
4561         memset(&ref->node, 0, sizeof(ref->node));
4562         if (parent > 0) {
4563                 ref->parent = parent;
4564                 ref->node.full_backref = 1;
4565         } else {
4566                 ref->root = root;
4567                 ref->node.full_backref = 0;
4568         }
4569         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4570
4571         return ref;
4572 }
4573
4574 static struct data_backref *find_data_backref(struct extent_record *rec,
4575                                                 u64 parent, u64 root,
4576                                                 u64 owner, u64 offset,
4577                                                 int found_ref,
4578                                                 u64 disk_bytenr, u64 bytes)
4579 {
4580         struct rb_node *node;
4581         struct data_backref *back = NULL;
4582         struct data_backref match = {
4583                 .node = {
4584                         .is_data = 1,
4585                 },
4586                 .owner = owner,
4587                 .offset = offset,
4588                 .bytes = bytes,
4589                 .found_ref = found_ref,
4590                 .disk_bytenr = disk_bytenr,
4591         };
4592
4593         if (parent) {
4594                 match.parent = parent;
4595                 match.node.full_backref = 1;
4596         } else {
4597                 match.root = root;
4598         }
4599
4600         node = rb_search(&rec->backref_tree, &match.node.node,
4601                          (rb_compare_keys)compare_extent_backref, NULL);
4602         if (node)
4603                 back = to_data_backref(rb_node_to_extent_backref(node));
4604
4605         return back;
4606 }
4607
4608 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4609                                                 u64 parent, u64 root,
4610                                                 u64 owner, u64 offset,
4611                                                 u64 max_size)
4612 {
4613         struct data_backref *ref = malloc(sizeof(*ref));
4614
4615         if (!ref)
4616                 return NULL;
4617         memset(&ref->node, 0, sizeof(ref->node));
4618         ref->node.is_data = 1;
4619
4620         if (parent > 0) {
4621                 ref->parent = parent;
4622                 ref->owner = 0;
4623                 ref->offset = 0;
4624                 ref->node.full_backref = 1;
4625         } else {
4626                 ref->root = root;
4627                 ref->owner = owner;
4628                 ref->offset = offset;
4629                 ref->node.full_backref = 0;
4630         }
4631         ref->bytes = max_size;
4632         ref->found_ref = 0;
4633         ref->num_refs = 0;
4634         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4635         if (max_size > rec->max_size)
4636                 rec->max_size = max_size;
4637         return ref;
4638 }
4639
4640 /* Check if the type of extent matches with its chunk */
4641 static void check_extent_type(struct extent_record *rec)
4642 {
4643         struct btrfs_block_group_cache *bg_cache;
4644
4645         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4646         if (!bg_cache)
4647                 return;
4648
4649         /* data extent, check chunk directly*/
4650         if (!rec->metadata) {
4651                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4652                         rec->wrong_chunk_type = 1;
4653                 return;
4654         }
4655
4656         /* metadata extent, check the obvious case first */
4657         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4658                                  BTRFS_BLOCK_GROUP_METADATA))) {
4659                 rec->wrong_chunk_type = 1;
4660                 return;
4661         }
4662
4663         /*
4664          * Check SYSTEM extent, as it's also marked as metadata, we can only
4665          * make sure it's a SYSTEM extent by its backref
4666          */
4667         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4668                 struct extent_backref *node;
4669                 struct tree_backref *tback;
4670                 u64 bg_type;
4671
4672                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4673                 if (node->is_data) {
4674                         /* tree block shouldn't have data backref */
4675                         rec->wrong_chunk_type = 1;
4676                         return;
4677                 }
4678                 tback = container_of(node, struct tree_backref, node);
4679
4680                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4681                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4682                 else
4683                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4684                 if (!(bg_cache->flags & bg_type))
4685                         rec->wrong_chunk_type = 1;
4686         }
4687 }
4688
4689 /*
4690  * Allocate a new extent record, fill default values from @tmpl and insert int
4691  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4692  * the cache, otherwise it fails.
4693  */
4694 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4695                 struct extent_record *tmpl)
4696 {
4697         struct extent_record *rec;
4698         int ret = 0;
4699
4700         rec = malloc(sizeof(*rec));
4701         if (!rec)
4702                 return -ENOMEM;
4703         rec->start = tmpl->start;
4704         rec->max_size = tmpl->max_size;
4705         rec->nr = max(tmpl->nr, tmpl->max_size);
4706         rec->found_rec = tmpl->found_rec;
4707         rec->content_checked = tmpl->content_checked;
4708         rec->owner_ref_checked = tmpl->owner_ref_checked;
4709         rec->num_duplicates = 0;
4710         rec->metadata = tmpl->metadata;
4711         rec->flag_block_full_backref = FLAG_UNSET;
4712         rec->bad_full_backref = 0;
4713         rec->crossing_stripes = 0;
4714         rec->wrong_chunk_type = 0;
4715         rec->is_root = tmpl->is_root;
4716         rec->refs = tmpl->refs;
4717         rec->extent_item_refs = tmpl->extent_item_refs;
4718         rec->parent_generation = tmpl->parent_generation;
4719         INIT_LIST_HEAD(&rec->backrefs);
4720         INIT_LIST_HEAD(&rec->dups);
4721         INIT_LIST_HEAD(&rec->list);
4722         rec->backref_tree = RB_ROOT;
4723         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4724         rec->cache.start = tmpl->start;
4725         rec->cache.size = tmpl->nr;
4726         ret = insert_cache_extent(extent_cache, &rec->cache);
4727         BUG_ON(ret);
4728         bytes_used += rec->nr;
4729
4730         if (tmpl->metadata)
4731                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4732                                 global_info->tree_root->nodesize);
4733         check_extent_type(rec);
4734         return ret;
4735 }
4736
4737 /*
4738  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4739  * some are hints:
4740  * - refs              - if found, increase refs
4741  * - is_root           - if found, set
4742  * - content_checked   - if found, set
4743  * - owner_ref_checked - if found, set
4744  *
4745  * If not found, create a new one, initialize and insert.
4746  */
4747 static int add_extent_rec(struct cache_tree *extent_cache,
4748                 struct extent_record *tmpl)
4749 {
4750         struct extent_record *rec;
4751         struct cache_extent *cache;
4752         int ret = 0;
4753         int dup = 0;
4754
4755         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4756         if (cache) {
4757                 rec = container_of(cache, struct extent_record, cache);
4758                 if (tmpl->refs)
4759                         rec->refs++;
4760                 if (rec->nr == 1)
4761                         rec->nr = max(tmpl->nr, tmpl->max_size);
4762
4763                 /*
4764                  * We need to make sure to reset nr to whatever the extent
4765                  * record says was the real size, this way we can compare it to
4766                  * the backrefs.
4767                  */
4768                 if (tmpl->found_rec) {
4769                         if (tmpl->start != rec->start || rec->found_rec) {
4770                                 struct extent_record *tmp;
4771
4772                                 dup = 1;
4773                                 if (list_empty(&rec->list))
4774                                         list_add_tail(&rec->list,
4775                                                       &duplicate_extents);
4776
4777                                 /*
4778                                  * We have to do this song and dance in case we
4779                                  * find an extent record that falls inside of
4780                                  * our current extent record but does not have
4781                                  * the same objectid.
4782                                  */
4783                                 tmp = malloc(sizeof(*tmp));
4784                                 if (!tmp)
4785                                         return -ENOMEM;
4786                                 tmp->start = tmpl->start;
4787                                 tmp->max_size = tmpl->max_size;
4788                                 tmp->nr = tmpl->nr;
4789                                 tmp->found_rec = 1;
4790                                 tmp->metadata = tmpl->metadata;
4791                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4792                                 INIT_LIST_HEAD(&tmp->list);
4793                                 list_add_tail(&tmp->list, &rec->dups);
4794                                 rec->num_duplicates++;
4795                         } else {
4796                                 rec->nr = tmpl->nr;
4797                                 rec->found_rec = 1;
4798                         }
4799                 }
4800
4801                 if (tmpl->extent_item_refs && !dup) {
4802                         if (rec->extent_item_refs) {
4803                                 fprintf(stderr, "block %llu rec "
4804                                         "extent_item_refs %llu, passed %llu\n",
4805                                         (unsigned long long)tmpl->start,
4806                                         (unsigned long long)
4807                                                         rec->extent_item_refs,
4808                                         (unsigned long long)tmpl->extent_item_refs);
4809                         }
4810                         rec->extent_item_refs = tmpl->extent_item_refs;
4811                 }
4812                 if (tmpl->is_root)
4813                         rec->is_root = 1;
4814                 if (tmpl->content_checked)
4815                         rec->content_checked = 1;
4816                 if (tmpl->owner_ref_checked)
4817                         rec->owner_ref_checked = 1;
4818                 memcpy(&rec->parent_key, &tmpl->parent_key,
4819                                 sizeof(tmpl->parent_key));
4820                 if (tmpl->parent_generation)
4821                         rec->parent_generation = tmpl->parent_generation;
4822                 if (rec->max_size < tmpl->max_size)
4823                         rec->max_size = tmpl->max_size;
4824
4825                 /*
4826                  * A metadata extent can't cross stripe_len boundary, otherwise
4827                  * kernel scrub won't be able to handle it.
4828                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4829                  * it.
4830                  */
4831                 if (tmpl->metadata)
4832                         rec->crossing_stripes = check_crossing_stripes(
4833                                 rec->start, global_info->tree_root->nodesize);
4834                 check_extent_type(rec);
4835                 maybe_free_extent_rec(extent_cache, rec);
4836                 return ret;
4837         }
4838
4839         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4840
4841         return ret;
4842 }
4843
4844 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4845                             u64 parent, u64 root, int found_ref)
4846 {
4847         struct extent_record *rec;
4848         struct tree_backref *back;
4849         struct cache_extent *cache;
4850
4851         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4852         if (!cache) {
4853                 struct extent_record tmpl;
4854
4855                 memset(&tmpl, 0, sizeof(tmpl));
4856                 tmpl.start = bytenr;
4857                 tmpl.nr = 1;
4858                 tmpl.metadata = 1;
4859
4860                 add_extent_rec_nolookup(extent_cache, &tmpl);
4861
4862                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4863                 if (!cache)
4864                         abort();
4865         }
4866
4867         rec = container_of(cache, struct extent_record, cache);
4868         if (rec->start != bytenr) {
4869                 abort();
4870         }
4871
4872         back = find_tree_backref(rec, parent, root);
4873         if (!back) {
4874                 back = alloc_tree_backref(rec, parent, root);
4875                 BUG_ON(!back);
4876         }
4877
4878         if (found_ref) {
4879                 if (back->node.found_ref) {
4880                         fprintf(stderr, "Extent back ref already exists "
4881                                 "for %llu parent %llu root %llu \n",
4882                                 (unsigned long long)bytenr,
4883                                 (unsigned long long)parent,
4884                                 (unsigned long long)root);
4885                 }
4886                 back->node.found_ref = 1;
4887         } else {
4888                 if (back->node.found_extent_tree) {
4889                         fprintf(stderr, "Extent back ref already exists "
4890                                 "for %llu parent %llu root %llu \n",
4891                                 (unsigned long long)bytenr,
4892                                 (unsigned long long)parent,
4893                                 (unsigned long long)root);
4894                 }
4895                 back->node.found_extent_tree = 1;
4896         }
4897         check_extent_type(rec);
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4903                             u64 parent, u64 root, u64 owner, u64 offset,
4904                             u32 num_refs, int found_ref, u64 max_size)
4905 {
4906         struct extent_record *rec;
4907         struct data_backref *back;
4908         struct cache_extent *cache;
4909
4910         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4911         if (!cache) {
4912                 struct extent_record tmpl;
4913
4914                 memset(&tmpl, 0, sizeof(tmpl));
4915                 tmpl.start = bytenr;
4916                 tmpl.nr = 1;
4917                 tmpl.max_size = max_size;
4918
4919                 add_extent_rec_nolookup(extent_cache, &tmpl);
4920
4921                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4922                 if (!cache)
4923                         abort();
4924         }
4925
4926         rec = container_of(cache, struct extent_record, cache);
4927         if (rec->max_size < max_size)
4928                 rec->max_size = max_size;
4929
4930         /*
4931          * If found_ref is set then max_size is the real size and must match the
4932          * existing refs.  So if we have already found a ref then we need to
4933          * make sure that this ref matches the existing one, otherwise we need
4934          * to add a new backref so we can notice that the backrefs don't match
4935          * and we need to figure out who is telling the truth.  This is to
4936          * account for that awful fsync bug I introduced where we'd end up with
4937          * a btrfs_file_extent_item that would have its length include multiple
4938          * prealloc extents or point inside of a prealloc extent.
4939          */
4940         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4941                                  bytenr, max_size);
4942         if (!back) {
4943                 back = alloc_data_backref(rec, parent, root, owner, offset,
4944                                           max_size);
4945                 BUG_ON(!back);
4946         }
4947
4948         if (found_ref) {
4949                 BUG_ON(num_refs != 1);
4950                 if (back->node.found_ref)
4951                         BUG_ON(back->bytes != max_size);
4952                 back->node.found_ref = 1;
4953                 back->found_ref += 1;
4954                 back->bytes = max_size;
4955                 back->disk_bytenr = bytenr;
4956                 rec->refs += 1;
4957                 rec->content_checked = 1;
4958                 rec->owner_ref_checked = 1;
4959         } else {
4960                 if (back->node.found_extent_tree) {
4961                         fprintf(stderr, "Extent back ref already exists "
4962                                 "for %llu parent %llu root %llu "
4963                                 "owner %llu offset %llu num_refs %lu\n",
4964                                 (unsigned long long)bytenr,
4965                                 (unsigned long long)parent,
4966                                 (unsigned long long)root,
4967                                 (unsigned long long)owner,
4968                                 (unsigned long long)offset,
4969                                 (unsigned long)num_refs);
4970                 }
4971                 back->num_refs = num_refs;
4972                 back->node.found_extent_tree = 1;
4973         }
4974         maybe_free_extent_rec(extent_cache, rec);
4975         return 0;
4976 }
4977
4978 static int add_pending(struct cache_tree *pending,
4979                        struct cache_tree *seen, u64 bytenr, u32 size)
4980 {
4981         int ret;
4982         ret = add_cache_extent(seen, bytenr, size);
4983         if (ret)
4984                 return ret;
4985         add_cache_extent(pending, bytenr, size);
4986         return 0;
4987 }
4988
4989 static int pick_next_pending(struct cache_tree *pending,
4990                         struct cache_tree *reada,
4991                         struct cache_tree *nodes,
4992                         u64 last, struct block_info *bits, int bits_nr,
4993                         int *reada_bits)
4994 {
4995         unsigned long node_start = last;
4996         struct cache_extent *cache;
4997         int ret;
4998
4999         cache = search_cache_extent(reada, 0);
5000         if (cache) {
5001                 bits[0].start = cache->start;
5002                 bits[0].size = cache->size;
5003                 *reada_bits = 1;
5004                 return 1;
5005         }
5006         *reada_bits = 0;
5007         if (node_start > 32768)
5008                 node_start -= 32768;
5009
5010         cache = search_cache_extent(nodes, node_start);
5011         if (!cache)
5012                 cache = search_cache_extent(nodes, 0);
5013
5014         if (!cache) {
5015                  cache = search_cache_extent(pending, 0);
5016                  if (!cache)
5017                          return 0;
5018                  ret = 0;
5019                  do {
5020                          bits[ret].start = cache->start;
5021                          bits[ret].size = cache->size;
5022                          cache = next_cache_extent(cache);
5023                          ret++;
5024                  } while (cache && ret < bits_nr);
5025                  return ret;
5026         }
5027
5028         ret = 0;
5029         do {
5030                 bits[ret].start = cache->start;
5031                 bits[ret].size = cache->size;
5032                 cache = next_cache_extent(cache);
5033                 ret++;
5034         } while (cache && ret < bits_nr);
5035
5036         if (bits_nr - ret > 8) {
5037                 u64 lookup = bits[0].start + bits[0].size;
5038                 struct cache_extent *next;
5039                 next = search_cache_extent(pending, lookup);
5040                 while(next) {
5041                         if (next->start - lookup > 32768)
5042                                 break;
5043                         bits[ret].start = next->start;
5044                         bits[ret].size = next->size;
5045                         lookup = next->start + next->size;
5046                         ret++;
5047                         if (ret == bits_nr)
5048                                 break;
5049                         next = next_cache_extent(next);
5050                         if (!next)
5051                                 break;
5052                 }
5053         }
5054         return ret;
5055 }
5056
5057 static void free_chunk_record(struct cache_extent *cache)
5058 {
5059         struct chunk_record *rec;
5060
5061         rec = container_of(cache, struct chunk_record, cache);
5062         list_del_init(&rec->list);
5063         list_del_init(&rec->dextents);
5064         free(rec);
5065 }
5066
5067 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5068 {
5069         cache_tree_free_extents(chunk_cache, free_chunk_record);
5070 }
5071
5072 static void free_device_record(struct rb_node *node)
5073 {
5074         struct device_record *rec;
5075
5076         rec = container_of(node, struct device_record, node);
5077         free(rec);
5078 }
5079
5080 FREE_RB_BASED_TREE(device_cache, free_device_record);
5081
5082 int insert_block_group_record(struct block_group_tree *tree,
5083                               struct block_group_record *bg_rec)
5084 {
5085         int ret;
5086
5087         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5088         if (ret)
5089                 return ret;
5090
5091         list_add_tail(&bg_rec->list, &tree->block_groups);
5092         return 0;
5093 }
5094
5095 static void free_block_group_record(struct cache_extent *cache)
5096 {
5097         struct block_group_record *rec;
5098
5099         rec = container_of(cache, struct block_group_record, cache);
5100         list_del_init(&rec->list);
5101         free(rec);
5102 }
5103
5104 void free_block_group_tree(struct block_group_tree *tree)
5105 {
5106         cache_tree_free_extents(&tree->tree, free_block_group_record);
5107 }
5108
5109 int insert_device_extent_record(struct device_extent_tree *tree,
5110                                 struct device_extent_record *de_rec)
5111 {
5112         int ret;
5113
5114         /*
5115          * Device extent is a bit different from the other extents, because
5116          * the extents which belong to the different devices may have the
5117          * same start and size, so we need use the special extent cache
5118          * search/insert functions.
5119          */
5120         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5121         if (ret)
5122                 return ret;
5123
5124         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5125         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5126         return 0;
5127 }
5128
5129 static void free_device_extent_record(struct cache_extent *cache)
5130 {
5131         struct device_extent_record *rec;
5132
5133         rec = container_of(cache, struct device_extent_record, cache);
5134         if (!list_empty(&rec->chunk_list))
5135                 list_del_init(&rec->chunk_list);
5136         if (!list_empty(&rec->device_list))
5137                 list_del_init(&rec->device_list);
5138         free(rec);
5139 }
5140
5141 void free_device_extent_tree(struct device_extent_tree *tree)
5142 {
5143         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5144 }
5145
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item in @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; any other ref is recorded as a data backref with the ref
 * count stored in the item.  In both cases key.objectid is the extent
 * bytenr and key.offset is passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref() instead of
 * discarding it.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
5164
5165 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5166                                             struct btrfs_key *key,
5167                                             int slot)
5168 {
5169         struct btrfs_chunk *ptr;
5170         struct chunk_record *rec;
5171         int num_stripes, i;
5172
5173         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5174         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5175
5176         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5177         if (!rec) {
5178                 fprintf(stderr, "memory allocation failed\n");
5179                 exit(-1);
5180         }
5181
5182         INIT_LIST_HEAD(&rec->list);
5183         INIT_LIST_HEAD(&rec->dextents);
5184         rec->bg_rec = NULL;
5185
5186         rec->cache.start = key->offset;
5187         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5188
5189         rec->generation = btrfs_header_generation(leaf);
5190
5191         rec->objectid = key->objectid;
5192         rec->type = key->type;
5193         rec->offset = key->offset;
5194
5195         rec->length = rec->cache.size;
5196         rec->owner = btrfs_chunk_owner(leaf, ptr);
5197         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5198         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5199         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5200         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5201         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5202         rec->num_stripes = num_stripes;
5203         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5204
5205         for (i = 0; i < rec->num_stripes; ++i) {
5206                 rec->stripes[i].devid =
5207                         btrfs_stripe_devid_nr(leaf, ptr, i);
5208                 rec->stripes[i].offset =
5209                         btrfs_stripe_offset_nr(leaf, ptr, i);
5210                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5211                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5212                                 BTRFS_UUID_SIZE);
5213         }
5214
5215         return rec;
5216 }
5217
5218 static int process_chunk_item(struct cache_tree *chunk_cache,
5219                               struct btrfs_key *key, struct extent_buffer *eb,
5220                               int slot)
5221 {
5222         struct chunk_record *rec;
5223         int ret = 0;
5224
5225         rec = btrfs_new_chunk_record(eb, key, slot);
5226         ret = insert_cache_extent(chunk_cache, &rec->cache);
5227         if (ret) {
5228                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5229                         rec->offset, rec->length);
5230                 free(rec);
5231         }
5232
5233         return ret;
5234 }
5235
5236 static int process_device_item(struct rb_root *dev_cache,
5237                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5238 {
5239         struct btrfs_dev_item *ptr;
5240         struct device_record *rec;
5241         int ret = 0;
5242
5243         ptr = btrfs_item_ptr(eb,
5244                 slot, struct btrfs_dev_item);
5245
5246         rec = malloc(sizeof(*rec));
5247         if (!rec) {
5248                 fprintf(stderr, "memory allocation failed\n");
5249                 return -ENOMEM;
5250         }
5251
5252         rec->devid = key->offset;
5253         rec->generation = btrfs_header_generation(eb);
5254
5255         rec->objectid = key->objectid;
5256         rec->type = key->type;
5257         rec->offset = key->offset;
5258
5259         rec->devid = btrfs_device_id(eb, ptr);
5260         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5261         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5262
5263         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5264         if (ret) {
5265                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5266                 free(rec);
5267         }
5268
5269         return ret;
5270 }
5271
5272 struct block_group_record *
5273 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5274                              int slot)
5275 {
5276         struct btrfs_block_group_item *ptr;
5277         struct block_group_record *rec;
5278
5279         rec = calloc(1, sizeof(*rec));
5280         if (!rec) {
5281                 fprintf(stderr, "memory allocation failed\n");
5282                 exit(-1);
5283         }
5284
5285         rec->cache.start = key->objectid;
5286         rec->cache.size = key->offset;
5287
5288         rec->generation = btrfs_header_generation(leaf);
5289
5290         rec->objectid = key->objectid;
5291         rec->type = key->type;
5292         rec->offset = key->offset;
5293
5294         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5295         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5296
5297         INIT_LIST_HEAD(&rec->list);
5298
5299         return rec;
5300 }
5301
5302 static int process_block_group_item(struct block_group_tree *block_group_cache,
5303                                     struct btrfs_key *key,
5304                                     struct extent_buffer *eb, int slot)
5305 {
5306         struct block_group_record *rec;
5307         int ret = 0;
5308
5309         rec = btrfs_new_block_group_record(eb, key, slot);
5310         ret = insert_block_group_record(block_group_cache, rec);
5311         if (ret) {
5312                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5313                         rec->objectid, rec->offset);
5314                 free(rec);
5315         }
5316
5317         return ret;
5318 }
5319
5320 struct device_extent_record *
5321 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5322                                struct btrfs_key *key, int slot)
5323 {
5324         struct device_extent_record *rec;
5325         struct btrfs_dev_extent *ptr;
5326
5327         rec = calloc(1, sizeof(*rec));
5328         if (!rec) {
5329                 fprintf(stderr, "memory allocation failed\n");
5330                 exit(-1);
5331         }
5332
5333         rec->cache.objectid = key->objectid;
5334         rec->cache.start = key->offset;
5335
5336         rec->generation = btrfs_header_generation(leaf);
5337
5338         rec->objectid = key->objectid;
5339         rec->type = key->type;
5340         rec->offset = key->offset;
5341
5342         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5343         rec->chunk_objecteid =
5344                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5345         rec->chunk_offset =
5346                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5347         rec->length = btrfs_dev_extent_length(leaf, ptr);
5348         rec->cache.size = rec->length;
5349
5350         INIT_LIST_HEAD(&rec->chunk_list);
5351         INIT_LIST_HEAD(&rec->device_list);
5352
5353         return rec;
5354 }
5355
5356 static int
5357 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5358                            struct btrfs_key *key, struct extent_buffer *eb,
5359                            int slot)
5360 {
5361         struct device_extent_record *rec;
5362         int ret;
5363
5364         rec = btrfs_new_device_extent_record(eb, key, slot);
5365         ret = insert_device_extent_record(dev_extent_cache, rec);
5366         if (ret) {
5367                 fprintf(stderr,
5368                         "Device extent[%llu, %llu, %llu] existed.\n",
5369                         rec->objectid, rec->offset, rec->length);
5370                 free(rec);
5371         }
5372
5373         return ret;
5374 }
5375
5376 static int process_extent_item(struct btrfs_root *root,
5377                                struct cache_tree *extent_cache,
5378                                struct extent_buffer *eb, int slot)
5379 {
5380         struct btrfs_extent_item *ei;
5381         struct btrfs_extent_inline_ref *iref;
5382         struct btrfs_extent_data_ref *dref;
5383         struct btrfs_shared_data_ref *sref;
5384         struct btrfs_key key;
5385         struct extent_record tmpl;
5386         unsigned long end;
5387         unsigned long ptr;
5388         int type;
5389         u32 item_size = btrfs_item_size_nr(eb, slot);
5390         u64 refs = 0;
5391         u64 offset;
5392         u64 num_bytes;
5393         int metadata = 0;
5394
5395         btrfs_item_key_to_cpu(eb, &key, slot);
5396
5397         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5398                 metadata = 1;
5399                 num_bytes = root->nodesize;
5400         } else {
5401                 num_bytes = key.offset;
5402         }
5403
5404         if (item_size < sizeof(*ei)) {
5405 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5406                 struct btrfs_extent_item_v0 *ei0;
5407                 BUG_ON(item_size != sizeof(*ei0));
5408                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5409                 refs = btrfs_extent_refs_v0(eb, ei0);
5410 #else
5411                 BUG();
5412 #endif
5413                 memset(&tmpl, 0, sizeof(tmpl));
5414                 tmpl.start = key.objectid;
5415                 tmpl.nr = num_bytes;
5416                 tmpl.extent_item_refs = refs;
5417                 tmpl.metadata = metadata;
5418                 tmpl.found_rec = 1;
5419                 tmpl.max_size = num_bytes;
5420
5421                 return add_extent_rec(extent_cache, &tmpl);
5422         }
5423
5424         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5425         refs = btrfs_extent_refs(eb, ei);
5426         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5427                 metadata = 1;
5428         else
5429                 metadata = 0;
5430
5431         memset(&tmpl, 0, sizeof(tmpl));
5432         tmpl.start = key.objectid;
5433         tmpl.nr = num_bytes;
5434         tmpl.extent_item_refs = refs;
5435         tmpl.metadata = metadata;
5436         tmpl.found_rec = 1;
5437         tmpl.max_size = num_bytes;
5438         add_extent_rec(extent_cache, &tmpl);
5439
5440         ptr = (unsigned long)(ei + 1);
5441         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5442             key.type == BTRFS_EXTENT_ITEM_KEY)
5443                 ptr += sizeof(struct btrfs_tree_block_info);
5444
5445         end = (unsigned long)ei + item_size;
5446         while (ptr < end) {
5447                 iref = (struct btrfs_extent_inline_ref *)ptr;
5448                 type = btrfs_extent_inline_ref_type(eb, iref);
5449                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5450                 switch (type) {
5451                 case BTRFS_TREE_BLOCK_REF_KEY:
5452                         add_tree_backref(extent_cache, key.objectid,
5453                                          0, offset, 0);
5454                         break;
5455                 case BTRFS_SHARED_BLOCK_REF_KEY:
5456                         add_tree_backref(extent_cache, key.objectid,
5457                                          offset, 0, 0);
5458                         break;
5459                 case BTRFS_EXTENT_DATA_REF_KEY:
5460                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5461                         add_data_backref(extent_cache, key.objectid, 0,
5462                                         btrfs_extent_data_ref_root(eb, dref),
5463                                         btrfs_extent_data_ref_objectid(eb,
5464                                                                        dref),
5465                                         btrfs_extent_data_ref_offset(eb, dref),
5466                                         btrfs_extent_data_ref_count(eb, dref),
5467                                         0, num_bytes);
5468                         break;
5469                 case BTRFS_SHARED_DATA_REF_KEY:
5470                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5471                         add_data_backref(extent_cache, key.objectid, offset,
5472                                         0, 0, 0,
5473                                         btrfs_shared_data_ref_count(eb, sref),
5474                                         0, num_bytes);
5475                         break;
5476                 default:
5477                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5478                                 key.objectid, key.type, num_bytes);
5479                         goto out;
5480                 }
5481                 ptr += btrfs_extent_inline_ref_size(type);
5482         }
5483         WARN_ON(ptr > end);
5484 out:
5485         return 0;
5486 }
5487
5488 static int check_cache_range(struct btrfs_root *root,
5489                              struct btrfs_block_group_cache *cache,
5490                              u64 offset, u64 bytes)
5491 {
5492         struct btrfs_free_space *entry;
5493         u64 *logical;
5494         u64 bytenr;
5495         int stripe_len;
5496         int i, nr, ret;
5497
5498         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5499                 bytenr = btrfs_sb_offset(i);
5500                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5501                                        cache->key.objectid, bytenr, 0,
5502                                        &logical, &nr, &stripe_len);
5503                 if (ret)
5504                         return ret;
5505
5506                 while (nr--) {
5507                         if (logical[nr] + stripe_len <= offset)
5508                                 continue;
5509                         if (offset + bytes <= logical[nr])
5510                                 continue;
5511                         if (logical[nr] == offset) {
5512                                 if (stripe_len >= bytes) {
5513                                         kfree(logical);
5514                                         return 0;
5515                                 }
5516                                 bytes -= stripe_len;
5517                                 offset += stripe_len;
5518                         } else if (logical[nr] < offset) {
5519                                 if (logical[nr] + stripe_len >=
5520                                     offset + bytes) {
5521                                         kfree(logical);
5522                                         return 0;
5523                                 }
5524                                 bytes = (offset + bytes) -
5525                                         (logical[nr] + stripe_len);
5526                                 offset = logical[nr] + stripe_len;
5527                         } else {
5528                                 /*
5529                                  * Could be tricky, the super may land in the
5530                                  * middle of the area we're checking.  First
5531                                  * check the easiest case, it's at the end.
5532                                  */
5533                                 if (logical[nr] + stripe_len >=
5534                                     bytes + offset) {
5535                                         bytes = logical[nr] - offset;
5536                                         continue;
5537                                 }
5538
5539                                 /* Check the left side */
5540                                 ret = check_cache_range(root, cache,
5541                                                         offset,
5542                                                         logical[nr] - offset);
5543                                 if (ret) {
5544                                         kfree(logical);
5545                                         return ret;
5546                                 }
5547
5548                                 /* Now we continue with the right side */
5549                                 bytes = (offset + bytes) -
5550                                         (logical[nr] + stripe_len);
5551                                 offset = logical[nr] + stripe_len;
5552                         }
5553                 }
5554
5555                 kfree(logical);
5556         }
5557
5558         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5559         if (!entry) {
5560                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5561                         offset, offset+bytes);
5562                 return -EINVAL;
5563         }
5564
5565         if (entry->offset != offset) {
5566                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5567                         entry->offset);
5568                 return -EINVAL;
5569         }
5570
5571         if (entry->bytes != bytes) {
5572                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5573                         bytes, entry->bytes, offset);
5574                 return -EINVAL;
5575         }
5576
5577         unlink_free_space(cache->free_space_ctl, entry);
5578         free(entry);
5579         return 0;
5580 }
5581
5582 static int verify_space_cache(struct btrfs_root *root,
5583                               struct btrfs_block_group_cache *cache)
5584 {
5585         struct btrfs_path *path;
5586         struct extent_buffer *leaf;
5587         struct btrfs_key key;
5588         u64 last;
5589         int ret = 0;
5590
5591         path = btrfs_alloc_path();
5592         if (!path)
5593                 return -ENOMEM;
5594
5595         root = root->fs_info->extent_root;
5596
5597         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5598
5599         key.objectid = last;
5600         key.offset = 0;
5601         key.type = BTRFS_EXTENT_ITEM_KEY;
5602
5603         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5604         if (ret < 0)
5605                 goto out;
5606         ret = 0;
5607         while (1) {
5608                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5609                         ret = btrfs_next_leaf(root, path);
5610                         if (ret < 0)
5611                                 goto out;
5612                         if (ret > 0) {
5613                                 ret = 0;
5614                                 break;
5615                         }
5616                 }
5617                 leaf = path->nodes[0];
5618                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5619                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5620                         break;
5621                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5622                     key.type != BTRFS_METADATA_ITEM_KEY) {
5623                         path->slots[0]++;
5624                         continue;
5625                 }
5626
5627                 if (last == key.objectid) {
5628                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5629                                 last = key.objectid + key.offset;
5630                         else
5631                                 last = key.objectid + root->nodesize;
5632                         path->slots[0]++;
5633                         continue;
5634                 }
5635
5636                 ret = check_cache_range(root, cache, last,
5637                                         key.objectid - last);
5638                 if (ret)
5639                         break;
5640                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5641                         last = key.objectid + key.offset;
5642                 else
5643                         last = key.objectid + root->nodesize;
5644                 path->slots[0]++;
5645         }
5646
5647         if (last < cache->key.objectid + cache->key.offset)
5648                 ret = check_cache_range(root, cache, last,
5649                                         cache->key.objectid +
5650                                         cache->key.offset - last);
5651
5652 out:
5653         btrfs_free_path(path);
5654
5655         if (!ret &&
5656             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5657                 fprintf(stderr, "There are still entries left in the space "
5658                         "cache\n");
5659                 ret = -EINVAL;
5660         }
5661
5662         return ret;
5663 }
5664
5665 static int check_space_cache(struct btrfs_root *root)
5666 {
5667         struct btrfs_block_group_cache *cache;
5668         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5669         int ret;
5670         int error = 0;
5671
5672         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5673             btrfs_super_generation(root->fs_info->super_copy) !=
5674             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5675                 printf("cache and super generation don't match, space cache "
5676                        "will be invalidated\n");
5677                 return 0;
5678         }
5679
5680         if (ctx.progress_enabled) {
5681                 ctx.tp = TASK_FREE_SPACE;
5682                 task_start(ctx.info);
5683         }
5684
5685         while (1) {
5686                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5687                 if (!cache)
5688                         break;
5689
5690                 start = cache->key.objectid + cache->key.offset;
5691                 if (!cache->free_space_ctl) {
5692                         if (btrfs_init_free_space_ctl(cache,
5693                                                       root->sectorsize)) {
5694                                 ret = -ENOMEM;
5695                                 break;
5696                         }
5697                 } else {
5698                         btrfs_remove_free_space_cache(cache);
5699                 }
5700
5701                 if (btrfs_fs_compat_ro(root->fs_info,
5702                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5703                         ret = exclude_super_stripes(root, cache);
5704                         if (ret) {
5705                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5706                                         strerror(-ret));
5707                                 error++;
5708                                 continue;
5709                         }
5710                         ret = load_free_space_tree(root->fs_info, cache);
5711                         free_excluded_extents(root, cache);
5712                         if (ret < 0) {
5713                                 fprintf(stderr, "could not load free space tree: %s\n",
5714                                         strerror(-ret));
5715                                 error++;
5716                                 continue;
5717                         }
5718                         error += ret;
5719                 } else {
5720                         ret = load_free_space_cache(root->fs_info, cache);
5721                         if (!ret)
5722                                 continue;
5723                 }
5724
5725                 ret = verify_space_cache(root, cache);
5726                 if (ret) {
5727                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5728                                 cache->key.objectid);
5729                         error++;
5730                 }
5731         }
5732
5733         task_stop(ctx.info);
5734
5735         return error ? -EINVAL : 0;
5736 }
5737
5738 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5739                         u64 num_bytes, unsigned long leaf_offset,
5740                         struct extent_buffer *eb) {
5741
5742         u64 offset = 0;
5743         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5744         char *data;
5745         unsigned long csum_offset;
5746         u32 csum;
5747         u32 csum_expected;
5748         u64 read_len;
5749         u64 data_checked = 0;
5750         u64 tmp;
5751         int ret = 0;
5752         int mirror;
5753         int num_copies;
5754
5755         if (num_bytes % root->sectorsize)
5756                 return -EINVAL;
5757
5758         data = malloc(num_bytes);
5759         if (!data)
5760                 return -ENOMEM;
5761
5762         while (offset < num_bytes) {
5763                 mirror = 0;
5764 again:
5765                 read_len = num_bytes - offset;
5766                 /* read as much space once a time */
5767                 ret = read_extent_data(root, data + offset,
5768                                 bytenr + offset, &read_len, mirror);
5769                 if (ret)
5770                         goto out;
5771                 data_checked = 0;
5772                 /* verify every 4k data's checksum */
5773                 while (data_checked < read_len) {
5774                         csum = ~(u32)0;
5775                         tmp = offset + data_checked;
5776
5777                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5778                                                csum, root->sectorsize);
5779                         btrfs_csum_final(csum, (char *)&csum);
5780
5781                         csum_offset = leaf_offset +
5782                                  tmp / root->sectorsize * csum_size;
5783                         read_extent_buffer(eb, (char *)&csum_expected,
5784                                            csum_offset, csum_size);
5785                         /* try another mirror */
5786                         if (csum != csum_expected) {
5787                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5788                                                 mirror, bytenr + tmp,
5789                                                 csum, csum_expected);
5790                                 num_copies = btrfs_num_copies(
5791                                                 &root->fs_info->mapping_tree,
5792                                                 bytenr, num_bytes);
5793                                 if (mirror < num_copies - 1) {
5794                                         mirror += 1;
5795                                         goto again;
5796                                 }
5797                         }
5798                         data_checked += root->sectorsize;
5799                 }
5800                 offset += read_len;
5801         }
5802 out:
5803         free(data);
5804         return ret;
5805 }
5806
5807 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5808                                u64 num_bytes)
5809 {
5810         struct btrfs_path *path;
5811         struct extent_buffer *leaf;
5812         struct btrfs_key key;
5813         int ret;
5814
5815         path = btrfs_alloc_path();
5816         if (!path) {
5817                 fprintf(stderr, "Error allocating path\n");
5818                 return -ENOMEM;
5819         }
5820
5821         key.objectid = bytenr;
5822         key.type = BTRFS_EXTENT_ITEM_KEY;
5823         key.offset = (u64)-1;
5824
5825 again:
5826         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5827                                 0, 0);
5828         if (ret < 0) {
5829                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5830                 btrfs_free_path(path);
5831                 return ret;
5832         } else if (ret) {
5833                 if (path->slots[0] > 0) {
5834                         path->slots[0]--;
5835                 } else {
5836                         ret = btrfs_prev_leaf(root, path);
5837                         if (ret < 0) {
5838                                 goto out;
5839                         } else if (ret > 0) {
5840                                 ret = 0;
5841                                 goto out;
5842                         }
5843                 }
5844         }
5845
5846         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5847
5848         /*
5849          * Block group items come before extent items if they have the same
5850          * bytenr, so walk back one more just in case.  Dear future traveller,
5851          * first congrats on mastering time travel.  Now if it's not too much
5852          * trouble could you go back to 2006 and tell Chris to make the
5853          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5854          * EXTENT_ITEM_KEY please?
5855          */
5856         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5857                 if (path->slots[0] > 0) {
5858                         path->slots[0]--;
5859                 } else {
5860                         ret = btrfs_prev_leaf(root, path);
5861                         if (ret < 0) {
5862                                 goto out;
5863                         } else if (ret > 0) {
5864                                 ret = 0;
5865                                 goto out;
5866                         }
5867                 }
5868                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5869         }
5870
5871         while (num_bytes) {
5872                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5873                         ret = btrfs_next_leaf(root, path);
5874                         if (ret < 0) {
5875                                 fprintf(stderr, "Error going to next leaf "
5876                                         "%d\n", ret);
5877                                 btrfs_free_path(path);
5878                                 return ret;
5879                         } else if (ret) {
5880                                 break;
5881                         }
5882                 }
5883                 leaf = path->nodes[0];
5884                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5885                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5886                         path->slots[0]++;
5887                         continue;
5888                 }
5889                 if (key.objectid + key.offset < bytenr) {
5890                         path->slots[0]++;
5891                         continue;
5892                 }
5893                 if (key.objectid > bytenr + num_bytes)
5894                         break;
5895
5896                 if (key.objectid == bytenr) {
5897                         if (key.offset >= num_bytes) {
5898                                 num_bytes = 0;
5899                                 break;
5900                         }
5901                         num_bytes -= key.offset;
5902                         bytenr += key.offset;
5903                 } else if (key.objectid < bytenr) {
5904                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5905                                 num_bytes = 0;
5906                                 break;
5907                         }
5908                         num_bytes = (bytenr + num_bytes) -
5909                                 (key.objectid + key.offset);
5910                         bytenr = key.objectid + key.offset;
5911                 } else {
5912                         if (key.objectid + key.offset < bytenr + num_bytes) {
5913                                 u64 new_start = key.objectid + key.offset;
5914                                 u64 new_bytes = bytenr + num_bytes - new_start;
5915
5916                                 /*
5917                                  * Weird case, the extent is in the middle of
5918                                  * our range, we'll have to search one side
5919                                  * and then the other.  Not sure if this happens
5920                                  * in real life, but no harm in coding it up
5921                                  * anyway just in case.
5922                                  */
5923                                 btrfs_release_path(path);
5924                                 ret = check_extent_exists(root, new_start,
5925                                                           new_bytes);
5926                                 if (ret) {
5927                                         fprintf(stderr, "Right section didn't "
5928                                                 "have a record\n");
5929                                         break;
5930                                 }
5931                                 num_bytes = key.objectid - bytenr;
5932                                 goto again;
5933                         }
5934                         num_bytes = key.objectid - bytenr;
5935                 }
5936                 path->slots[0]++;
5937         }
5938         ret = 0;
5939
5940 out:
5941         if (num_bytes && !ret) {
5942                 fprintf(stderr, "There are no extents for csum range "
5943                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5944                 ret = 1;
5945         }
5946
5947         btrfs_free_path(path);
5948         return ret;
5949 }
5950
5951 static int check_csums(struct btrfs_root *root)
5952 {
5953         struct btrfs_path *path;
5954         struct extent_buffer *leaf;
5955         struct btrfs_key key;
5956         u64 offset = 0, num_bytes = 0;
5957         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5958         int errors = 0;
5959         int ret;
5960         u64 data_len;
5961         unsigned long leaf_offset;
5962
5963         root = root->fs_info->csum_root;
5964         if (!extent_buffer_uptodate(root->node)) {
5965                 fprintf(stderr, "No valid csum tree found\n");
5966                 return -ENOENT;
5967         }
5968
5969         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5970         key.type = BTRFS_EXTENT_CSUM_KEY;
5971         key.offset = 0;
5972
5973         path = btrfs_alloc_path();
5974         if (!path)
5975                 return -ENOMEM;
5976
5977         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5978         if (ret < 0) {
5979                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5980                 btrfs_free_path(path);
5981                 return ret;
5982         }
5983
5984         if (ret > 0 && path->slots[0])
5985                 path->slots[0]--;
5986         ret = 0;
5987
5988         while (1) {
5989                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5990                         ret = btrfs_next_leaf(root, path);
5991                         if (ret < 0) {
5992                                 fprintf(stderr, "Error going to next leaf "
5993                                         "%d\n", ret);
5994                                 break;
5995                         }
5996                         if (ret)
5997                                 break;
5998                 }
5999                 leaf = path->nodes[0];
6000
6001                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6002                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6003                         path->slots[0]++;
6004                         continue;
6005                 }
6006
6007                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6008                               csum_size) * root->sectorsize;
6009                 if (!check_data_csum)
6010                         goto skip_csum_check;
6011                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6012                 ret = check_extent_csums(root, key.offset, data_len,
6013                                          leaf_offset, leaf);
6014                 if (ret)
6015                         break;
6016 skip_csum_check:
6017                 if (!num_bytes) {
6018                         offset = key.offset;
6019                 } else if (key.offset != offset + num_bytes) {
6020                         ret = check_extent_exists(root, offset, num_bytes);
6021                         if (ret) {
6022                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6023                                         "there is no extent record\n",
6024                                         offset, offset+num_bytes);
6025                                 errors++;
6026                         }
6027                         offset = key.offset;
6028                         num_bytes = 0;
6029                 }
6030                 num_bytes += data_len;
6031                 path->slots[0]++;
6032         }
6033
6034         btrfs_free_path(path);
6035         return errors;
6036 }
6037
6038 static int is_dropped_key(struct btrfs_key *key,
6039                           struct btrfs_key *drop_key) {
6040         if (key->objectid < drop_key->objectid)
6041                 return 1;
6042         else if (key->objectid == drop_key->objectid) {
6043                 if (key->type < drop_key->type)
6044                         return 1;
6045                 else if (key->type == drop_key->type) {
6046                         if (key->offset < drop_key->offset)
6047                                 return 1;
6048                 }
6049         }
6050         return 0;
6051 }
6052
6053 /*
6054  * Here are the rules for FULL_BACKREF.
6055  *
6056  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6057  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6058  *      FULL_BACKREF set.
6059  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6060  *    if it happened after the relocation occurred since we'll have dropped the
6061  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6062  *    have no real way to know for sure.
6063  *
6064  * We process the blocks one root at a time, and we start from the lowest root
6065  * objectid and go to the highest.  So we can just lookup the owner backref for
6066  * the record and if we don't find it then we know it doesn't exist and we have
6067  * a FULL BACKREF.
6068  *
6069  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6070  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6071  * be set or not and then we can check later once we've gathered all the refs.
6072  */
6073 static int calc_extent_flag(struct btrfs_root *root,
6074                            struct cache_tree *extent_cache,
6075                            struct extent_buffer *buf,
6076                            struct root_item_record *ri,
6077                            u64 *flags)
6078 {
6079         struct extent_record *rec;
6080         struct cache_extent *cache;
6081         struct tree_backref *tback;
6082         u64 owner = 0;
6083
6084         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6085         /* we have added this extent before */
6086         BUG_ON(!cache);
6087         rec = container_of(cache, struct extent_record, cache);
6088
6089         /*
6090          * Except file/reloc tree, we can not have
6091          * FULL BACKREF MODE
6092          */
6093         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6094                 goto normal;
6095         /*
6096          * root node
6097          */
6098         if (buf->start == ri->bytenr)
6099                 goto normal;
6100
6101         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6102                 goto full_backref;
6103
6104         owner = btrfs_header_owner(buf);
6105         if (owner == ri->objectid)
6106                 goto normal;
6107
6108         tback = find_tree_backref(rec, 0, owner);
6109         if (!tback)
6110                 goto full_backref;
6111 normal:
6112         *flags = 0;
6113         if (rec->flag_block_full_backref != FLAG_UNSET &&
6114             rec->flag_block_full_backref != 0)
6115                 rec->bad_full_backref = 1;
6116         return 0;
6117 full_backref:
6118         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6119         if (rec->flag_block_full_backref != FLAG_UNSET &&
6120             rec->flag_block_full_backref != 1)
6121                 rec->bad_full_backref = 1;
6122         return 0;
6123 }
6124
6125 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6126 {
6127         fprintf(stderr, "Invalid key type(");
6128         print_key_type(stderr, 0, key_type);
6129         fprintf(stderr, ") found in root(");
6130         print_objectid(stderr, rootid, 0);
6131         fprintf(stderr, ")\n");
6132 }
6133
6134 /*
6135  * Check if the key is valid with its extent buffer.
6136  *
6137  * This is a early check in case invalid key exists in a extent buffer
6138  * This is not comprehensive yet, but should prevent wrong key/item passed
6139  * further
6140  */
6141 static int check_type_with_root(u64 rootid, u8 key_type)
6142 {
6143         switch (key_type) {
6144         /* Only valid in chunk tree */
6145         case BTRFS_DEV_ITEM_KEY:
6146         case BTRFS_CHUNK_ITEM_KEY:
6147                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6148                         goto err;
6149                 break;
6150         /* valid in csum and log tree */
6151         case BTRFS_CSUM_TREE_OBJECTID:
6152                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6153                       is_fstree(rootid)))
6154                         goto err;
6155                 break;
6156         case BTRFS_EXTENT_ITEM_KEY:
6157         case BTRFS_METADATA_ITEM_KEY:
6158         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6159                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6160                         goto err;
6161                 break;
6162         case BTRFS_ROOT_ITEM_KEY:
6163                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6164                         goto err;
6165                 break;
6166         case BTRFS_DEV_EXTENT_KEY:
6167                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6168                         goto err;
6169                 break;
6170         }
6171         return 0;
6172 err:
6173         report_mismatch_key_root(key_type, rootid);
6174         return -EINVAL;
6175 }
6176
6177 static int run_next_block(struct btrfs_root *root,
6178                           struct block_info *bits,
6179                           int bits_nr,
6180                           u64 *last,
6181                           struct cache_tree *pending,
6182                           struct cache_tree *seen,
6183                           struct cache_tree *reada,
6184                           struct cache_tree *nodes,
6185                           struct cache_tree *extent_cache,
6186                           struct cache_tree *chunk_cache,
6187                           struct rb_root *dev_cache,
6188                           struct block_group_tree *block_group_cache,
6189                           struct device_extent_tree *dev_extent_cache,
6190                           struct root_item_record *ri)
6191 {
6192         struct extent_buffer *buf;
6193         struct extent_record *rec = NULL;
6194         u64 bytenr;
6195         u32 size;
6196         u64 parent;
6197         u64 owner;
6198         u64 flags;
6199         u64 ptr;
6200         u64 gen = 0;
6201         int ret = 0;
6202         int i;
6203         int nritems;
6204         struct btrfs_key key;
6205         struct cache_extent *cache;
6206         int reada_bits;
6207
6208         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6209                                     bits_nr, &reada_bits);
6210         if (nritems == 0)
6211                 return 1;
6212
6213         if (!reada_bits) {
6214                 for(i = 0; i < nritems; i++) {
6215                         ret = add_cache_extent(reada, bits[i].start,
6216                                                bits[i].size);
6217                         if (ret == -EEXIST)
6218                                 continue;
6219
6220                         /* fixme, get the parent transid */
6221                         readahead_tree_block(root, bits[i].start,
6222                                              bits[i].size, 0);
6223                 }
6224         }
6225         *last = bits[0].start;
6226         bytenr = bits[0].start;
6227         size = bits[0].size;
6228
6229         cache = lookup_cache_extent(pending, bytenr, size);
6230         if (cache) {
6231                 remove_cache_extent(pending, cache);
6232                 free(cache);
6233         }
6234         cache = lookup_cache_extent(reada, bytenr, size);
6235         if (cache) {
6236                 remove_cache_extent(reada, cache);
6237                 free(cache);
6238         }
6239         cache = lookup_cache_extent(nodes, bytenr, size);
6240         if (cache) {
6241                 remove_cache_extent(nodes, cache);
6242                 free(cache);
6243         }
6244         cache = lookup_cache_extent(extent_cache, bytenr, size);
6245         if (cache) {
6246                 rec = container_of(cache, struct extent_record, cache);
6247                 gen = rec->parent_generation;
6248         }
6249
6250         /* fixme, get the real parent transid */
6251         buf = read_tree_block(root, bytenr, size, gen);
6252         if (!extent_buffer_uptodate(buf)) {
6253                 record_bad_block_io(root->fs_info,
6254                                     extent_cache, bytenr, size);
6255                 goto out;
6256         }
6257
6258         nritems = btrfs_header_nritems(buf);
6259
6260         flags = 0;
6261         if (!init_extent_tree) {
6262                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6263                                        btrfs_header_level(buf), 1, NULL,
6264                                        &flags);
6265                 if (ret < 0) {
6266                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6267                         if (ret < 0) {
6268                                 fprintf(stderr, "Couldn't calc extent flags\n");
6269                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6270                         }
6271                 }
6272         } else {
6273                 flags = 0;
6274                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6275                 if (ret < 0) {
6276                         fprintf(stderr, "Couldn't calc extent flags\n");
6277                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6278                 }
6279         }
6280
6281         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6282                 if (ri != NULL &&
6283                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6284                     ri->objectid == btrfs_header_owner(buf)) {
6285                         /*
6286                          * Ok we got to this block from it's original owner and
6287                          * we have FULL_BACKREF set.  Relocation can leave
6288                          * converted blocks over so this is altogether possible,
6289                          * however it's not possible if the generation > the
6290                          * last snapshot, so check for this case.
6291                          */
6292                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6293                             btrfs_header_generation(buf) > ri->last_snapshot) {
6294                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6295                                 rec->bad_full_backref = 1;
6296                         }
6297                 }
6298         } else {
6299                 if (ri != NULL &&
6300                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6301                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6302                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6303                         rec->bad_full_backref = 1;
6304                 }
6305         }
6306
6307         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6308                 rec->flag_block_full_backref = 1;
6309                 parent = bytenr;
6310                 owner = 0;
6311         } else {
6312                 rec->flag_block_full_backref = 0;
6313                 parent = 0;
6314                 owner = btrfs_header_owner(buf);
6315         }
6316
6317         ret = check_block(root, extent_cache, buf, flags);
6318         if (ret)
6319                 goto out;
6320
6321         if (btrfs_is_leaf(buf)) {
6322                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6323                 for (i = 0; i < nritems; i++) {
6324                         struct btrfs_file_extent_item *fi;
6325                         btrfs_item_key_to_cpu(buf, &key, i);
6326                         /*
6327                          * Check key type against the leaf owner.
6328                          * Could filter quite a lot of early error if
6329                          * owner is correct
6330                          */
6331                         if (check_type_with_root(btrfs_header_owner(buf),
6332                                                  key.type)) {
6333                                 fprintf(stderr, "ignoring invalid key\n");
6334                                 continue;
6335                         }
6336                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6337                                 process_extent_item(root, extent_cache, buf,
6338                                                     i);
6339                                 continue;
6340                         }
6341                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6342                                 process_extent_item(root, extent_cache, buf,
6343                                                     i);
6344                                 continue;
6345                         }
6346                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6347                                 total_csum_bytes +=
6348                                         btrfs_item_size_nr(buf, i);
6349                                 continue;
6350                         }
6351                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6352                                 process_chunk_item(chunk_cache, &key, buf, i);
6353                                 continue;
6354                         }
6355                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6356                                 process_device_item(dev_cache, &key, buf, i);
6357                                 continue;
6358                         }
6359                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6360                                 process_block_group_item(block_group_cache,
6361                                         &key, buf, i);
6362                                 continue;
6363                         }
6364                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6365                                 process_device_extent_item(dev_extent_cache,
6366                                         &key, buf, i);
6367                                 continue;
6368
6369                         }
6370                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6371 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6372                                 process_extent_ref_v0(extent_cache, buf, i);
6373 #else
6374                                 BUG();
6375 #endif
6376                                 continue;
6377                         }
6378
6379                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6380                                 add_tree_backref(extent_cache, key.objectid, 0,
6381                                                  key.offset, 0);
6382                                 continue;
6383                         }
6384                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6385                                 add_tree_backref(extent_cache, key.objectid,
6386                                                  key.offset, 0, 0);
6387                                 continue;
6388                         }
6389                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6390                                 struct btrfs_extent_data_ref *ref;
6391                                 ref = btrfs_item_ptr(buf, i,
6392                                                 struct btrfs_extent_data_ref);
6393                                 add_data_backref(extent_cache,
6394                                         key.objectid, 0,
6395                                         btrfs_extent_data_ref_root(buf, ref),
6396                                         btrfs_extent_data_ref_objectid(buf,
6397                                                                        ref),
6398                                         btrfs_extent_data_ref_offset(buf, ref),
6399                                         btrfs_extent_data_ref_count(buf, ref),
6400                                         0, root->sectorsize);
6401                                 continue;
6402                         }
6403                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6404                                 struct btrfs_shared_data_ref *ref;
6405                                 ref = btrfs_item_ptr(buf, i,
6406                                                 struct btrfs_shared_data_ref);
6407                                 add_data_backref(extent_cache,
6408                                         key.objectid, key.offset, 0, 0, 0,
6409                                         btrfs_shared_data_ref_count(buf, ref),
6410                                         0, root->sectorsize);
6411                                 continue;
6412                         }
6413                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6414                                 struct bad_item *bad;
6415
6416                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6417                                         continue;
6418                                 if (!owner)
6419                                         continue;
6420                                 bad = malloc(sizeof(struct bad_item));
6421                                 if (!bad)
6422                                         continue;
6423                                 INIT_LIST_HEAD(&bad->list);
6424                                 memcpy(&bad->key, &key,
6425                                        sizeof(struct btrfs_key));
6426                                 bad->root_id = owner;
6427                                 list_add_tail(&bad->list, &delete_items);
6428                                 continue;
6429                         }
6430                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6431                                 continue;
6432                         fi = btrfs_item_ptr(buf, i,
6433                                             struct btrfs_file_extent_item);
6434                         if (btrfs_file_extent_type(buf, fi) ==
6435                             BTRFS_FILE_EXTENT_INLINE)
6436                                 continue;
6437                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6438                                 continue;
6439
6440                         data_bytes_allocated +=
6441                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6442                         if (data_bytes_allocated < root->sectorsize) {
6443                                 abort();
6444                         }
6445                         data_bytes_referenced +=
6446                                 btrfs_file_extent_num_bytes(buf, fi);
6447                         add_data_backref(extent_cache,
6448                                 btrfs_file_extent_disk_bytenr(buf, fi),
6449                                 parent, owner, key.objectid, key.offset -
6450                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6451                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6452                 }
6453         } else {
6454                 int level;
6455                 struct btrfs_key first_key;
6456
6457                 first_key.objectid = 0;
6458
6459                 if (nritems > 0)
6460                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6461                 level = btrfs_header_level(buf);
6462                 for (i = 0; i < nritems; i++) {
6463                         struct extent_record tmpl;
6464
6465                         ptr = btrfs_node_blockptr(buf, i);
6466                         size = root->nodesize;
6467                         btrfs_node_key_to_cpu(buf, &key, i);
6468                         if (ri != NULL) {
6469                                 if ((level == ri->drop_level)
6470                                     && is_dropped_key(&key, &ri->drop_key)) {
6471                                         continue;
6472                                 }
6473                         }
6474
6475                         memset(&tmpl, 0, sizeof(tmpl));
6476                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6477                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6478                         tmpl.start = ptr;
6479                         tmpl.nr = size;
6480                         tmpl.refs = 1;
6481                         tmpl.metadata = 1;
6482                         tmpl.max_size = size;
6483                         ret = add_extent_rec(extent_cache, &tmpl);
6484                         BUG_ON(ret);
6485
6486                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6487
6488                         if (level > 1) {
6489                                 add_pending(nodes, seen, ptr, size);
6490                         } else {
6491                                 add_pending(pending, seen, ptr, size);
6492                         }
6493                 }
6494                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6495                                       nritems) * sizeof(struct btrfs_key_ptr);
6496         }
6497         total_btree_bytes += buf->len;
6498         if (fs_root_objectid(btrfs_header_owner(buf)))
6499                 total_fs_tree_bytes += buf->len;
6500         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6501                 total_extent_tree_bytes += buf->len;
6502         if (!found_old_backref &&
6503             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6504             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6505             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6506                 found_old_backref = 1;
6507 out:
6508         free_extent_buffer(buf);
6509         return ret;
6510 }
6511
6512 static int add_root_to_pending(struct extent_buffer *buf,
6513                                struct cache_tree *extent_cache,
6514                                struct cache_tree *pending,
6515                                struct cache_tree *seen,
6516                                struct cache_tree *nodes,
6517                                u64 objectid)
6518 {
6519         struct extent_record tmpl;
6520
6521         if (btrfs_header_level(buf) > 0)
6522                 add_pending(nodes, seen, buf->start, buf->len);
6523         else
6524                 add_pending(pending, seen, buf->start, buf->len);
6525
6526         memset(&tmpl, 0, sizeof(tmpl));
6527         tmpl.start = buf->start;
6528         tmpl.nr = buf->len;
6529         tmpl.is_root = 1;
6530         tmpl.refs = 1;
6531         tmpl.metadata = 1;
6532         tmpl.max_size = buf->len;
6533         add_extent_rec(extent_cache, &tmpl);
6534
6535         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6536             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6537                 add_tree_backref(extent_cache, buf->start, buf->start,
6538                                  0, 1);
6539         else
6540                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6541         return 0;
6542 }
6543
6544 /* as we fix the tree, we might be deleting blocks that
6545  * we're tracking for repair.  This hook makes sure we
6546  * remove any backrefs for blocks as we are fixing them.
6547  */
6548 static int free_extent_hook(struct btrfs_trans_handle *trans,
6549                             struct btrfs_root *root,
6550                             u64 bytenr, u64 num_bytes, u64 parent,
6551                             u64 root_objectid, u64 owner, u64 offset,
6552                             int refs_to_drop)
6553 {
6554         struct extent_record *rec;
6555         struct cache_extent *cache;
6556         int is_data;
6557         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6558
6559         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6560         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6561         if (!cache)
6562                 return 0;
6563
6564         rec = container_of(cache, struct extent_record, cache);
6565         if (is_data) {
6566                 struct data_backref *back;
6567                 back = find_data_backref(rec, parent, root_objectid, owner,
6568                                          offset, 1, bytenr, num_bytes);
6569                 if (!back)
6570                         goto out;
6571                 if (back->node.found_ref) {
6572                         back->found_ref -= refs_to_drop;
6573                         if (rec->refs)
6574                                 rec->refs -= refs_to_drop;
6575                 }
6576                 if (back->node.found_extent_tree) {
6577                         back->num_refs -= refs_to_drop;
6578                         if (rec->extent_item_refs)
6579                                 rec->extent_item_refs -= refs_to_drop;
6580                 }
6581                 if (back->found_ref == 0)
6582                         back->node.found_ref = 0;
6583                 if (back->num_refs == 0)
6584                         back->node.found_extent_tree = 0;
6585
6586                 if (!back->node.found_extent_tree && back->node.found_ref) {
6587                         rb_erase(&back->node.node, &rec->backref_tree);
6588                         free(back);
6589                 }
6590         } else {
6591                 struct tree_backref *back;
6592                 back = find_tree_backref(rec, parent, root_objectid);
6593                 if (!back)
6594                         goto out;
6595                 if (back->node.found_ref) {
6596                         if (rec->refs)
6597                                 rec->refs--;
6598                         back->node.found_ref = 0;
6599                 }
6600                 if (back->node.found_extent_tree) {
6601                         if (rec->extent_item_refs)
6602                                 rec->extent_item_refs--;
6603                         back->node.found_extent_tree = 0;
6604                 }
6605                 if (!back->node.found_extent_tree && back->node.found_ref) {
6606                         rb_erase(&back->node.node, &rec->backref_tree);
6607                         free(back);
6608                 }
6609         }
6610         maybe_free_extent_rec(extent_cache, rec);
6611 out:
6612         return 0;
6613 }
6614
6615 static int delete_extent_records(struct btrfs_trans_handle *trans,
6616                                  struct btrfs_root *root,
6617                                  struct btrfs_path *path,
6618                                  u64 bytenr, u64 new_len)
6619 {
6620         struct btrfs_key key;
6621         struct btrfs_key found_key;
6622         struct extent_buffer *leaf;
6623         int ret;
6624         int slot;
6625
6626
6627         key.objectid = bytenr;
6628         key.type = (u8)-1;
6629         key.offset = (u64)-1;
6630
6631         while(1) {
6632                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6633                                         &key, path, 0, 1);
6634                 if (ret < 0)
6635                         break;
6636
6637                 if (ret > 0) {
6638                         ret = 0;
6639                         if (path->slots[0] == 0)
6640                                 break;
6641                         path->slots[0]--;
6642                 }
6643                 ret = 0;
6644
6645                 leaf = path->nodes[0];
6646                 slot = path->slots[0];
6647
6648                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6649                 if (found_key.objectid != bytenr)
6650                         break;
6651
6652                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6653                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6654                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6655                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6656                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6657                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6658                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6659                         btrfs_release_path(path);
6660                         if (found_key.type == 0) {
6661                                 if (found_key.offset == 0)
6662                                         break;
6663                                 key.offset = found_key.offset - 1;
6664                                 key.type = found_key.type;
6665                         }
6666                         key.type = found_key.type - 1;
6667                         key.offset = (u64)-1;
6668                         continue;
6669                 }
6670
6671                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6672                         found_key.objectid, found_key.type, found_key.offset);
6673
6674                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6675                 if (ret)
6676                         break;
6677                 btrfs_release_path(path);
6678
6679                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6680                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6681                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6682                                 found_key.offset : root->nodesize;
6683
6684                         ret = btrfs_update_block_group(trans, root, bytenr,
6685                                                        bytes, 0, 0);
6686                         if (ret)
6687                                 break;
6688                 }
6689         }
6690
6691         btrfs_release_path(path);
6692         return ret;
6693 }
6694
6695 /*
6696  * for a single backref, this will allocate a new extent
6697  * and add the backref to it.
6698  */
6699 static int record_extent(struct btrfs_trans_handle *trans,
6700                          struct btrfs_fs_info *info,
6701                          struct btrfs_path *path,
6702                          struct extent_record *rec,
6703                          struct extent_backref *back,
6704                          int allocated, u64 flags)
6705 {
6706         int ret;
6707         struct btrfs_root *extent_root = info->extent_root;
6708         struct extent_buffer *leaf;
6709         struct btrfs_key ins_key;
6710         struct btrfs_extent_item *ei;
6711         struct tree_backref *tback;
6712         struct data_backref *dback;
6713         struct btrfs_tree_block_info *bi;
6714
6715         if (!back->is_data)
6716                 rec->max_size = max_t(u64, rec->max_size,
6717                                     info->extent_root->nodesize);
6718
6719         if (!allocated) {
6720                 u32 item_size = sizeof(*ei);
6721
6722                 if (!back->is_data)
6723                         item_size += sizeof(*bi);
6724
6725                 ins_key.objectid = rec->start;
6726                 ins_key.offset = rec->max_size;
6727                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6728
6729                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6730                                         &ins_key, item_size);
6731                 if (ret)
6732                         goto fail;
6733
6734                 leaf = path->nodes[0];
6735                 ei = btrfs_item_ptr(leaf, path->slots[0],
6736                                     struct btrfs_extent_item);
6737
6738                 btrfs_set_extent_refs(leaf, ei, 0);
6739                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6740
6741                 if (back->is_data) {
6742                         btrfs_set_extent_flags(leaf, ei,
6743                                                BTRFS_EXTENT_FLAG_DATA);
6744                 } else {
6745                         struct btrfs_disk_key copy_key;;
6746
6747                         tback = to_tree_backref(back);
6748                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6749                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6750                                              sizeof(*bi));
6751
6752                         btrfs_set_disk_key_objectid(&copy_key,
6753                                                     rec->info_objectid);
6754                         btrfs_set_disk_key_type(&copy_key, 0);
6755                         btrfs_set_disk_key_offset(&copy_key, 0);
6756
6757                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6758                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6759
6760                         btrfs_set_extent_flags(leaf, ei,
6761                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6762                 }
6763
6764                 btrfs_mark_buffer_dirty(leaf);
6765                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6766                                                rec->max_size, 1, 0);
6767                 if (ret)
6768                         goto fail;
6769                 btrfs_release_path(path);
6770         }
6771
6772         if (back->is_data) {
6773                 u64 parent;
6774                 int i;
6775
6776                 dback = to_data_backref(back);
6777                 if (back->full_backref)
6778                         parent = dback->parent;
6779                 else
6780                         parent = 0;
6781
6782                 for (i = 0; i < dback->found_ref; i++) {
6783                         /* if parent != 0, we're doing a full backref
6784                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6785                          * just makes the backref allocator create a data
6786                          * backref
6787                          */
6788                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6789                                                    rec->start, rec->max_size,
6790                                                    parent,
6791                                                    dback->root,
6792                                                    parent ?
6793                                                    BTRFS_FIRST_FREE_OBJECTID :
6794                                                    dback->owner,
6795                                                    dback->offset);
6796                         if (ret)
6797                                 break;
6798                 }
6799                 fprintf(stderr, "adding new data backref"
6800                                 " on %llu %s %llu owner %llu"
6801                                 " offset %llu found %d\n",
6802                                 (unsigned long long)rec->start,
6803                                 back->full_backref ?
6804                                 "parent" : "root",
6805                                 back->full_backref ?
6806                                 (unsigned long long)parent :
6807                                 (unsigned long long)dback->root,
6808                                 (unsigned long long)dback->owner,
6809                                 (unsigned long long)dback->offset,
6810                                 dback->found_ref);
6811         } else {
6812                 u64 parent;
6813
6814                 tback = to_tree_backref(back);
6815                 if (back->full_backref)
6816                         parent = tback->parent;
6817                 else
6818                         parent = 0;
6819
6820                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6821                                            rec->start, rec->max_size,
6822                                            parent, tback->root, 0, 0);
6823                 fprintf(stderr, "adding new tree backref on "
6824                         "start %llu len %llu parent %llu root %llu\n",
6825                         rec->start, rec->max_size, parent, tback->root);
6826         }
6827 fail:
6828         btrfs_release_path(path);
6829         return ret;
6830 }
6831
6832 static struct extent_entry *find_entry(struct list_head *entries,
6833                                        u64 bytenr, u64 bytes)
6834 {
6835         struct extent_entry *entry = NULL;
6836
6837         list_for_each_entry(entry, entries, list) {
6838                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6839                         return entry;
6840         }
6841
6842         return NULL;
6843 }
6844
6845 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6846 {
6847         struct extent_entry *entry, *best = NULL, *prev = NULL;
6848
6849         list_for_each_entry(entry, entries, list) {
6850                 if (!prev) {
6851                         prev = entry;
6852                         continue;
6853                 }
6854
6855                 /*
6856                  * If there are as many broken entries as entries then we know
6857                  * not to trust this particular entry.
6858                  */
6859                 if (entry->broken == entry->count)
6860                         continue;
6861
6862                 /*
6863                  * If our current entry == best then we can't be sure our best
6864                  * is really the best, so we need to keep searching.
6865                  */
6866                 if (best && best->count == entry->count) {
6867                         prev = entry;
6868                         best = NULL;
6869                         continue;
6870                 }
6871
6872                 /* Prev == entry, not good enough, have to keep searching */
6873                 if (!prev->broken && prev->count == entry->count)
6874                         continue;
6875
6876                 if (!best)
6877                         best = (prev->count > entry->count) ? prev : entry;
6878                 else if (best->count < entry->count)
6879                         best = entry;
6880                 prev = entry;
6881         }
6882
6883         return best;
6884 }
6885
6886 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6887                       struct data_backref *dback, struct extent_entry *entry)
6888 {
6889         struct btrfs_trans_handle *trans;
6890         struct btrfs_root *root;
6891         struct btrfs_file_extent_item *fi;
6892         struct extent_buffer *leaf;
6893         struct btrfs_key key;
6894         u64 bytenr, bytes;
6895         int ret, err;
6896
6897         key.objectid = dback->root;
6898         key.type = BTRFS_ROOT_ITEM_KEY;
6899         key.offset = (u64)-1;
6900         root = btrfs_read_fs_root(info, &key);
6901         if (IS_ERR(root)) {
6902                 fprintf(stderr, "Couldn't find root for our ref\n");
6903                 return -EINVAL;
6904         }
6905
6906         /*
6907          * The backref points to the original offset of the extent if it was
6908          * split, so we need to search down to the offset we have and then walk
6909          * forward until we find the backref we're looking for.
6910          */
6911         key.objectid = dback->owner;
6912         key.type = BTRFS_EXTENT_DATA_KEY;
6913         key.offset = dback->offset;
6914         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6915         if (ret < 0) {
6916                 fprintf(stderr, "Error looking up ref %d\n", ret);
6917                 return ret;
6918         }
6919
6920         while (1) {
6921                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6922                         ret = btrfs_next_leaf(root, path);
6923                         if (ret) {
6924                                 fprintf(stderr, "Couldn't find our ref, next\n");
6925                                 return -EINVAL;
6926                         }
6927                 }
6928                 leaf = path->nodes[0];
6929                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6930                 if (key.objectid != dback->owner ||
6931                     key.type != BTRFS_EXTENT_DATA_KEY) {
6932                         fprintf(stderr, "Couldn't find our ref, search\n");
6933                         return -EINVAL;
6934                 }
6935                 fi = btrfs_item_ptr(leaf, path->slots[0],
6936                                     struct btrfs_file_extent_item);
6937                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6938                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6939
6940                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6941                         break;
6942                 path->slots[0]++;
6943         }
6944
6945         btrfs_release_path(path);
6946
6947         trans = btrfs_start_transaction(root, 1);
6948         if (IS_ERR(trans))
6949                 return PTR_ERR(trans);
6950
6951         /*
6952          * Ok we have the key of the file extent we want to fix, now we can cow
6953          * down to the thing and fix it.
6954          */
6955         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6956         if (ret < 0) {
6957                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6958                         key.objectid, key.type, key.offset, ret);
6959                 goto out;
6960         }
6961         if (ret > 0) {
6962                 fprintf(stderr, "Well that's odd, we just found this key "
6963                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6964                         key.offset);
6965                 ret = -EINVAL;
6966                 goto out;
6967         }
6968         leaf = path->nodes[0];
6969         fi = btrfs_item_ptr(leaf, path->slots[0],
6970                             struct btrfs_file_extent_item);
6971
6972         if (btrfs_file_extent_compression(leaf, fi) &&
6973             dback->disk_bytenr != entry->bytenr) {
6974                 fprintf(stderr, "Ref doesn't match the record start and is "
6975                         "compressed, please take a btrfs-image of this file "
6976                         "system and send it to a btrfs developer so they can "
6977                         "complete this functionality for bytenr %Lu\n",
6978                         dback->disk_bytenr);
6979                 ret = -EINVAL;
6980                 goto out;
6981         }
6982
6983         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6984                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6985         } else if (dback->disk_bytenr > entry->bytenr) {
6986                 u64 off_diff, offset;
6987
6988                 off_diff = dback->disk_bytenr - entry->bytenr;
6989                 offset = btrfs_file_extent_offset(leaf, fi);
6990                 if (dback->disk_bytenr + offset +
6991                     btrfs_file_extent_num_bytes(leaf, fi) >
6992                     entry->bytenr + entry->bytes) {
6993                         fprintf(stderr, "Ref is past the entry end, please "
6994                                 "take a btrfs-image of this file system and "
6995                                 "send it to a btrfs developer, ref %Lu\n",
6996                                 dback->disk_bytenr);
6997                         ret = -EINVAL;
6998                         goto out;
6999                 }
7000                 offset += off_diff;
7001                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7002                 btrfs_set_file_extent_offset(leaf, fi, offset);
7003         } else if (dback->disk_bytenr < entry->bytenr) {
7004                 u64 offset;
7005
7006                 offset = btrfs_file_extent_offset(leaf, fi);
7007                 if (dback->disk_bytenr + offset < entry->bytenr) {
7008                         fprintf(stderr, "Ref is before the entry start, please"
7009                                 " take a btrfs-image of this file system and "
7010                                 "send it to a btrfs developer, ref %Lu\n",
7011                                 dback->disk_bytenr);
7012                         ret = -EINVAL;
7013                         goto out;
7014                 }
7015
7016                 offset += dback->disk_bytenr;
7017                 offset -= entry->bytenr;
7018                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7019                 btrfs_set_file_extent_offset(leaf, fi, offset);
7020         }
7021
7022         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7023
7024         /*
7025          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7026          * only do this if we aren't using compression, otherwise it's a
7027          * trickier case.
7028          */
7029         if (!btrfs_file_extent_compression(leaf, fi))
7030                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7031         else
7032                 printf("ram bytes may be wrong?\n");
7033         btrfs_mark_buffer_dirty(leaf);
7034 out:
7035         err = btrfs_commit_transaction(trans, root);
7036         btrfs_release_path(path);
7037         return ret ? ret : err;
7038 }
7039
7040 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7041                            struct extent_record *rec)
7042 {
7043         struct extent_backref *back, *tmp;
7044         struct data_backref *dback;
7045         struct extent_entry *entry, *best = NULL;
7046         LIST_HEAD(entries);
7047         int nr_entries = 0;
7048         int broken_entries = 0;
7049         int ret = 0;
7050         short mismatch = 0;
7051
7052         /*
7053          * Metadata is easy and the backrefs should always agree on bytenr and
7054          * size, if not we've got bigger issues.
7055          */
7056         if (rec->metadata)
7057                 return 0;
7058
7059         rbtree_postorder_for_each_entry_safe(back, tmp,
7060                                              &rec->backref_tree, node) {
7061                 if (back->full_backref || !back->is_data)
7062                         continue;
7063
7064                 dback = to_data_backref(back);
7065
7066                 /*
7067                  * We only pay attention to backrefs that we found a real
7068                  * backref for.
7069                  */
7070                 if (dback->found_ref == 0)
7071                         continue;
7072
7073                 /*
7074                  * For now we only catch when the bytes don't match, not the
7075                  * bytenr.  We can easily do this at the same time, but I want
7076                  * to have a fs image to test on before we just add repair
7077                  * functionality willy-nilly so we know we won't screw up the
7078                  * repair.
7079                  */
7080
7081                 entry = find_entry(&entries, dback->disk_bytenr,
7082                                    dback->bytes);
7083                 if (!entry) {
7084                         entry = malloc(sizeof(struct extent_entry));
7085                         if (!entry) {
7086                                 ret = -ENOMEM;
7087                                 goto out;
7088                         }
7089                         memset(entry, 0, sizeof(*entry));
7090                         entry->bytenr = dback->disk_bytenr;
7091                         entry->bytes = dback->bytes;
7092                         list_add_tail(&entry->list, &entries);
7093                         nr_entries++;
7094                 }
7095
7096                 /*
7097                  * If we only have on entry we may think the entries agree when
7098                  * in reality they don't so we have to do some extra checking.
7099                  */
7100                 if (dback->disk_bytenr != rec->start ||
7101                     dback->bytes != rec->nr || back->broken)
7102                         mismatch = 1;
7103
7104                 if (back->broken) {
7105                         entry->broken++;
7106                         broken_entries++;
7107                 }
7108
7109                 entry->count++;
7110         }
7111
7112         /* Yay all the backrefs agree, carry on good sir */
7113         if (nr_entries <= 1 && !mismatch)
7114                 goto out;
7115
7116         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7117                 "%Lu\n", rec->start);
7118
7119         /*
7120          * First we want to see if the backrefs can agree amongst themselves who
7121          * is right, so figure out which one of the entries has the highest
7122          * count.
7123          */
7124         best = find_most_right_entry(&entries);
7125
7126         /*
7127          * Ok so we may have an even split between what the backrefs think, so
7128          * this is where we use the extent ref to see what it thinks.
7129          */
7130         if (!best) {
7131                 entry = find_entry(&entries, rec->start, rec->nr);
7132                 if (!entry && (!broken_entries || !rec->found_rec)) {
7133                         fprintf(stderr, "Backrefs don't agree with each other "
7134                                 "and extent record doesn't agree with anybody,"
7135                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7136                                 rec->start, rec->nr);
7137                         ret = -EINVAL;
7138                         goto out;
7139                 } else if (!entry) {
7140                         /*
7141                          * Ok our backrefs were broken, we'll assume this is the
7142                          * correct value and add an entry for this range.
7143                          */
7144                         entry = malloc(sizeof(struct extent_entry));
7145                         if (!entry) {
7146                                 ret = -ENOMEM;
7147                                 goto out;
7148                         }
7149                         memset(entry, 0, sizeof(*entry));
7150                         entry->bytenr = rec->start;
7151                         entry->bytes = rec->nr;
7152                         list_add_tail(&entry->list, &entries);
7153                         nr_entries++;
7154                 }
7155                 entry->count++;
7156                 best = find_most_right_entry(&entries);
7157                 if (!best) {
7158                         fprintf(stderr, "Backrefs and extent record evenly "
7159                                 "split on who is right, this is going to "
7160                                 "require user input to fix bytenr %Lu bytes "
7161                                 "%Lu\n", rec->start, rec->nr);
7162                         ret = -EINVAL;
7163                         goto out;
7164                 }
7165         }
7166
7167         /*
7168          * I don't think this can happen currently as we'll abort() if we catch
7169          * this case higher up, but in case somebody removes that we still can't
7170          * deal with it properly here yet, so just bail out of that's the case.
7171          */
7172         if (best->bytenr != rec->start) {
7173                 fprintf(stderr, "Extent start and backref starts don't match, "
7174                         "please use btrfs-image on this file system and send "
7175                         "it to a btrfs developer so they can make fsck fix "
7176                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7177                         rec->start, rec->nr);
7178                 ret = -EINVAL;
7179                 goto out;
7180         }
7181
7182         /*
7183          * Ok great we all agreed on an extent record, let's go find the real
7184          * references and fix up the ones that don't match.
7185          */
7186         rbtree_postorder_for_each_entry_safe(back, tmp,
7187                                              &rec->backref_tree, node) {
7188                 if (back->full_backref || !back->is_data)
7189                         continue;
7190
7191                 dback = to_data_backref(back);
7192
7193                 /*
7194                  * Still ignoring backrefs that don't have a real ref attached
7195                  * to them.
7196                  */
7197                 if (dback->found_ref == 0)
7198                         continue;
7199
7200                 if (dback->bytes == best->bytes &&
7201                     dback->disk_bytenr == best->bytenr)
7202                         continue;
7203
7204                 ret = repair_ref(info, path, dback, best);
7205                 if (ret)
7206                         goto out;
7207         }
7208
7209         /*
7210          * Ok we messed with the actual refs, which means we need to drop our
7211          * entire cache and go back and rescan.  I know this is a huge pain and
7212          * adds a lot of extra work, but it's the only way to be safe.  Once all
7213          * the backrefs agree we may not need to do anything to the extent
7214          * record itself.
7215          */
7216         ret = -EAGAIN;
7217 out:
7218         while (!list_empty(&entries)) {
7219                 entry = list_entry(entries.next, struct extent_entry, list);
7220                 list_del_init(&entry->list);
7221                 free(entry);
7222         }
7223         return ret;
7224 }
7225
7226 static int process_duplicates(struct btrfs_root *root,
7227                               struct cache_tree *extent_cache,
7228                               struct extent_record *rec)
7229 {
7230         struct extent_record *good, *tmp;
7231         struct cache_extent *cache;
7232         int ret;
7233
7234         /*
7235          * If we found a extent record for this extent then return, or if we
7236          * have more than one duplicate we are likely going to need to delete
7237          * something.
7238          */
7239         if (rec->found_rec || rec->num_duplicates > 1)
7240                 return 0;
7241
7242         /* Shouldn't happen but just in case */
7243         BUG_ON(!rec->num_duplicates);
7244
7245         /*
7246          * So this happens if we end up with a backref that doesn't match the
7247          * actual extent entry.  So either the backref is bad or the extent
7248          * entry is bad.  Either way we want to have the extent_record actually
7249          * reflect what we found in the extent_tree, so we need to take the
7250          * duplicate out and use that as the extent_record since the only way we
7251          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7252          */
7253         remove_cache_extent(extent_cache, &rec->cache);
7254
7255         good = to_extent_record(rec->dups.next);
7256         list_del_init(&good->list);
7257         INIT_LIST_HEAD(&good->backrefs);
7258         INIT_LIST_HEAD(&good->dups);
7259         good->cache.start = good->start;
7260         good->cache.size = good->nr;
7261         good->content_checked = 0;
7262         good->owner_ref_checked = 0;
7263         good->num_duplicates = 0;
7264         good->refs = rec->refs;
7265         list_splice_init(&rec->backrefs, &good->backrefs);
7266         while (1) {
7267                 cache = lookup_cache_extent(extent_cache, good->start,
7268                                             good->nr);
7269                 if (!cache)
7270                         break;
7271                 tmp = container_of(cache, struct extent_record, cache);
7272
7273                 /*
7274                  * If we find another overlapping extent and it's found_rec is
7275                  * set then it's a duplicate and we need to try and delete
7276                  * something.
7277                  */
7278                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7279                         if (list_empty(&good->list))
7280                                 list_add_tail(&good->list,
7281                                               &duplicate_extents);
7282                         good->num_duplicates += tmp->num_duplicates + 1;
7283                         list_splice_init(&tmp->dups, &good->dups);
7284                         list_del_init(&tmp->list);
7285                         list_add_tail(&tmp->list, &good->dups);
7286                         remove_cache_extent(extent_cache, &tmp->cache);
7287                         continue;
7288                 }
7289
7290                 /*
7291                  * Ok we have another non extent item backed extent rec, so lets
7292                  * just add it to this extent and carry on like we did above.
7293                  */
7294                 good->refs += tmp->refs;
7295                 list_splice_init(&tmp->backrefs, &good->backrefs);
7296                 remove_cache_extent(extent_cache, &tmp->cache);
7297                 free(tmp);
7298         }
7299         ret = insert_cache_extent(extent_cache, &good->cache);
7300         BUG_ON(ret);
7301         free(rec);
7302         return good->num_duplicates ? 0 : 1;
7303 }
7304
7305 static int delete_duplicate_records(struct btrfs_root *root,
7306                                     struct extent_record *rec)
7307 {
7308         struct btrfs_trans_handle *trans;
7309         LIST_HEAD(delete_list);
7310         struct btrfs_path *path;
7311         struct extent_record *tmp, *good, *n;
7312         int nr_del = 0;
7313         int ret = 0, err;
7314         struct btrfs_key key;
7315
7316         path = btrfs_alloc_path();
7317         if (!path) {
7318                 ret = -ENOMEM;
7319                 goto out;
7320         }
7321
7322         good = rec;
7323         /* Find the record that covers all of the duplicates. */
7324         list_for_each_entry(tmp, &rec->dups, list) {
7325                 if (good->start < tmp->start)
7326                         continue;
7327                 if (good->nr > tmp->nr)
7328                         continue;
7329
7330                 if (tmp->start + tmp->nr < good->start + good->nr) {
7331                         fprintf(stderr, "Ok we have overlapping extents that "
7332                                 "aren't completely covered by each other, this "
7333                                 "is going to require more careful thought.  "
7334                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7335                                 tmp->start, tmp->nr, good->start, good->nr);
7336                         abort();
7337                 }
7338                 good = tmp;
7339         }
7340
7341         if (good != rec)
7342                 list_add_tail(&rec->list, &delete_list);
7343
7344         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7345                 if (tmp == good)
7346                         continue;
7347                 list_move_tail(&tmp->list, &delete_list);
7348         }
7349
7350         root = root->fs_info->extent_root;
7351         trans = btrfs_start_transaction(root, 1);
7352         if (IS_ERR(trans)) {
7353                 ret = PTR_ERR(trans);
7354                 goto out;
7355         }
7356
7357         list_for_each_entry(tmp, &delete_list, list) {
7358                 if (tmp->found_rec == 0)
7359                         continue;
7360                 key.objectid = tmp->start;
7361                 key.type = BTRFS_EXTENT_ITEM_KEY;
7362                 key.offset = tmp->nr;
7363
7364                 /* Shouldn't happen but just in case */
7365                 if (tmp->metadata) {
7366                         fprintf(stderr, "Well this shouldn't happen, extent "
7367                                 "record overlaps but is metadata? "
7368                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7369                         abort();
7370                 }
7371
7372                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7373                 if (ret) {
7374                         if (ret > 0)
7375                                 ret = -EINVAL;
7376                         break;
7377                 }
7378                 ret = btrfs_del_item(trans, root, path);
7379                 if (ret)
7380                         break;
7381                 btrfs_release_path(path);
7382                 nr_del++;
7383         }
7384         err = btrfs_commit_transaction(trans, root);
7385         if (err && !ret)
7386                 ret = err;
7387 out:
7388         while (!list_empty(&delete_list)) {
7389                 tmp = to_extent_record(delete_list.next);
7390                 list_del_init(&tmp->list);
7391                 if (tmp == rec)
7392                         continue;
7393                 free(tmp);
7394         }
7395
7396         while (!list_empty(&rec->dups)) {
7397                 tmp = to_extent_record(rec->dups.next);
7398                 list_del_init(&tmp->list);
7399                 free(tmp);
7400         }
7401
7402         btrfs_free_path(path);
7403
7404         if (!ret && !nr_del)
7405                 rec->num_duplicates = 0;
7406
7407         return ret ? ret : nr_del;
7408 }
7409
7410 static int find_possible_backrefs(struct btrfs_fs_info *info,
7411                                   struct btrfs_path *path,
7412                                   struct cache_tree *extent_cache,
7413                                   struct extent_record *rec)
7414 {
7415         struct btrfs_root *root;
7416         struct extent_backref *back, *tmp;
7417         struct data_backref *dback;
7418         struct cache_extent *cache;
7419         struct btrfs_file_extent_item *fi;
7420         struct btrfs_key key;
7421         u64 bytenr, bytes;
7422         int ret;
7423
7424         rbtree_postorder_for_each_entry_safe(back, tmp,
7425                                              &rec->backref_tree, node) {
7426                 /* Don't care about full backrefs (poor unloved backrefs) */
7427                 if (back->full_backref || !back->is_data)
7428                         continue;
7429
7430                 dback = to_data_backref(back);
7431
7432                 /* We found this one, we don't need to do a lookup */
7433                 if (dback->found_ref)
7434                         continue;
7435
7436                 key.objectid = dback->root;
7437                 key.type = BTRFS_ROOT_ITEM_KEY;
7438                 key.offset = (u64)-1;
7439
7440                 root = btrfs_read_fs_root(info, &key);
7441
7442                 /* No root, definitely a bad ref, skip */
7443                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7444                         continue;
7445                 /* Other err, exit */
7446                 if (IS_ERR(root))
7447                         return PTR_ERR(root);
7448
7449                 key.objectid = dback->owner;
7450                 key.type = BTRFS_EXTENT_DATA_KEY;
7451                 key.offset = dback->offset;
7452                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7453                 if (ret) {
7454                         btrfs_release_path(path);
7455                         if (ret < 0)
7456                                 return ret;
7457                         /* Didn't find it, we can carry on */
7458                         ret = 0;
7459                         continue;
7460                 }
7461
7462                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7463                                     struct btrfs_file_extent_item);
7464                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7465                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7466                 btrfs_release_path(path);
7467                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7468                 if (cache) {
7469                         struct extent_record *tmp;
7470                         tmp = container_of(cache, struct extent_record, cache);
7471
7472                         /*
7473                          * If we found an extent record for the bytenr for this
7474                          * particular backref then we can't add it to our
7475                          * current extent record.  We only want to add backrefs
7476                          * that don't have a corresponding extent item in the
7477                          * extent tree since they likely belong to this record
7478                          * and we need to fix it if it doesn't match bytenrs.
7479                          */
7480                         if  (tmp->found_rec)
7481                                 continue;
7482                 }
7483
7484                 dback->found_ref += 1;
7485                 dback->disk_bytenr = bytenr;
7486                 dback->bytes = bytes;
7487
7488                 /*
7489                  * Set this so the verify backref code knows not to trust the
7490                  * values in this backref.
7491                  */
7492                 back->broken = 1;
7493         }
7494
7495         return 0;
7496 }
7497
7498 /*
7499  * Record orphan data ref into corresponding root.
7500  *
7501  * Return 0 if the extent item contains data ref and recorded.
7502  * Return 1 if the extent item contains no useful data ref
7503  *   On that case, it may contains only shared_dataref or metadata backref
7504  *   or the file extent exists(this should be handled by the extent bytenr
7505  *   recovery routine)
7506  * Return <0 if something goes wrong.
7507  */
7508 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7509                                       struct extent_record *rec)
7510 {
7511         struct btrfs_key key;
7512         struct btrfs_root *dest_root;
7513         struct extent_backref *back, *tmp;
7514         struct data_backref *dback;
7515         struct orphan_data_extent *orphan;
7516         struct btrfs_path *path;
7517         int recorded_data_ref = 0;
7518         int ret = 0;
7519
7520         if (rec->metadata)
7521                 return 1;
7522         path = btrfs_alloc_path();
7523         if (!path)
7524                 return -ENOMEM;
7525         rbtree_postorder_for_each_entry_safe(back, tmp,
7526                                              &rec->backref_tree, node) {
7527                 if (back->full_backref || !back->is_data ||
7528                     !back->found_extent_tree)
7529                         continue;
7530                 dback = to_data_backref(back);
7531                 if (dback->found_ref)
7532                         continue;
7533                 key.objectid = dback->root;
7534                 key.type = BTRFS_ROOT_ITEM_KEY;
7535                 key.offset = (u64)-1;
7536
7537                 dest_root = btrfs_read_fs_root(fs_info, &key);
7538
7539                 /* For non-exist root we just skip it */
7540                 if (IS_ERR(dest_root) || !dest_root)
7541                         continue;
7542
7543                 key.objectid = dback->owner;
7544                 key.type = BTRFS_EXTENT_DATA_KEY;
7545                 key.offset = dback->offset;
7546
7547                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7548                 /*
7549                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7550                  * we need to record it for inode/file extent rebuild.
7551                  * For ret > 0, we record it only for file extent rebuild.
7552                  * For ret == 0, the file extent exists but only bytenr
7553                  * mismatch, let the original bytenr fix routine to handle,
7554                  * don't record it.
7555                  */
7556                 if (ret == 0)
7557                         continue;
7558                 ret = 0;
7559                 orphan = malloc(sizeof(*orphan));
7560                 if (!orphan) {
7561                         ret = -ENOMEM;
7562                         goto out;
7563                 }
7564                 INIT_LIST_HEAD(&orphan->list);
7565                 orphan->root = dback->root;
7566                 orphan->objectid = dback->owner;
7567                 orphan->offset = dback->offset;
7568                 orphan->disk_bytenr = rec->cache.start;
7569                 orphan->disk_len = rec->cache.size;
7570                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7571                 recorded_data_ref = 1;
7572         }
7573 out:
7574         btrfs_free_path(path);
7575         if (!ret)
7576                 return !recorded_data_ref;
7577         else
7578                 return ret;
7579 }
7580
7581 /*
7582  * when an incorrect extent item is found, this will delete
7583  * all of the existing entries for it and recreate them
7584  * based on what the tree scan found.
7585  */
7586 static int fixup_extent_refs(struct btrfs_fs_info *info,
7587                              struct cache_tree *extent_cache,
7588                              struct extent_record *rec)
7589 {
7590         struct btrfs_trans_handle *trans = NULL;
7591         int ret;
7592         struct btrfs_path *path;
7593         struct cache_extent *cache;
7594         struct extent_backref *back, *tmp;
7595         int allocated = 0;
7596         u64 flags = 0;
7597
7598         if (rec->flag_block_full_backref)
7599                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7600
7601         path = btrfs_alloc_path();
7602         if (!path)
7603                 return -ENOMEM;
7604
7605         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7606                 /*
7607                  * Sometimes the backrefs themselves are so broken they don't
7608                  * get attached to any meaningful rec, so first go back and
7609                  * check any of our backrefs that we couldn't find and throw
7610                  * them into the list if we find the backref so that
7611                  * verify_backrefs can figure out what to do.
7612                  */
7613                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7614                 if (ret < 0)
7615                         goto out;
7616         }
7617
7618         /* step one, make sure all of the backrefs agree */
7619         ret = verify_backrefs(info, path, rec);
7620         if (ret < 0)
7621                 goto out;
7622
7623         trans = btrfs_start_transaction(info->extent_root, 1);
7624         if (IS_ERR(trans)) {
7625                 ret = PTR_ERR(trans);
7626                 goto out;
7627         }
7628
7629         /* step two, delete all the existing records */
7630         ret = delete_extent_records(trans, info->extent_root, path,
7631                                     rec->start, rec->max_size);
7632
7633         if (ret < 0)
7634                 goto out;
7635
7636         /* was this block corrupt?  If so, don't add references to it */
7637         cache = lookup_cache_extent(info->corrupt_blocks,
7638                                     rec->start, rec->max_size);
7639         if (cache) {
7640                 ret = 0;
7641                 goto out;
7642         }
7643
7644         /* step three, recreate all the refs we did find */
7645         rbtree_postorder_for_each_entry_safe(back, tmp,
7646                                              &rec->backref_tree, node) {
7647                 /*
7648                  * if we didn't find any references, don't create a
7649                  * new extent record
7650                  */
7651                 if (!back->found_ref)
7652                         continue;
7653
7654                 rec->bad_full_backref = 0;
7655                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7656                 allocated = 1;
7657
7658                 if (ret)
7659                         goto out;
7660         }
7661 out:
7662         if (trans) {
7663                 int err = btrfs_commit_transaction(trans, info->extent_root);
7664                 if (!ret)
7665                         ret = err;
7666         }
7667
7668         btrfs_free_path(path);
7669         return ret;
7670 }
7671
7672 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7673                               struct extent_record *rec)
7674 {
7675         struct btrfs_trans_handle *trans;
7676         struct btrfs_root *root = fs_info->extent_root;
7677         struct btrfs_path *path;
7678         struct btrfs_extent_item *ei;
7679         struct btrfs_key key;
7680         u64 flags;
7681         int ret = 0;
7682
7683         key.objectid = rec->start;
7684         if (rec->metadata) {
7685                 key.type = BTRFS_METADATA_ITEM_KEY;
7686                 key.offset = rec->info_level;
7687         } else {
7688                 key.type = BTRFS_EXTENT_ITEM_KEY;
7689                 key.offset = rec->max_size;
7690         }
7691
7692         path = btrfs_alloc_path();
7693         if (!path)
7694                 return -ENOMEM;
7695
7696         trans = btrfs_start_transaction(root, 0);
7697         if (IS_ERR(trans)) {
7698                 btrfs_free_path(path);
7699                 return PTR_ERR(trans);
7700         }
7701
7702         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7703         if (ret < 0) {
7704                 btrfs_free_path(path);
7705                 btrfs_commit_transaction(trans, root);
7706                 return ret;
7707         } else if (ret) {
7708                 fprintf(stderr, "Didn't find extent for %llu\n",
7709                         (unsigned long long)rec->start);
7710                 btrfs_free_path(path);
7711                 btrfs_commit_transaction(trans, root);
7712                 return -ENOENT;
7713         }
7714
7715         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7716                             struct btrfs_extent_item);
7717         flags = btrfs_extent_flags(path->nodes[0], ei);
7718         if (rec->flag_block_full_backref) {
7719                 fprintf(stderr, "setting full backref on %llu\n",
7720                         (unsigned long long)key.objectid);
7721                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7722         } else {
7723                 fprintf(stderr, "clearing full backref on %llu\n",
7724                         (unsigned long long)key.objectid);
7725                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7726         }
7727         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7728         btrfs_mark_buffer_dirty(path->nodes[0]);
7729         btrfs_free_path(path);
7730         return btrfs_commit_transaction(trans, root);
7731 }
7732
7733 /* right now we only prune from the extent allocation tree */
7734 static int prune_one_block(struct btrfs_trans_handle *trans,
7735                            struct btrfs_fs_info *info,
7736                            struct btrfs_corrupt_block *corrupt)
7737 {
7738         int ret;
7739         struct btrfs_path path;
7740         struct extent_buffer *eb;
7741         u64 found;
7742         int slot;
7743         int nritems;
7744         int level = corrupt->level + 1;
7745
7746         btrfs_init_path(&path);
7747 again:
7748         /* we want to stop at the parent to our busted block */
7749         path.lowest_level = level;
7750
7751         ret = btrfs_search_slot(trans, info->extent_root,
7752                                 &corrupt->key, &path, -1, 1);
7753
7754         if (ret < 0)
7755                 goto out;
7756
7757         eb = path.nodes[level];
7758         if (!eb) {
7759                 ret = -ENOENT;
7760                 goto out;
7761         }
7762
7763         /*
7764          * hopefully the search gave us the block we want to prune,
7765          * lets try that first
7766          */
7767         slot = path.slots[level];
7768         found =  btrfs_node_blockptr(eb, slot);
7769         if (found == corrupt->cache.start)
7770                 goto del_ptr;
7771
7772         nritems = btrfs_header_nritems(eb);
7773
7774         /* the search failed, lets scan this node and hope we find it */
7775         for (slot = 0; slot < nritems; slot++) {
7776                 found =  btrfs_node_blockptr(eb, slot);
7777                 if (found == corrupt->cache.start)
7778                         goto del_ptr;
7779         }
7780         /*
7781          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7782          * to this block
7783          */
7784         if (eb == info->extent_root->node) {
7785                 ret = -ENOENT;
7786                 goto out;
7787         } else {
7788                 level++;
7789                 btrfs_release_path(&path);
7790                 goto again;
7791         }
7792
7793 del_ptr:
7794         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7795         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7796
7797 out:
7798         btrfs_release_path(&path);
7799         return ret;
7800 }
7801
7802 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7803 {
7804         struct btrfs_trans_handle *trans = NULL;
7805         struct cache_extent *cache;
7806         struct btrfs_corrupt_block *corrupt;
7807
7808         while (1) {
7809                 cache = search_cache_extent(info->corrupt_blocks, 0);
7810                 if (!cache)
7811                         break;
7812                 if (!trans) {
7813                         trans = btrfs_start_transaction(info->extent_root, 1);
7814                         if (IS_ERR(trans))
7815                                 return PTR_ERR(trans);
7816                 }
7817                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7818                 prune_one_block(trans, info, corrupt);
7819                 remove_cache_extent(info->corrupt_blocks, cache);
7820         }
7821         if (trans)
7822                 return btrfs_commit_transaction(trans, info->extent_root);
7823         return 0;
7824 }
7825
7826 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7827 {
7828         struct btrfs_block_group_cache *cache;
7829         u64 start, end;
7830         int ret;
7831
7832         while (1) {
7833                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7834                                             &start, &end, EXTENT_DIRTY);
7835                 if (ret)
7836                         break;
7837                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7838                                    GFP_NOFS);
7839         }
7840
7841         start = 0;
7842         while (1) {
7843                 cache = btrfs_lookup_first_block_group(fs_info, start);
7844                 if (!cache)
7845                         break;
7846                 if (cache->cached)
7847                         cache->cached = 0;
7848                 start = cache->key.objectid + cache->key.offset;
7849         }
7850 }
7851
7852 static int check_extent_refs(struct btrfs_root *root,
7853                              struct cache_tree *extent_cache)
7854 {
7855         struct extent_record *rec;
7856         struct cache_extent *cache;
7857         int err = 0;
7858         int ret = 0;
7859         int fixed = 0;
7860         int had_dups = 0;
7861         int recorded = 0;
7862
7863         if (repair) {
7864                 /*
7865                  * if we're doing a repair, we have to make sure
7866                  * we don't allocate from the problem extents.
7867                  * In the worst case, this will be all the
7868                  * extents in the FS
7869                  */
7870                 cache = search_cache_extent(extent_cache, 0);
7871                 while(cache) {
7872                         rec = container_of(cache, struct extent_record, cache);
7873                         set_extent_dirty(root->fs_info->excluded_extents,
7874                                          rec->start,
7875                                          rec->start + rec->max_size - 1,
7876                                          GFP_NOFS);
7877                         cache = next_cache_extent(cache);
7878                 }
7879
7880                 /* pin down all the corrupted blocks too */
7881                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7882                 while(cache) {
7883                         set_extent_dirty(root->fs_info->excluded_extents,
7884                                          cache->start,
7885                                          cache->start + cache->size - 1,
7886                                          GFP_NOFS);
7887                         cache = next_cache_extent(cache);
7888                 }
7889                 prune_corrupt_blocks(root->fs_info);
7890                 reset_cached_block_groups(root->fs_info);
7891         }
7892
7893         reset_cached_block_groups(root->fs_info);
7894
7895         /*
7896          * We need to delete any duplicate entries we find first otherwise we
7897          * could mess up the extent tree when we have backrefs that actually
7898          * belong to a different extent item and not the weird duplicate one.
7899          */
7900         while (repair && !list_empty(&duplicate_extents)) {
7901                 rec = to_extent_record(duplicate_extents.next);
7902                 list_del_init(&rec->list);
7903
7904                 /* Sometimes we can find a backref before we find an actual
7905                  * extent, so we need to process it a little bit to see if there
7906                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7907                  * if this is a backref screwup.  If we need to delete stuff
7908                  * process_duplicates() will return 0, otherwise it will return
7909                  * 1 and we
7910                  */
7911                 if (process_duplicates(root, extent_cache, rec))
7912                         continue;
7913                 ret = delete_duplicate_records(root, rec);
7914                 if (ret < 0)
7915                         return ret;
7916                 /*
7917                  * delete_duplicate_records will return the number of entries
7918                  * deleted, so if it's greater than 0 then we know we actually
7919                  * did something and we need to remove.
7920                  */
7921                 if (ret)
7922                         had_dups = 1;
7923         }
7924
7925         if (had_dups)
7926                 return -EAGAIN;
7927
7928         while(1) {
7929                 int cur_err = 0;
7930
7931                 fixed = 0;
7932                 recorded = 0;
7933                 cache = search_cache_extent(extent_cache, 0);
7934                 if (!cache)
7935                         break;
7936                 rec = container_of(cache, struct extent_record, cache);
7937                 if (rec->num_duplicates) {
7938                         fprintf(stderr, "extent item %llu has multiple extent "
7939                                 "items\n", (unsigned long long)rec->start);
7940                         err = 1;
7941                         cur_err = 1;
7942                 }
7943
7944                 if (rec->refs != rec->extent_item_refs) {
7945                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7946                                 (unsigned long long)rec->start,
7947                                 (unsigned long long)rec->nr);
7948                         fprintf(stderr, "extent item %llu, found %llu\n",
7949                                 (unsigned long long)rec->extent_item_refs,
7950                                 (unsigned long long)rec->refs);
7951                         ret = record_orphan_data_extents(root->fs_info, rec);
7952                         if (ret < 0)
7953                                 goto repair_abort;
7954                         if (ret == 0) {
7955                                 recorded = 1;
7956                         } else {
7957                                 /*
7958                                  * we can't use the extent to repair file
7959                                  * extent, let the fallback method handle it.
7960                                  */
7961                                 if (!fixed && repair) {
7962                                         ret = fixup_extent_refs(
7963                                                         root->fs_info,
7964                                                         extent_cache, rec);
7965                                         if (ret)
7966                                                 goto repair_abort;
7967                                         fixed = 1;
7968                                 }
7969                         }
7970                         err = 1;
7971                         cur_err = 1;
7972                 }
7973                 if (all_backpointers_checked(rec, 1)) {
7974                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7975                                 (unsigned long long)rec->start,
7976                                 (unsigned long long)rec->nr);
7977
7978                         if (!fixed && !recorded && repair) {
7979                                 ret = fixup_extent_refs(root->fs_info,
7980                                                         extent_cache, rec);
7981                                 if (ret)
7982                                         goto repair_abort;
7983                                 fixed = 1;
7984                         }
7985                         cur_err = 1;
7986                         err = 1;
7987                 }
7988                 if (!rec->owner_ref_checked) {
7989                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7990                                 (unsigned long long)rec->start,
7991                                 (unsigned long long)rec->nr);
7992                         if (!fixed && !recorded && repair) {
7993                                 ret = fixup_extent_refs(root->fs_info,
7994                                                         extent_cache, rec);
7995                                 if (ret)
7996                                         goto repair_abort;
7997                                 fixed = 1;
7998                         }
7999                         err = 1;
8000                         cur_err = 1;
8001                 }
8002                 if (rec->bad_full_backref) {
8003                         fprintf(stderr, "bad full backref, on [%llu]\n",
8004                                 (unsigned long long)rec->start);
8005                         if (repair) {
8006                                 ret = fixup_extent_flags(root->fs_info, rec);
8007                                 if (ret)
8008                                         goto repair_abort;
8009                                 fixed = 1;
8010                         }
8011                         err = 1;
8012                         cur_err = 1;
8013                 }
8014                 /*
8015                  * Although it's not a extent ref's problem, we reuse this
8016                  * routine for error reporting.
8017                  * No repair function yet.
8018                  */
8019                 if (rec->crossing_stripes) {
8020                         fprintf(stderr,
8021                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8022                                 rec->start, rec->start + rec->max_size);
8023                         err = 1;
8024                         cur_err = 1;
8025                 }
8026
8027                 if (rec->wrong_chunk_type) {
8028                         fprintf(stderr,
8029                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8030                                 rec->start, rec->start + rec->max_size);
8031                         err = 1;
8032                         cur_err = 1;
8033                 }
8034
8035                 remove_cache_extent(extent_cache, cache);
8036                 free_all_extent_backrefs(rec);
8037                 if (!init_extent_tree && repair && (!cur_err || fixed))
8038                         clear_extent_dirty(root->fs_info->excluded_extents,
8039                                            rec->start,
8040                                            rec->start + rec->max_size - 1,
8041                                            GFP_NOFS);
8042                 free(rec);
8043         }
8044 repair_abort:
8045         if (repair) {
8046                 if (ret && ret != -EAGAIN) {
8047                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8048                         exit(1);
8049                 } else if (!ret) {
8050                         struct btrfs_trans_handle *trans;
8051
8052                         root = root->fs_info->extent_root;
8053                         trans = btrfs_start_transaction(root, 1);
8054                         if (IS_ERR(trans)) {
8055                                 ret = PTR_ERR(trans);
8056                                 goto repair_abort;
8057                         }
8058
8059                         btrfs_fix_block_accounting(trans, root);
8060                         ret = btrfs_commit_transaction(trans, root);
8061                         if (ret)
8062                                 goto repair_abort;
8063                 }
8064                 if (err)
8065                         fprintf(stderr, "repaired damaged extent references\n");
8066                 return ret;
8067         }
8068         return err;
8069 }
8070
8071 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8072 {
8073         u64 stripe_size;
8074
8075         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8076                 stripe_size = length;
8077                 stripe_size /= num_stripes;
8078         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8079                 stripe_size = length * 2;
8080                 stripe_size /= num_stripes;
8081         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8082                 stripe_size = length;
8083                 stripe_size /= (num_stripes - 1);
8084         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8085                 stripe_size = length;
8086                 stripe_size /= (num_stripes - 2);
8087         } else {
8088                 stripe_size = length;
8089         }
8090         return stripe_size;
8091 }
8092
8093 /*
8094  * Check the chunk with its block group/dev list ref:
8095  * Return 0 if all refs seems valid.
8096  * Return 1 if part of refs seems valid, need later check for rebuild ref
8097  * like missing block group and needs to search extent tree to rebuild them.
8098  * Return -1 if essential refs are missing and unable to rebuild.
8099  */
8100 static int check_chunk_refs(struct chunk_record *chunk_rec,
8101                             struct block_group_tree *block_group_cache,
8102                             struct device_extent_tree *dev_extent_cache,
8103                             int silent)
8104 {
8105         struct cache_extent *block_group_item;
8106         struct block_group_record *block_group_rec;
8107         struct cache_extent *dev_extent_item;
8108         struct device_extent_record *dev_extent_rec;
8109         u64 devid;
8110         u64 offset;
8111         u64 length;
8112         int metadump_v2 = 0;
8113         int i;
8114         int ret = 0;
8115
8116         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8117                                                chunk_rec->offset,
8118                                                chunk_rec->length);
8119         if (block_group_item) {
8120                 block_group_rec = container_of(block_group_item,
8121                                                struct block_group_record,
8122                                                cache);
8123                 if (chunk_rec->length != block_group_rec->offset ||
8124                     chunk_rec->offset != block_group_rec->objectid ||
8125                     (!metadump_v2 &&
8126                      chunk_rec->type_flags != block_group_rec->flags)) {
8127                         if (!silent)
8128                                 fprintf(stderr,
8129                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8130                                         chunk_rec->objectid,
8131                                         chunk_rec->type,
8132                                         chunk_rec->offset,
8133                                         chunk_rec->length,
8134                                         chunk_rec->offset,
8135                                         chunk_rec->type_flags,
8136                                         block_group_rec->objectid,
8137                                         block_group_rec->type,
8138                                         block_group_rec->offset,
8139                                         block_group_rec->offset,
8140                                         block_group_rec->objectid,
8141                                         block_group_rec->flags);
8142                         ret = -1;
8143                 } else {
8144                         list_del_init(&block_group_rec->list);
8145                         chunk_rec->bg_rec = block_group_rec;
8146                 }
8147         } else {
8148                 if (!silent)
8149                         fprintf(stderr,
8150                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8151                                 chunk_rec->objectid,
8152                                 chunk_rec->type,
8153                                 chunk_rec->offset,
8154                                 chunk_rec->length,
8155                                 chunk_rec->offset,
8156                                 chunk_rec->type_flags);
8157                 ret = 1;
8158         }
8159
8160         if (metadump_v2)
8161                 return ret;
8162
8163         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8164                                     chunk_rec->num_stripes);
8165         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8166                 devid = chunk_rec->stripes[i].devid;
8167                 offset = chunk_rec->stripes[i].offset;
8168                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8169                                                        devid, offset, length);
8170                 if (dev_extent_item) {
8171                         dev_extent_rec = container_of(dev_extent_item,
8172                                                 struct device_extent_record,
8173                                                 cache);
8174                         if (dev_extent_rec->objectid != devid ||
8175                             dev_extent_rec->offset != offset ||
8176                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8177                             dev_extent_rec->length != length) {
8178                                 if (!silent)
8179                                         fprintf(stderr,
8180                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8181                                                 chunk_rec->objectid,
8182                                                 chunk_rec->type,
8183                                                 chunk_rec->offset,
8184                                                 chunk_rec->stripes[i].devid,
8185                                                 chunk_rec->stripes[i].offset,
8186                                                 dev_extent_rec->objectid,
8187                                                 dev_extent_rec->offset,
8188                                                 dev_extent_rec->length);
8189                                 ret = -1;
8190                         } else {
8191                                 list_move(&dev_extent_rec->chunk_list,
8192                                           &chunk_rec->dextents);
8193                         }
8194                 } else {
8195                         if (!silent)
8196                                 fprintf(stderr,
8197                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8198                                         chunk_rec->objectid,
8199                                         chunk_rec->type,
8200                                         chunk_rec->offset,
8201                                         chunk_rec->stripes[i].devid,
8202                                         chunk_rec->stripes[i].offset);
8203                         ret = -1;
8204                 }
8205         }
8206         return ret;
8207 }
8208
8209 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8210 int check_chunks(struct cache_tree *chunk_cache,
8211                  struct block_group_tree *block_group_cache,
8212                  struct device_extent_tree *dev_extent_cache,
8213                  struct list_head *good, struct list_head *bad,
8214                  struct list_head *rebuild, int silent)
8215 {
8216         struct cache_extent *chunk_item;
8217         struct chunk_record *chunk_rec;
8218         struct block_group_record *bg_rec;
8219         struct device_extent_record *dext_rec;
8220         int err;
8221         int ret = 0;
8222
8223         chunk_item = first_cache_extent(chunk_cache);
8224         while (chunk_item) {
8225                 chunk_rec = container_of(chunk_item, struct chunk_record,
8226                                          cache);
8227                 err = check_chunk_refs(chunk_rec, block_group_cache,
8228                                        dev_extent_cache, silent);
8229                 if (err < 0)
8230                         ret = err;
8231                 if (err == 0 && good)
8232                         list_add_tail(&chunk_rec->list, good);
8233                 if (err > 0 && rebuild)
8234                         list_add_tail(&chunk_rec->list, rebuild);
8235                 if (err < 0 && bad)
8236                         list_add_tail(&chunk_rec->list, bad);
8237                 chunk_item = next_cache_extent(chunk_item);
8238         }
8239
8240         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8241                 if (!silent)
8242                         fprintf(stderr,
8243                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8244                                 bg_rec->objectid,
8245                                 bg_rec->offset,
8246                                 bg_rec->flags);
8247                 if (!ret)
8248                         ret = 1;
8249         }
8250
8251         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8252                             chunk_list) {
8253                 if (!silent)
8254                         fprintf(stderr,
8255                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8256                                 dext_rec->objectid,
8257                                 dext_rec->offset,
8258                                 dext_rec->length);
8259                 if (!ret)
8260                         ret = 1;
8261         }
8262         return ret;
8263 }
8264
8265
8266 static int check_device_used(struct device_record *dev_rec,
8267                              struct device_extent_tree *dext_cache)
8268 {
8269         struct cache_extent *cache;
8270         struct device_extent_record *dev_extent_rec;
8271         u64 total_byte = 0;
8272
8273         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8274         while (cache) {
8275                 dev_extent_rec = container_of(cache,
8276                                               struct device_extent_record,
8277                                               cache);
8278                 if (dev_extent_rec->objectid != dev_rec->devid)
8279                         break;
8280
8281                 list_del_init(&dev_extent_rec->device_list);
8282                 total_byte += dev_extent_rec->length;
8283                 cache = next_cache_extent(cache);
8284         }
8285
8286         if (total_byte != dev_rec->byte_used) {
8287                 fprintf(stderr,
8288                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8289                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8290                         dev_rec->type, dev_rec->offset);
8291                 return -1;
8292         } else {
8293                 return 0;
8294         }
8295 }
8296
8297 /* check btrfs_dev_item -> btrfs_dev_extent */
8298 static int check_devices(struct rb_root *dev_cache,
8299                          struct device_extent_tree *dev_extent_cache)
8300 {
8301         struct rb_node *dev_node;
8302         struct device_record *dev_rec;
8303         struct device_extent_record *dext_rec;
8304         int err;
8305         int ret = 0;
8306
8307         dev_node = rb_first(dev_cache);
8308         while (dev_node) {
8309                 dev_rec = container_of(dev_node, struct device_record, node);
8310                 err = check_device_used(dev_rec, dev_extent_cache);
8311                 if (err)
8312                         ret = err;
8313
8314                 dev_node = rb_next(dev_node);
8315         }
8316         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8317                             device_list) {
8318                 fprintf(stderr,
8319                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8320                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8321                 if (!ret)
8322                         ret = 1;
8323         }
8324         return ret;
8325 }
8326
8327 static int add_root_item_to_list(struct list_head *head,
8328                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8329                                   u8 level, u8 drop_level,
8330                                   int level_size, struct btrfs_key *drop_key)
8331 {
8332
8333         struct root_item_record *ri_rec;
8334         ri_rec = malloc(sizeof(*ri_rec));
8335         if (!ri_rec)
8336                 return -ENOMEM;
8337         ri_rec->bytenr = bytenr;
8338         ri_rec->objectid = objectid;
8339         ri_rec->level = level;
8340         ri_rec->level_size = level_size;
8341         ri_rec->drop_level = drop_level;
8342         ri_rec->last_snapshot = last_snapshot;
8343         if (drop_key)
8344                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8345         list_add_tail(&ri_rec->list, head);
8346
8347         return 0;
8348 }
8349
8350 static void free_root_item_list(struct list_head *list)
8351 {
8352         struct root_item_record *ri_rec;
8353
8354         while (!list_empty(list)) {
8355                 ri_rec = list_first_entry(list, struct root_item_record,
8356                                           list);
8357                 list_del_init(&ri_rec->list);
8358                 free(ri_rec);
8359         }
8360 }
8361
8362 static int deal_root_from_list(struct list_head *list,
8363                                struct btrfs_root *root,
8364                                struct block_info *bits,
8365                                int bits_nr,
8366                                struct cache_tree *pending,
8367                                struct cache_tree *seen,
8368                                struct cache_tree *reada,
8369                                struct cache_tree *nodes,
8370                                struct cache_tree *extent_cache,
8371                                struct cache_tree *chunk_cache,
8372                                struct rb_root *dev_cache,
8373                                struct block_group_tree *block_group_cache,
8374                                struct device_extent_tree *dev_extent_cache)
8375 {
8376         int ret = 0;
8377         u64 last;
8378
8379         while (!list_empty(list)) {
8380                 struct root_item_record *rec;
8381                 struct extent_buffer *buf;
8382                 rec = list_entry(list->next,
8383                                  struct root_item_record, list);
8384                 last = 0;
8385                 buf = read_tree_block(root->fs_info->tree_root,
8386                                       rec->bytenr, rec->level_size, 0);
8387                 if (!extent_buffer_uptodate(buf)) {
8388                         free_extent_buffer(buf);
8389                         ret = -EIO;
8390                         break;
8391                 }
8392                 add_root_to_pending(buf, extent_cache, pending,
8393                                     seen, nodes, rec->objectid);
8394                 /*
8395                  * To rebuild extent tree, we need deal with snapshot
8396                  * one by one, otherwise we deal with node firstly which
8397                  * can maximize readahead.
8398                  */
8399                 while (1) {
8400                         ret = run_next_block(root, bits, bits_nr, &last,
8401                                              pending, seen, reada, nodes,
8402                                              extent_cache, chunk_cache,
8403                                              dev_cache, block_group_cache,
8404                                              dev_extent_cache, rec);
8405                         if (ret != 0)
8406                                 break;
8407                 }
8408                 free_extent_buffer(buf);
8409                 list_del(&rec->list);
8410                 free(rec);
8411                 if (ret < 0)
8412                         break;
8413         }
8414         while (ret >= 0) {
8415                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8416                                      reada, nodes, extent_cache, chunk_cache,
8417                                      dev_cache, block_group_cache,
8418                                      dev_extent_cache, NULL);
8419                 if (ret != 0) {
8420                         if (ret > 0)
8421                                 ret = 0;
8422                         break;
8423                 }
8424         }
8425         return ret;
8426 }
8427
8428 static int check_chunks_and_extents(struct btrfs_root *root)
8429 {
8430         struct rb_root dev_cache;
8431         struct cache_tree chunk_cache;
8432         struct block_group_tree block_group_cache;
8433         struct device_extent_tree dev_extent_cache;
8434         struct cache_tree extent_cache;
8435         struct cache_tree seen;
8436         struct cache_tree pending;
8437         struct cache_tree reada;
8438         struct cache_tree nodes;
8439         struct extent_io_tree excluded_extents;
8440         struct cache_tree corrupt_blocks;
8441         struct btrfs_path path;
8442         struct btrfs_key key;
8443         struct btrfs_key found_key;
8444         int ret, err = 0;
8445         struct block_info *bits;
8446         int bits_nr;
8447         struct extent_buffer *leaf;
8448         int slot;
8449         struct btrfs_root_item ri;
8450         struct list_head dropping_trees;
8451         struct list_head normal_trees;
8452         struct btrfs_root *root1;
8453         u64 objectid;
8454         u32 level_size;
8455         u8 level;
8456
8457         dev_cache = RB_ROOT;
8458         cache_tree_init(&chunk_cache);
8459         block_group_tree_init(&block_group_cache);
8460         device_extent_tree_init(&dev_extent_cache);
8461
8462         cache_tree_init(&extent_cache);
8463         cache_tree_init(&seen);
8464         cache_tree_init(&pending);
8465         cache_tree_init(&nodes);
8466         cache_tree_init(&reada);
8467         cache_tree_init(&corrupt_blocks);
8468         extent_io_tree_init(&excluded_extents);
8469         INIT_LIST_HEAD(&dropping_trees);
8470         INIT_LIST_HEAD(&normal_trees);
8471
8472         if (repair) {
8473                 root->fs_info->excluded_extents = &excluded_extents;
8474                 root->fs_info->fsck_extent_cache = &extent_cache;
8475                 root->fs_info->free_extent_hook = free_extent_hook;
8476                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8477         }
8478
8479         bits_nr = 1024;
8480         bits = malloc(bits_nr * sizeof(struct block_info));
8481         if (!bits) {
8482                 perror("malloc");
8483                 exit(1);
8484         }
8485
8486         if (ctx.progress_enabled) {
8487                 ctx.tp = TASK_EXTENTS;
8488                 task_start(ctx.info);
8489         }
8490
8491 again:
8492         root1 = root->fs_info->tree_root;
8493         level = btrfs_header_level(root1->node);
8494         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8495                                     root1->node->start, 0, level, 0,
8496                                     root1->nodesize, NULL);
8497         if (ret < 0)
8498                 goto out;
8499         root1 = root->fs_info->chunk_root;
8500         level = btrfs_header_level(root1->node);
8501         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8502                                     root1->node->start, 0, level, 0,
8503                                     root1->nodesize, NULL);
8504         if (ret < 0)
8505                 goto out;
8506         btrfs_init_path(&path);
8507         key.offset = 0;
8508         key.objectid = 0;
8509         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8510         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8511                                         &key, &path, 0, 0);
8512         if (ret < 0)
8513                 goto out;
8514         while(1) {
8515                 leaf = path.nodes[0];
8516                 slot = path.slots[0];
8517                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8518                         ret = btrfs_next_leaf(root, &path);
8519                         if (ret != 0)
8520                                 break;
8521                         leaf = path.nodes[0];
8522                         slot = path.slots[0];
8523                 }
8524                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8525                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8526                         unsigned long offset;
8527                         u64 last_snapshot;
8528
8529                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8530                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8531                         last_snapshot = btrfs_root_last_snapshot(&ri);
8532                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8533                                 level = btrfs_root_level(&ri);
8534                                 level_size = root->nodesize;
8535                                 ret = add_root_item_to_list(&normal_trees,
8536                                                 found_key.objectid,
8537                                                 btrfs_root_bytenr(&ri),
8538                                                 last_snapshot, level,
8539                                                 0, level_size, NULL);
8540                                 if (ret < 0)
8541                                         goto out;
8542                         } else {
8543                                 level = btrfs_root_level(&ri);
8544                                 level_size = root->nodesize;
8545                                 objectid = found_key.objectid;
8546                                 btrfs_disk_key_to_cpu(&found_key,
8547                                                       &ri.drop_progress);
8548                                 ret = add_root_item_to_list(&dropping_trees,
8549                                                 objectid,
8550                                                 btrfs_root_bytenr(&ri),
8551                                                 last_snapshot, level,
8552                                                 ri.drop_level,
8553                                                 level_size, &found_key);
8554                                 if (ret < 0)
8555                                         goto out;
8556                         }
8557                 }
8558                 path.slots[0]++;
8559         }
8560         btrfs_release_path(&path);
8561
8562         /*
8563          * check_block can return -EAGAIN if it fixes something, please keep
8564          * this in mind when dealing with return values from these functions, if
8565          * we get -EAGAIN we want to fall through and restart the loop.
8566          */
8567         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8568                                   &seen, &reada, &nodes, &extent_cache,
8569                                   &chunk_cache, &dev_cache, &block_group_cache,
8570                                   &dev_extent_cache);
8571         if (ret < 0) {
8572                 if (ret == -EAGAIN)
8573                         goto loop;
8574                 goto out;
8575         }
8576         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8577                                   &pending, &seen, &reada, &nodes,
8578                                   &extent_cache, &chunk_cache, &dev_cache,
8579                                   &block_group_cache, &dev_extent_cache);
8580         if (ret < 0) {
8581                 if (ret == -EAGAIN)
8582                         goto loop;
8583                 goto out;
8584         }
8585
8586         ret = check_chunks(&chunk_cache, &block_group_cache,
8587                            &dev_extent_cache, NULL, NULL, NULL, 0);
8588         if (ret) {
8589                 if (ret == -EAGAIN)
8590                         goto loop;
8591                 err = ret;
8592         }
8593
8594         ret = check_extent_refs(root, &extent_cache);
8595         if (ret < 0) {
8596                 if (ret == -EAGAIN)
8597                         goto loop;
8598                 goto out;
8599         }
8600
8601         ret = check_devices(&dev_cache, &dev_extent_cache);
8602         if (ret && err)
8603                 ret = err;
8604
8605 out:
8606         task_stop(ctx.info);
8607         if (repair) {
8608                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8609                 extent_io_tree_cleanup(&excluded_extents);
8610                 root->fs_info->fsck_extent_cache = NULL;
8611                 root->fs_info->free_extent_hook = NULL;
8612                 root->fs_info->corrupt_blocks = NULL;
8613                 root->fs_info->excluded_extents = NULL;
8614         }
8615         free(bits);
8616         free_chunk_cache_tree(&chunk_cache);
8617         free_device_cache_tree(&dev_cache);
8618         free_block_group_tree(&block_group_cache);
8619         free_device_extent_tree(&dev_extent_cache);
8620         free_extent_cache_tree(&seen);
8621         free_extent_cache_tree(&pending);
8622         free_extent_cache_tree(&reada);
8623         free_extent_cache_tree(&nodes);
8624         return ret;
8625 loop:
8626         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8627         free_extent_cache_tree(&seen);
8628         free_extent_cache_tree(&pending);
8629         free_extent_cache_tree(&reada);
8630         free_extent_cache_tree(&nodes);
8631         free_chunk_cache_tree(&chunk_cache);
8632         free_block_group_tree(&block_group_cache);
8633         free_device_cache_tree(&dev_cache);
8634         free_device_extent_tree(&dev_extent_cache);
8635         free_extent_record_cache(root->fs_info, &extent_cache);
8636         free_root_item_list(&normal_trees);
8637         free_root_item_list(&dropping_trees);
8638         extent_io_tree_cleanup(&excluded_extents);
8639         goto again;
8640 }
8641
8642 /*
8643  * Check backrefs of a tree block given by @bytenr or @eb.
8644  *
8645  * @root:       the root containing the @bytenr or @eb
8646  * @eb:         tree block extent buffer, can be NULL
8647  * @bytenr:     bytenr of the tree block to search
8648  * @level:      tree level of the tree block
8649  * @owner:      owner of the tree block
8650  *
8651  * Return >0 for any error found and output error message
8652  * Return 0 for no error found
8653  */
8654 static int check_tree_block_ref(struct btrfs_root *root,
8655                                 struct extent_buffer *eb, u64 bytenr,
8656                                 int level, u64 owner)
8657 {
8658         struct btrfs_key key;
8659         struct btrfs_root *extent_root = root->fs_info->extent_root;
8660         struct btrfs_path path;
8661         struct btrfs_extent_item *ei;
8662         struct btrfs_extent_inline_ref *iref;
8663         struct extent_buffer *leaf;
8664         unsigned long end;
8665         unsigned long ptr;
8666         int slot;
8667         int skinny_level;
8668         int type;
8669         u32 nodesize = root->nodesize;
8670         u32 item_size;
8671         u64 offset;
8672         int found_ref = 0;
8673         int err = 0;
8674         int ret;
8675
8676         btrfs_init_path(&path);
8677         key.objectid = bytenr;
8678         if (btrfs_fs_incompat(root->fs_info,
8679                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8680                 key.type = BTRFS_METADATA_ITEM_KEY;
8681         else
8682                 key.type = BTRFS_EXTENT_ITEM_KEY;
8683         key.offset = (u64)-1;
8684
8685         /* Search for the backref in extent tree */
8686         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8687         if (ret < 0) {
8688                 err |= BACKREF_MISSING;
8689                 goto out;
8690         }
8691         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8692         if (ret) {
8693                 err |= BACKREF_MISSING;
8694                 goto out;
8695         }
8696
8697         leaf = path.nodes[0];
8698         slot = path.slots[0];
8699         btrfs_item_key_to_cpu(leaf, &key, slot);
8700
8701         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8702
8703         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8704                 skinny_level = (int)key.offset;
8705                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8706         } else {
8707                 struct btrfs_tree_block_info *info;
8708
8709                 info = (struct btrfs_tree_block_info *)(ei + 1);
8710                 skinny_level = btrfs_tree_block_level(leaf, info);
8711                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8712         }
8713
8714         if (eb) {
8715                 u64 header_gen;
8716                 u64 extent_gen;
8717
8718                 if (!(btrfs_extent_flags(leaf, ei) &
8719                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8720                         error(
8721                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8722                                 key.objectid, nodesize,
8723                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8724                         err = BACKREF_MISMATCH;
8725                 }
8726                 header_gen = btrfs_header_generation(eb);
8727                 extent_gen = btrfs_extent_generation(leaf, ei);
8728                 if (header_gen != extent_gen) {
8729                         error(
8730         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8731                                 key.objectid, nodesize, header_gen,
8732                                 extent_gen);
8733                         err = BACKREF_MISMATCH;
8734                 }
8735                 if (level != skinny_level) {
8736                         error(
8737                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8738                                 key.objectid, nodesize, level, skinny_level);
8739                         err = BACKREF_MISMATCH;
8740                 }
8741                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8742                         error(
8743                         "extent[%llu %u] is referred by other roots than %llu",
8744                                 key.objectid, nodesize, root->objectid);
8745                         err = BACKREF_MISMATCH;
8746                 }
8747         }
8748
8749         /*
8750          * Iterate the extent/metadata item to find the exact backref
8751          */
8752         item_size = btrfs_item_size_nr(leaf, slot);
8753         ptr = (unsigned long)iref;
8754         end = (unsigned long)ei + item_size;
8755         while (ptr < end) {
8756                 iref = (struct btrfs_extent_inline_ref *)ptr;
8757                 type = btrfs_extent_inline_ref_type(leaf, iref);
8758                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8759
8760                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8761                         (offset == root->objectid || offset == owner)) {
8762                         found_ref = 1;
8763                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8764                         /* Check if the backref points to valid referencer */
8765                         found_ref = !check_tree_block_ref(root, NULL, offset,
8766                                                           level + 1, owner);
8767                 }
8768
8769                 if (found_ref)
8770                         break;
8771                 ptr += btrfs_extent_inline_ref_size(type);
8772         }
8773
8774         /*
8775          * Inlined extent item doesn't have what we need, check
8776          * TREE_BLOCK_REF_KEY
8777          */
8778         if (!found_ref) {
8779                 btrfs_release_path(&path);
8780                 key.objectid = bytenr;
8781                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8782                 key.offset = root->objectid;
8783
8784                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8785                 if (!ret)
8786                         found_ref = 1;
8787         }
8788         if (!found_ref)
8789                 err |= BACKREF_MISSING;
8790 out:
8791         btrfs_release_path(&path);
8792         if (eb && (err & BACKREF_MISSING))
8793                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8794                         bytenr, nodesize, owner, level);
8795         return err;
8796 }
8797
8798 /*
8799  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8800  *
8801  * Return >0 any error found and output error message
8802  * Return 0 for no error found
8803  */
8804 static int check_extent_data_item(struct btrfs_root *root,
8805                                   struct extent_buffer *eb, int slot)
8806 {
8807         struct btrfs_file_extent_item *fi;
8808         struct btrfs_path path;
8809         struct btrfs_root *extent_root = root->fs_info->extent_root;
8810         struct btrfs_key fi_key;
8811         struct btrfs_key dbref_key;
8812         struct extent_buffer *leaf;
8813         struct btrfs_extent_item *ei;
8814         struct btrfs_extent_inline_ref *iref;
8815         struct btrfs_extent_data_ref *dref;
8816         u64 owner;
8817         u64 file_extent_gen;
8818         u64 disk_bytenr;
8819         u64 disk_num_bytes;
8820         u64 extent_num_bytes;
8821         u64 extent_flags;
8822         u64 extent_gen;
8823         u32 item_size;
8824         unsigned long end;
8825         unsigned long ptr;
8826         int type;
8827         u64 ref_root;
8828         int found_dbackref = 0;
8829         int err = 0;
8830         int ret;
8831
8832         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8833         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8834         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8835
8836         /* Nothing to check for hole and inline data extents */
8837         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8838             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8839                 return 0;
8840
8841         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8842         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8843         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8844
8845         /* Check unaligned disk_num_bytes and num_bytes */
8846         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8847                 error(
8848 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8849                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8850                         root->sectorsize);
8851                 err |= BYTES_UNALIGNED;
8852         } else {
8853                 data_bytes_allocated += disk_num_bytes;
8854         }
8855         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8856                 error(
8857 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8858                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8859                         root->sectorsize);
8860                 err |= BYTES_UNALIGNED;
8861         } else {
8862                 data_bytes_referenced += extent_num_bytes;
8863         }
8864         owner = btrfs_header_owner(eb);
8865
8866         /* Check the extent item of the file extent in extent tree */
8867         btrfs_init_path(&path);
8868         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8869         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8870         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8871
8872         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8873         if (ret) {
8874                 err |= BACKREF_MISSING;
8875                 goto error;
8876         }
8877
8878         leaf = path.nodes[0];
8879         slot = path.slots[0];
8880         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8881
8882         extent_flags = btrfs_extent_flags(leaf, ei);
8883         extent_gen = btrfs_extent_generation(leaf, ei);
8884
8885         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8886                 error(
8887                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8888                     disk_bytenr, disk_num_bytes,
8889                     BTRFS_EXTENT_FLAG_DATA);
8890                 err |= BACKREF_MISMATCH;
8891         }
8892
8893         if (file_extent_gen < extent_gen) {
8894                 error(
8895 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8896                         disk_bytenr, disk_num_bytes, file_extent_gen,
8897                         extent_gen);
8898                 err |= BACKREF_MISMATCH;
8899         }
8900
8901         /* Check data backref inside that extent item */
8902         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8903         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8904         ptr = (unsigned long)iref;
8905         end = (unsigned long)ei + item_size;
8906         while (ptr < end) {
8907                 iref = (struct btrfs_extent_inline_ref *)ptr;
8908                 type = btrfs_extent_inline_ref_type(leaf, iref);
8909                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8910
8911                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8912                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8913                         if (ref_root == owner || ref_root == root->objectid)
8914                                 found_dbackref = 1;
8915                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8916                         found_dbackref = !check_tree_block_ref(root, NULL,
8917                                 btrfs_extent_inline_ref_offset(leaf, iref),
8918                                 0, owner);
8919                 }
8920
8921                 if (found_dbackref)
8922                         break;
8923                 ptr += btrfs_extent_inline_ref_size(type);
8924         }
8925
8926         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8927         if (!found_dbackref) {
8928                 btrfs_release_path(&path);
8929
8930                 btrfs_init_path(&path);
8931                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8932                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8933                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8934                                 fi_key.objectid, fi_key.offset);
8935
8936                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8937                                         &dbref_key, &path, 0, 0);
8938                 if (!ret)
8939                         found_dbackref = 1;
8940         }
8941
8942         if (!found_dbackref)
8943                 err |= BACKREF_MISSING;
8944 error:
8945         btrfs_release_path(&path);
8946         if (err & BACKREF_MISSING) {
8947                 error("data extent[%llu %llu] backref lost",
8948                       disk_bytenr, disk_num_bytes);
8949         }
8950         return err;
8951 }
8952
8953 /*
8954  * Get real tree block level for the case like shared block
8955  * Return >= 0 as tree level
8956  * Return <0 for error
8957  */
8958 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8959 {
8960         struct extent_buffer *eb;
8961         struct btrfs_path path;
8962         struct btrfs_key key;
8963         struct btrfs_extent_item *ei;
8964         u64 flags;
8965         u64 transid;
8966         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8967         u8 backref_level;
8968         u8 header_level;
8969         int ret;
8970
8971         /* Search extent tree for extent generation and level */
8972         key.objectid = bytenr;
8973         key.type = BTRFS_METADATA_ITEM_KEY;
8974         key.offset = (u64)-1;
8975
8976         btrfs_init_path(&path);
8977         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8978         if (ret < 0)
8979                 goto release_out;
8980         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8981         if (ret < 0)
8982                 goto release_out;
8983         if (ret > 0) {
8984                 ret = -ENOENT;
8985                 goto release_out;
8986         }
8987
8988         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8989         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8990                             struct btrfs_extent_item);
8991         flags = btrfs_extent_flags(path.nodes[0], ei);
8992         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8993                 ret = -ENOENT;
8994                 goto release_out;
8995         }
8996
8997         /* Get transid for later read_tree_block() check */
8998         transid = btrfs_extent_generation(path.nodes[0], ei);
8999
9000         /* Get backref level as one source */
9001         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9002                 backref_level = key.offset;
9003         } else {
9004                 struct btrfs_tree_block_info *info;
9005
9006                 info = (struct btrfs_tree_block_info *)(ei + 1);
9007                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9008         }
9009         btrfs_release_path(&path);
9010
9011         /* Get level from tree block as an alternative source */
9012         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9013         if (!extent_buffer_uptodate(eb)) {
9014                 free_extent_buffer(eb);
9015                 return -EIO;
9016         }
9017         header_level = btrfs_header_level(eb);
9018         free_extent_buffer(eb);
9019
9020         if (header_level != backref_level)
9021                 return -EIO;
9022         return header_level;
9023
9024 release_out:
9025         btrfs_release_path(&path);
9026         return ret;
9027 }
9028
9029 /*
9030  * Check if a tree block backref is valid (points to a valid tree block)
9031  * if level == -1, level will be resolved
9032  * Return >0 for any error found and print error message
9033  */
9034 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9035                                     u64 bytenr, int level)
9036 {
9037         struct btrfs_root *root;
9038         struct btrfs_key key;
9039         struct btrfs_path path;
9040         struct extent_buffer *eb;
9041         struct extent_buffer *node;
9042         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9043         int err = 0;
9044         int ret;
9045
9046         /* Query level for level == -1 special case */
9047         if (level == -1)
9048                 level = query_tree_block_level(fs_info, bytenr);
9049         if (level < 0) {
9050                 err |= REFERENCER_MISSING;
9051                 goto out;
9052         }
9053
9054         key.objectid = root_id;
9055         key.type = BTRFS_ROOT_ITEM_KEY;
9056         key.offset = (u64)-1;
9057
9058         root = btrfs_read_fs_root(fs_info, &key);
9059         if (IS_ERR(root)) {
9060                 err |= REFERENCER_MISSING;
9061                 goto out;
9062         }
9063
9064         /* Read out the tree block to get item/node key */
9065         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9066         if (!extent_buffer_uptodate(eb)) {
9067                 err |= REFERENCER_MISSING;
9068                 free_extent_buffer(eb);
9069                 goto out;
9070         }
9071
9072         /* Empty tree, no need to check key */
9073         if (!btrfs_header_nritems(eb) && !level) {
9074                 free_extent_buffer(eb);
9075                 goto out;
9076         }
9077
9078         if (level)
9079                 btrfs_node_key_to_cpu(eb, &key, 0);
9080         else
9081                 btrfs_item_key_to_cpu(eb, &key, 0);
9082
9083         free_extent_buffer(eb);
9084
9085         btrfs_init_path(&path);
9086         /* Search with the first key, to ensure we can reach it */
9087         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9088         if (ret) {
9089                 err |= REFERENCER_MISSING;
9090                 goto release_out;
9091         }
9092
9093         node = path.nodes[level];
9094         if (btrfs_header_bytenr(node) != bytenr) {
9095                 error(
9096         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9097                         bytenr, nodesize, bytenr,
9098                         btrfs_header_bytenr(node));
9099                 err |= REFERENCER_MISMATCH;
9100         }
9101         if (btrfs_header_level(node) != level) {
9102                 error(
9103         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9104                         bytenr, nodesize, level,
9105                         btrfs_header_level(node));
9106                 err |= REFERENCER_MISMATCH;
9107         }
9108
9109 release_out:
9110         btrfs_release_path(&path);
9111 out:
9112         if (err & REFERENCER_MISSING) {
9113                 if (level < 0)
9114                         error("extent [%llu %d] lost referencer (owner: %llu)",
9115                                 bytenr, nodesize, root_id);
9116                 else
9117                         error(
9118                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9119                                 bytenr, nodesize, root_id, level);
9120         }
9121
9122         return err;
9123 }
9124
9125 /*
9126  * Check referencer for shared block backref
9127  * If level == -1, this function will resolve the level.
9128  */
9129 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9130                                      u64 parent, u64 bytenr, int level)
9131 {
9132         struct extent_buffer *eb;
9133         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9134         u32 nr;
9135         int found_parent = 0;
9136         int i;
9137
9138         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9139         if (!extent_buffer_uptodate(eb))
9140                 goto out;
9141
9142         if (level == -1)
9143                 level = query_tree_block_level(fs_info, bytenr);
9144         if (level < 0)
9145                 goto out;
9146
9147         if (level + 1 != btrfs_header_level(eb))
9148                 goto out;
9149
9150         nr = btrfs_header_nritems(eb);
9151         for (i = 0; i < nr; i++) {
9152                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9153                         found_parent = 1;
9154                         break;
9155                 }
9156         }
9157 out:
9158         free_extent_buffer(eb);
9159         if (!found_parent) {
9160                 error(
9161         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9162                         bytenr, nodesize, parent, level);
9163                 return REFERENCER_MISSING;
9164         }
9165         return 0;
9166 }
9167
9168 /*
9169  * Check referencer for normal (inlined) data ref
9170  * If len == 0, it will be resolved by searching in extent tree
9171  */
9172 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9173                                      u64 root_id, u64 objectid, u64 offset,
9174                                      u64 bytenr, u64 len, u32 count)
9175 {
9176         struct btrfs_root *root;
9177         struct btrfs_root *extent_root = fs_info->extent_root;
9178         struct btrfs_key key;
9179         struct btrfs_path path;
9180         struct extent_buffer *leaf;
9181         struct btrfs_file_extent_item *fi;
9182         u32 found_count = 0;
9183         int slot;
9184         int ret = 0;
9185
9186         if (!len) {
9187                 key.objectid = bytenr;
9188                 key.type = BTRFS_EXTENT_ITEM_KEY;
9189                 key.offset = (u64)-1;
9190
9191                 btrfs_init_path(&path);
9192                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9193                 if (ret < 0)
9194                         goto out;
9195                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9196                 if (ret)
9197                         goto out;
9198                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9199                 if (key.objectid != bytenr ||
9200                     key.type != BTRFS_EXTENT_ITEM_KEY)
9201                         goto out;
9202                 len = key.offset;
9203                 btrfs_release_path(&path);
9204         }
9205         key.objectid = root_id;
9206         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9207         key.offset = (u64)-1;
9208         btrfs_init_path(&path);
9209
9210         root = btrfs_read_fs_root(fs_info, &key);
9211         if (IS_ERR(root))
9212                 goto out;
9213
9214         key.objectid = objectid;
9215         key.type = BTRFS_EXTENT_DATA_KEY;
9216         /*
9217          * It can be nasty as data backref offset is
9218          * file offset - file extent offset, which is smaller or
9219          * equal to original backref offset.  The only special case is
9220          * overflow.  So we need to special check and do further search.
9221          */
9222         key.offset = offset & (1ULL << 63) ? 0 : offset;
9223
9224         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9225         if (ret < 0)
9226                 goto out;
9227
9228         /*
9229          * Search afterwards to get correct one
9230          * NOTE: As we must do a comprehensive check on the data backref to
9231          * make sure the dref count also matches, we must iterate all file
9232          * extents for that inode.
9233          */
9234         while (1) {
9235                 leaf = path.nodes[0];
9236                 slot = path.slots[0];
9237
9238                 btrfs_item_key_to_cpu(leaf, &key, slot);
9239                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9240                         break;
9241                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9242                 /*
9243                  * Except normal disk bytenr and disk num bytes, we still
9244                  * need to do extra check on dbackref offset as
9245                  * dbackref offset = file_offset - file_extent_offset
9246                  */
9247                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9248                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9249                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9250                     offset)
9251                         found_count++;
9252
9253                 ret = btrfs_next_item(root, &path);
9254                 if (ret)
9255                         break;
9256         }
9257 out:
9258         btrfs_release_path(&path);
9259         if (found_count != count) {
9260                 error(
9261 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9262                         bytenr, len, root_id, objectid, offset, count, found_count);
9263                 return REFERENCER_MISSING;
9264         }
9265         return 0;
9266 }
9267
9268 /*
9269  * Check if the referencer of a shared data backref exists
9270  */
9271 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9272                                      u64 parent, u64 bytenr)
9273 {
9274         struct extent_buffer *eb;
9275         struct btrfs_key key;
9276         struct btrfs_file_extent_item *fi;
9277         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9278         u32 nr;
9279         int found_parent = 0;
9280         int i;
9281
9282         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9283         if (!extent_buffer_uptodate(eb))
9284                 goto out;
9285
9286         nr = btrfs_header_nritems(eb);
9287         for (i = 0; i < nr; i++) {
9288                 btrfs_item_key_to_cpu(eb, &key, i);
9289                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9290                         continue;
9291
9292                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9293                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9294                         continue;
9295
9296                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9297                         found_parent = 1;
9298                         break;
9299                 }
9300         }
9301
9302 out:
9303         free_extent_buffer(eb);
9304         if (!found_parent) {
9305                 error("shared extent %llu referencer lost (parent: %llu)",
9306                         bytenr, parent);
9307                 return REFERENCER_MISSING;
9308         }
9309         return 0;
9310 }
9311
9312 /*
9313  * This function will check a given extent item, including its backref and
9314  * itself (like crossing stripe boundary and type)
9315  *
9316  * Since we don't use extent_record anymore, introduce new error bit
9317  */
9318 static int check_extent_item(struct btrfs_fs_info *fs_info,
9319                              struct extent_buffer *eb, int slot)
9320 {
9321         struct btrfs_extent_item *ei;
9322         struct btrfs_extent_inline_ref *iref;
9323         struct btrfs_extent_data_ref *dref;
9324         unsigned long end;
9325         unsigned long ptr;
9326         int type;
9327         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9328         u32 item_size = btrfs_item_size_nr(eb, slot);
9329         u64 flags;
9330         u64 offset;
9331         int metadata = 0;
9332         int level;
9333         struct btrfs_key key;
9334         int ret;
9335         int err = 0;
9336
9337         btrfs_item_key_to_cpu(eb, &key, slot);
9338         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9339                 bytes_used += key.offset;
9340         else
9341                 bytes_used += nodesize;
9342
9343         if (item_size < sizeof(*ei)) {
9344                 /*
9345                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9346                  * old thing when on disk format is still un-determined.
9347                  * No need to care about it anymore
9348                  */
9349                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9350                 return -ENOTTY;
9351         }
9352
9353         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9354         flags = btrfs_extent_flags(eb, ei);
9355
9356         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9357                 metadata = 1;
9358         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9359                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9360                       key.objectid, key.objectid + nodesize);
9361                 err |= CROSSING_STRIPE_BOUNDARY;
9362         }
9363
9364         ptr = (unsigned long)(ei + 1);
9365
9366         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9367                 /* Old EXTENT_ITEM metadata */
9368                 struct btrfs_tree_block_info *info;
9369
9370                 info = (struct btrfs_tree_block_info *)ptr;
9371                 level = btrfs_tree_block_level(eb, info);
9372                 ptr += sizeof(struct btrfs_tree_block_info);
9373         } else {
9374                 /* New METADATA_ITEM */
9375                 level = key.offset;
9376         }
9377         end = (unsigned long)ei + item_size;
9378
9379         if (ptr >= end) {
9380                 err |= ITEM_SIZE_MISMATCH;
9381                 goto out;
9382         }
9383
9384         /* Now check every backref in this extent item */
9385 next:
9386         iref = (struct btrfs_extent_inline_ref *)ptr;
9387         type = btrfs_extent_inline_ref_type(eb, iref);
9388         offset = btrfs_extent_inline_ref_offset(eb, iref);
9389         switch (type) {
9390         case BTRFS_TREE_BLOCK_REF_KEY:
9391                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9392                                                level);
9393                 err |= ret;
9394                 break;
9395         case BTRFS_SHARED_BLOCK_REF_KEY:
9396                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9397                                                  level);
9398                 err |= ret;
9399                 break;
9400         case BTRFS_EXTENT_DATA_REF_KEY:
9401                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9402                 ret = check_extent_data_backref(fs_info,
9403                                 btrfs_extent_data_ref_root(eb, dref),
9404                                 btrfs_extent_data_ref_objectid(eb, dref),
9405                                 btrfs_extent_data_ref_offset(eb, dref),
9406                                 key.objectid, key.offset,
9407                                 btrfs_extent_data_ref_count(eb, dref));
9408                 err |= ret;
9409                 break;
9410         case BTRFS_SHARED_DATA_REF_KEY:
9411                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9412                 err |= ret;
9413                 break;
9414         default:
9415                 error("extent[%llu %d %llu] has unknown ref type: %d",
9416                         key.objectid, key.type, key.offset, type);
9417                 err |= UNKNOWN_TYPE;
9418                 goto out;
9419         }
9420
9421         ptr += btrfs_extent_inline_ref_size(type);
9422         if (ptr < end)
9423                 goto next;
9424
9425 out:
9426         return err;
9427 }
9428
9429 /*
9430  * Check if a dev extent item is referred correctly by its chunk
9431  */
9432 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9433                                  struct extent_buffer *eb, int slot)
9434 {
9435         struct btrfs_root *chunk_root = fs_info->chunk_root;
9436         struct btrfs_dev_extent *ptr;
9437         struct btrfs_path path;
9438         struct btrfs_key chunk_key;
9439         struct btrfs_key devext_key;
9440         struct btrfs_chunk *chunk;
9441         struct extent_buffer *l;
9442         int num_stripes;
9443         u64 length;
9444         int i;
9445         int found_chunk = 0;
9446         int ret;
9447
9448         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9449         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9450         length = btrfs_dev_extent_length(eb, ptr);
9451
9452         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9453         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9454         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9455
9456         btrfs_init_path(&path);
9457         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9458         if (ret)
9459                 goto out;
9460
9461         l = path.nodes[0];
9462         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9463         if (btrfs_chunk_length(l, chunk) != length)
9464                 goto out;
9465
9466         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9467         for (i = 0; i < num_stripes; i++) {
9468                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9469                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9470
9471                 if (devid == devext_key.objectid &&
9472                     offset == devext_key.offset) {
9473                         found_chunk = 1;
9474                         break;
9475                 }
9476         }
9477 out:
9478         btrfs_release_path(&path);
9479         if (!found_chunk) {
9480                 error(
9481                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9482                         devext_key.objectid, devext_key.offset, length);
9483                 return REFERENCER_MISSING;
9484         }
9485         return 0;
9486 }
9487
9488 /*
9489  * Check if the used space is correct with the dev item
9490  */
9491 static int check_dev_item(struct btrfs_fs_info *fs_info,
9492                           struct extent_buffer *eb, int slot)
9493 {
9494         struct btrfs_root *dev_root = fs_info->dev_root;
9495         struct btrfs_dev_item *dev_item;
9496         struct btrfs_path path;
9497         struct btrfs_key key;
9498         struct btrfs_dev_extent *ptr;
9499         u64 dev_id;
9500         u64 used;
9501         u64 total = 0;
9502         int ret;
9503
9504         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9505         dev_id = btrfs_device_id(eb, dev_item);
9506         used = btrfs_device_bytes_used(eb, dev_item);
9507
9508         key.objectid = dev_id;
9509         key.type = BTRFS_DEV_EXTENT_KEY;
9510         key.offset = 0;
9511
9512         btrfs_init_path(&path);
9513         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9514         if (ret < 0) {
9515                 btrfs_item_key_to_cpu(eb, &key, slot);
9516                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9517                         key.objectid, key.type, key.offset);
9518                 btrfs_release_path(&path);
9519                 return REFERENCER_MISSING;
9520         }
9521
9522         /* Iterate dev_extents to calculate the used space of a device */
9523         while (1) {
9524                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9525
9526                 if (key.objectid > dev_id)
9527                         break;
9528                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9529                         goto next;
9530
9531                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9532                                      struct btrfs_dev_extent);
9533                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9534 next:
9535                 ret = btrfs_next_item(dev_root, &path);
9536                 if (ret)
9537                         break;
9538         }
9539         btrfs_release_path(&path);
9540
9541         if (used != total) {
9542                 btrfs_item_key_to_cpu(eb, &key, slot);
9543                 error(
9544 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9545                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9546                         BTRFS_DEV_EXTENT_KEY, dev_id);
9547                 return ACCOUNTING_MISMATCH;
9548         }
9549         return 0;
9550 }
9551
9552 /*
9553  * Check a block group item with its referener (chunk) and its used space
9554  * with extent/metadata item
9555  */
9556 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9557                                   struct extent_buffer *eb, int slot)
9558 {
9559         struct btrfs_root *extent_root = fs_info->extent_root;
9560         struct btrfs_root *chunk_root = fs_info->chunk_root;
9561         struct btrfs_block_group_item *bi;
9562         struct btrfs_block_group_item bg_item;
9563         struct btrfs_path path;
9564         struct btrfs_key bg_key;
9565         struct btrfs_key chunk_key;
9566         struct btrfs_key extent_key;
9567         struct btrfs_chunk *chunk;
9568         struct extent_buffer *leaf;
9569         struct btrfs_extent_item *ei;
9570         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9571         u64 flags;
9572         u64 bg_flags;
9573         u64 used;
9574         u64 total = 0;
9575         int ret;
9576         int err = 0;
9577
9578         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9579         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9580         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9581         used = btrfs_block_group_used(&bg_item);
9582         bg_flags = btrfs_block_group_flags(&bg_item);
9583
9584         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9585         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9586         chunk_key.offset = bg_key.objectid;
9587
9588         btrfs_init_path(&path);
9589         /* Search for the referencer chunk */
9590         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9591         if (ret) {
9592                 error(
9593                 "block group[%llu %llu] did not find the related chunk item",
9594                         bg_key.objectid, bg_key.offset);
9595                 err |= REFERENCER_MISSING;
9596         } else {
9597                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9598                                         struct btrfs_chunk);
9599                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9600                                                 bg_key.offset) {
9601                         error(
9602         "block group[%llu %llu] related chunk item length does not match",
9603                                 bg_key.objectid, bg_key.offset);
9604                         err |= REFERENCER_MISMATCH;
9605                 }
9606         }
9607         btrfs_release_path(&path);
9608
9609         /* Search from the block group bytenr */
9610         extent_key.objectid = bg_key.objectid;
9611         extent_key.type = 0;
9612         extent_key.offset = 0;
9613
9614         btrfs_init_path(&path);
9615         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9616         if (ret < 0)
9617                 goto out;
9618
9619         /* Iterate extent tree to account used space */
9620         while (1) {
9621                 leaf = path.nodes[0];
9622                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9623                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9624                         break;
9625
9626                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9627                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9628                         goto next;
9629                 if (extent_key.objectid < bg_key.objectid)
9630                         goto next;
9631
9632                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9633                         total += nodesize;
9634                 else
9635                         total += extent_key.offset;
9636
9637                 ei = btrfs_item_ptr(leaf, path.slots[0],
9638                                     struct btrfs_extent_item);
9639                 flags = btrfs_extent_flags(leaf, ei);
9640                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9641                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9642                                 error(
9643                         "bad extent[%llu, %llu) type mismatch with chunk",
9644                                         extent_key.objectid,
9645                                         extent_key.objectid + extent_key.offset);
9646                                 err |= CHUNK_TYPE_MISMATCH;
9647                         }
9648                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9649                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9650                                     BTRFS_BLOCK_GROUP_METADATA))) {
9651                                 error(
9652                         "bad extent[%llu, %llu) type mismatch with chunk",
9653                                         extent_key.objectid,
9654                                         extent_key.objectid + nodesize);
9655                                 err |= CHUNK_TYPE_MISMATCH;
9656                         }
9657                 }
9658 next:
9659                 ret = btrfs_next_item(extent_root, &path);
9660                 if (ret)
9661                         break;
9662         }
9663
9664 out:
9665         btrfs_release_path(&path);
9666
9667         if (total != used) {
9668                 error(
9669                 "block group[%llu %llu] used %llu but extent items used %llu",
9670                         bg_key.objectid, bg_key.offset, used, total);
9671                 err |= ACCOUNTING_MISMATCH;
9672         }
9673         return err;
9674 }
9675
9676 /*
9677  * Check a chunk item.
9678  * Including checking all referred dev_extents and block group
9679  */
9680 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9681                             struct extent_buffer *eb, int slot)
9682 {
9683         struct btrfs_root *extent_root = fs_info->extent_root;
9684         struct btrfs_root *dev_root = fs_info->dev_root;
9685         struct btrfs_path path;
9686         struct btrfs_key chunk_key;
9687         struct btrfs_key bg_key;
9688         struct btrfs_key devext_key;
9689         struct btrfs_chunk *chunk;
9690         struct extent_buffer *leaf;
9691         struct btrfs_block_group_item *bi;
9692         struct btrfs_block_group_item bg_item;
9693         struct btrfs_dev_extent *ptr;
9694         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9695         u64 length;
9696         u64 chunk_end;
9697         u64 type;
9698         u64 profile;
9699         int num_stripes;
9700         u64 offset;
9701         u64 objectid;
9702         int i;
9703         int ret;
9704         int err = 0;
9705
9706         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9707         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9708         length = btrfs_chunk_length(eb, chunk);
9709         chunk_end = chunk_key.offset + length;
9710         if (!IS_ALIGNED(length, sectorsize)) {
9711                 error("chunk[%llu %llu) not aligned to %u",
9712                         chunk_key.offset, chunk_end, sectorsize);
9713                 err |= BYTES_UNALIGNED;
9714                 goto out;
9715         }
9716
9717         type = btrfs_chunk_type(eb, chunk);
9718         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9719         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9720                 error("chunk[%llu %llu) has no chunk type",
9721                         chunk_key.offset, chunk_end);
9722                 err |= UNKNOWN_TYPE;
9723         }
9724         if (profile && (profile & (profile - 1))) {
9725                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9726                         chunk_key.offset, chunk_end, profile);
9727                 err |= UNKNOWN_TYPE;
9728         }
9729
9730         bg_key.objectid = chunk_key.offset;
9731         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9732         bg_key.offset = length;
9733
9734         btrfs_init_path(&path);
9735         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9736         if (ret) {
9737                 error(
9738                 "chunk[%llu %llu) did not find the related block group item",
9739                         chunk_key.offset, chunk_end);
9740                 err |= REFERENCER_MISSING;
9741         } else{
9742                 leaf = path.nodes[0];
9743                 bi = btrfs_item_ptr(leaf, path.slots[0],
9744                                     struct btrfs_block_group_item);
9745                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9746                                    sizeof(bg_item));
9747                 if (btrfs_block_group_flags(&bg_item) != type) {
9748                         error(
9749 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9750                                 chunk_key.offset, chunk_end, type,
9751                                 btrfs_block_group_flags(&bg_item));
9752                         err |= REFERENCER_MISSING;
9753                 }
9754         }
9755
9756         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9757         for (i = 0; i < num_stripes; i++) {
9758                 btrfs_release_path(&path);
9759                 btrfs_init_path(&path);
9760                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9761                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9762                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9763
9764                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9765                                         0, 0);
9766                 if (ret)
9767                         goto not_match_dev;
9768
9769                 leaf = path.nodes[0];
9770                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9771                                      struct btrfs_dev_extent);
9772                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9773                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9774                 if (objectid != chunk_key.objectid ||
9775                     offset != chunk_key.offset ||
9776                     btrfs_dev_extent_length(leaf, ptr) != length)
9777                         goto not_match_dev;
9778                 continue;
9779 not_match_dev:
9780                 err |= BACKREF_MISSING;
9781                 error(
9782                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9783                         chunk_key.objectid, chunk_end, i);
9784                 continue;
9785         }
9786         btrfs_release_path(&path);
9787 out:
9788         return err;
9789 }
9790
9791 /*
9792  * Main entry function to check known items and update related accounting info
9793  */
9794 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9795 {
9796         struct btrfs_fs_info *fs_info = root->fs_info;
9797         struct btrfs_key key;
9798         int slot = 0;
9799         int type;
9800         struct btrfs_extent_data_ref *dref;
9801         int ret;
9802         int err = 0;
9803
9804 next:
9805         btrfs_item_key_to_cpu(eb, &key, slot);
9806         type = btrfs_key_type(&key);
9807
9808         switch (type) {
9809         case BTRFS_EXTENT_DATA_KEY:
9810                 ret = check_extent_data_item(root, eb, slot);
9811                 err |= ret;
9812                 break;
9813         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9814                 ret = check_block_group_item(fs_info, eb, slot);
9815                 err |= ret;
9816                 break;
9817         case BTRFS_DEV_ITEM_KEY:
9818                 ret = check_dev_item(fs_info, eb, slot);
9819                 err |= ret;
9820                 break;
9821         case BTRFS_CHUNK_ITEM_KEY:
9822                 ret = check_chunk_item(fs_info, eb, slot);
9823                 err |= ret;
9824                 break;
9825         case BTRFS_DEV_EXTENT_KEY:
9826                 ret = check_dev_extent_item(fs_info, eb, slot);
9827                 err |= ret;
9828                 break;
9829         case BTRFS_EXTENT_ITEM_KEY:
9830         case BTRFS_METADATA_ITEM_KEY:
9831                 ret = check_extent_item(fs_info, eb, slot);
9832                 err |= ret;
9833                 break;
9834         case BTRFS_EXTENT_CSUM_KEY:
9835                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9836                 break;
9837         case BTRFS_TREE_BLOCK_REF_KEY:
9838                 ret = check_tree_block_backref(fs_info, key.offset,
9839                                                key.objectid, -1);
9840                 err |= ret;
9841                 break;
9842         case BTRFS_EXTENT_DATA_REF_KEY:
9843                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9844                 ret = check_extent_data_backref(fs_info,
9845                                 btrfs_extent_data_ref_root(eb, dref),
9846                                 btrfs_extent_data_ref_objectid(eb, dref),
9847                                 btrfs_extent_data_ref_offset(eb, dref),
9848                                 key.objectid, 0,
9849                                 btrfs_extent_data_ref_count(eb, dref));
9850                 err |= ret;
9851                 break;
9852         case BTRFS_SHARED_BLOCK_REF_KEY:
9853                 ret = check_shared_block_backref(fs_info, key.offset,
9854                                                  key.objectid, -1);
9855                 err |= ret;
9856                 break;
9857         case BTRFS_SHARED_DATA_REF_KEY:
9858                 ret = check_shared_data_backref(fs_info, key.offset,
9859                                                 key.objectid);
9860                 err |= ret;
9861                 break;
9862         default:
9863                 break;
9864         }
9865
9866         if (++slot < btrfs_header_nritems(eb))
9867                 goto next;
9868
9869         return err;
9870 }
9871
9872 /*
9873  * Helper function for later fs/subvol tree check.  To determine if a tree
9874  * block should be checked.
9875  * This function will ensure only the direct referencer with lowest rootid to
9876  * check a fs/subvolume tree block.
9877  *
9878  * Backref check at extent tree would detect errors like missing subvolume
9879  * tree, so we can do aggressive check to reduce duplicated checks.
9880  */
9881 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9882 {
9883         struct btrfs_root *extent_root = root->fs_info->extent_root;
9884         struct btrfs_key key;
9885         struct btrfs_path path;
9886         struct extent_buffer *leaf;
9887         int slot;
9888         struct btrfs_extent_item *ei;
9889         unsigned long ptr;
9890         unsigned long end;
9891         int type;
9892         u32 item_size;
9893         u64 offset;
9894         struct btrfs_extent_inline_ref *iref;
9895         int ret;
9896
9897         btrfs_init_path(&path);
9898         key.objectid = btrfs_header_bytenr(eb);
9899         key.type = BTRFS_METADATA_ITEM_KEY;
9900         key.offset = (u64)-1;
9901
9902         /*
9903          * Any failure in backref resolving means we can't determine
9904          * whom the tree block belongs to.
9905          * So in that case, we need to check that tree block
9906          */
9907         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9908         if (ret < 0)
9909                 goto need_check;
9910
9911         ret = btrfs_previous_extent_item(extent_root, &path,
9912                                          btrfs_header_bytenr(eb));
9913         if (ret)
9914                 goto need_check;
9915
9916         leaf = path.nodes[0];
9917         slot = path.slots[0];
9918         btrfs_item_key_to_cpu(leaf, &key, slot);
9919         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9920
9921         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9922                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9923         } else {
9924                 struct btrfs_tree_block_info *info;
9925
9926                 info = (struct btrfs_tree_block_info *)(ei + 1);
9927                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9928         }
9929
9930         item_size = btrfs_item_size_nr(leaf, slot);
9931         ptr = (unsigned long)iref;
9932         end = (unsigned long)ei + item_size;
9933         while (ptr < end) {
9934                 iref = (struct btrfs_extent_inline_ref *)ptr;
9935                 type = btrfs_extent_inline_ref_type(leaf, iref);
9936                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9937
9938                 /*
9939                  * We only check the tree block if current root is
9940                  * the lowest referencer of it.
9941                  */
9942                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9943                     offset < root->objectid) {
9944                         btrfs_release_path(&path);
9945                         return 0;
9946                 }
9947
9948                 ptr += btrfs_extent_inline_ref_size(type);
9949         }
9950         /*
9951          * Normally we should also check keyed tree block ref, but that may be
9952          * very time consuming.  Inlined ref should already make us skip a lot
9953          * of refs now.  So skip search keyed tree block ref.
9954          */
9955
9956 need_check:
9957         btrfs_release_path(&path);
9958         return 1;
9959 }
9960
9961 /*
9962  * Traversal function for tree block. We will do:
9963  * 1) Skip shared fs/subvolume tree blocks
9964  * 2) Update related bytes accounting
9965  * 3) Pre-order traversal
9966  */
9967 static int traverse_tree_block(struct btrfs_root *root,
9968                                 struct extent_buffer *node)
9969 {
9970         struct extent_buffer *eb;
9971         int level;
9972         u64 nr;
9973         int i;
9974         int err = 0;
9975         int ret;
9976
9977         /*
9978          * Skip shared fs/subvolume tree block, in that case they will
9979          * be checked by referencer with lowest rootid
9980          */
9981         if (is_fstree(root->objectid) && !should_check(root, node))
9982                 return 0;
9983
9984         /* Update bytes accounting */
9985         total_btree_bytes += node->len;
9986         if (fs_root_objectid(btrfs_header_owner(node)))
9987                 total_fs_tree_bytes += node->len;
9988         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9989                 total_extent_tree_bytes += node->len;
9990         if (!found_old_backref &&
9991             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9992             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9993             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9994                 found_old_backref = 1;
9995
9996         /* pre-order tranversal, check itself first */
9997         level = btrfs_header_level(node);
9998         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9999                                    btrfs_header_level(node),
10000                                    btrfs_header_owner(node));
10001         err |= ret;
10002         if (err)
10003                 error(
10004         "check %s failed root %llu bytenr %llu level %d, force continue check",
10005                         level ? "node":"leaf", root->objectid,
10006                         btrfs_header_bytenr(node), btrfs_header_level(node));
10007
10008         if (!level) {
10009                 btree_space_waste += btrfs_leaf_free_space(root, node);
10010                 ret = check_leaf_items(root, node);
10011                 err |= ret;
10012                 return err;
10013         }
10014
10015         nr = btrfs_header_nritems(node);
10016         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10017                 sizeof(struct btrfs_key_ptr);
10018
10019         /* Then check all its children */
10020         for (i = 0; i < nr; i++) {
10021                 u64 blocknr = btrfs_node_blockptr(node, i);
10022
10023                 /*
10024                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10025                  * to call the function itself.
10026                  */
10027                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10028                 if (extent_buffer_uptodate(eb)) {
10029                         ret = traverse_tree_block(root, eb);
10030                         err |= ret;
10031                 }
10032                 free_extent_buffer(eb);
10033         }
10034
10035         return err;
10036 }
10037
10038 /*
10039  * Low memory usage version check_chunks_and_extents.
10040  */
10041 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10042 {
10043         struct btrfs_path path;
10044         struct btrfs_key key;
10045         struct btrfs_root *root1;
10046         struct btrfs_root *cur_root;
10047         int err = 0;
10048         int ret;
10049
10050         root1 = root->fs_info->chunk_root;
10051         ret = traverse_tree_block(root1, root1->node);
10052         err |= ret;
10053
10054         root1 = root->fs_info->tree_root;
10055         ret = traverse_tree_block(root1, root1->node);
10056         err |= ret;
10057
10058         btrfs_init_path(&path);
10059         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10060         key.offset = 0;
10061         key.type = BTRFS_ROOT_ITEM_KEY;
10062
10063         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10064         if (ret) {
10065                 error("cannot find extent treet in tree_root");
10066                 goto out;
10067         }
10068
10069         while (1) {
10070                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10071                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10072                         goto next;
10073                 key.offset = (u64)-1;
10074
10075                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10076                 if (IS_ERR(cur_root) || !cur_root) {
10077                         error("failed to read tree: %lld", key.objectid);
10078                         goto next;
10079                 }
10080
10081                 ret = traverse_tree_block(cur_root, cur_root->node);
10082                 err |= ret;
10083
10084 next:
10085                 ret = btrfs_next_item(root1, &path);
10086                 if (ret)
10087                         goto out;
10088         }
10089
10090 out:
10091         btrfs_release_path(&path);
10092         return err;
10093 }
10094
10095 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10096                            struct btrfs_root *root, int overwrite)
10097 {
10098         struct extent_buffer *c;
10099         struct extent_buffer *old = root->node;
10100         int level;
10101         int ret;
10102         struct btrfs_disk_key disk_key = {0,0,0};
10103
10104         level = 0;
10105
10106         if (overwrite) {
10107                 c = old;
10108                 extent_buffer_get(c);
10109                 goto init;
10110         }
10111         c = btrfs_alloc_free_block(trans, root,
10112                                    root->nodesize,
10113                                    root->root_key.objectid,
10114                                    &disk_key, level, 0, 0);
10115         if (IS_ERR(c)) {
10116                 c = old;
10117                 extent_buffer_get(c);
10118                 overwrite = 1;
10119         }
10120 init:
10121         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10122         btrfs_set_header_level(c, level);
10123         btrfs_set_header_bytenr(c, c->start);
10124         btrfs_set_header_generation(c, trans->transid);
10125         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10126         btrfs_set_header_owner(c, root->root_key.objectid);
10127
10128         write_extent_buffer(c, root->fs_info->fsid,
10129                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10130
10131         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10132                             btrfs_header_chunk_tree_uuid(c),
10133                             BTRFS_UUID_SIZE);
10134
10135         btrfs_mark_buffer_dirty(c);
10136         /*
10137          * this case can happen in the following case:
10138          *
10139          * 1.overwrite previous root.
10140          *
10141          * 2.reinit reloc data root, this is because we skip pin
10142          * down reloc data tree before which means we can allocate
10143          * same block bytenr here.
10144          */
10145         if (old->start == c->start) {
10146                 btrfs_set_root_generation(&root->root_item,
10147                                           trans->transid);
10148                 root->root_item.level = btrfs_header_level(root->node);
10149                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10150                                         &root->root_key, &root->root_item);
10151                 if (ret) {
10152                         free_extent_buffer(c);
10153                         return ret;
10154                 }
10155         }
10156         free_extent_buffer(old);
10157         root->node = c;
10158         add_root_to_dirty_list(root);
10159         return 0;
10160 }
10161
10162 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10163                                 struct extent_buffer *eb, int tree_root)
10164 {
10165         struct extent_buffer *tmp;
10166         struct btrfs_root_item *ri;
10167         struct btrfs_key key;
10168         u64 bytenr;
10169         u32 nodesize;
10170         int level = btrfs_header_level(eb);
10171         int nritems;
10172         int ret;
10173         int i;
10174
10175         /*
10176          * If we have pinned this block before, don't pin it again.
10177          * This can not only avoid forever loop with broken filesystem
10178          * but also give us some speedups.
10179          */
10180         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10181                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10182                 return 0;
10183
10184         btrfs_pin_extent(fs_info, eb->start, eb->len);
10185
10186         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10187         nritems = btrfs_header_nritems(eb);
10188         for (i = 0; i < nritems; i++) {
10189                 if (level == 0) {
10190                         btrfs_item_key_to_cpu(eb, &key, i);
10191                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10192                                 continue;
10193                         /* Skip the extent root and reloc roots */
10194                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10195                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10196                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10197                                 continue;
10198                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10199                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10200
10201                         /*
10202                          * If at any point we start needing the real root we
10203                          * will have to build a stump root for the root we are
10204                          * in, but for now this doesn't actually use the root so
10205                          * just pass in extent_root.
10206                          */
10207                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10208                                               nodesize, 0);
10209                         if (!extent_buffer_uptodate(tmp)) {
10210                                 fprintf(stderr, "Error reading root block\n");
10211                                 return -EIO;
10212                         }
10213                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10214                         free_extent_buffer(tmp);
10215                         if (ret)
10216                                 return ret;
10217                 } else {
10218                         bytenr = btrfs_node_blockptr(eb, i);
10219
10220                         /* If we aren't the tree root don't read the block */
10221                         if (level == 1 && !tree_root) {
10222                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10223                                 continue;
10224                         }
10225
10226                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10227                                               nodesize, 0);
10228                         if (!extent_buffer_uptodate(tmp)) {
10229                                 fprintf(stderr, "Error reading tree block\n");
10230                                 return -EIO;
10231                         }
10232                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10233                         free_extent_buffer(tmp);
10234                         if (ret)
10235                                 return ret;
10236                 }
10237         }
10238
10239         return 0;
10240 }
10241
10242 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10243 {
10244         int ret;
10245
10246         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10247         if (ret)
10248                 return ret;
10249
10250         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10251 }
10252
10253 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10254 {
10255         struct btrfs_block_group_cache *cache;
10256         struct btrfs_path *path;
10257         struct extent_buffer *leaf;
10258         struct btrfs_chunk *chunk;
10259         struct btrfs_key key;
10260         int ret;
10261         u64 start;
10262
10263         path = btrfs_alloc_path();
10264         if (!path)
10265                 return -ENOMEM;
10266
10267         key.objectid = 0;
10268         key.type = BTRFS_CHUNK_ITEM_KEY;
10269         key.offset = 0;
10270
10271         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10272         if (ret < 0) {
10273                 btrfs_free_path(path);
10274                 return ret;
10275         }
10276
10277         /*
10278          * We do this in case the block groups were screwed up and had alloc
10279          * bits that aren't actually set on the chunks.  This happens with
10280          * restored images every time and could happen in real life I guess.
10281          */
10282         fs_info->avail_data_alloc_bits = 0;
10283         fs_info->avail_metadata_alloc_bits = 0;
10284         fs_info->avail_system_alloc_bits = 0;
10285
10286         /* First we need to create the in-memory block groups */
10287         while (1) {
10288                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10289                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10290                         if (ret < 0) {
10291                                 btrfs_free_path(path);
10292                                 return ret;
10293                         }
10294                         if (ret) {
10295                                 ret = 0;
10296                                 break;
10297                         }
10298                 }
10299                 leaf = path->nodes[0];
10300                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10301                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10302                         path->slots[0]++;
10303                         continue;
10304                 }
10305
10306                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10307                                        struct btrfs_chunk);
10308                 btrfs_add_block_group(fs_info, 0,
10309                                       btrfs_chunk_type(leaf, chunk),
10310                                       key.objectid, key.offset,
10311                                       btrfs_chunk_length(leaf, chunk));
10312                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10313                                  key.offset + btrfs_chunk_length(leaf, chunk),
10314                                  GFP_NOFS);
10315                 path->slots[0]++;
10316         }
10317         start = 0;
10318         while (1) {
10319                 cache = btrfs_lookup_first_block_group(fs_info, start);
10320                 if (!cache)
10321                         break;
10322                 cache->cached = 1;
10323                 start = cache->key.objectid + cache->key.offset;
10324         }
10325
10326         btrfs_free_path(path);
10327         return 0;
10328 }
10329
/*
 * Drop a pending balance and reinitialize the data reloc tree.
 *
 * If the tree root holds a balance item, that item is deleted and all items
 * with objectid BTRFS_TREE_RELOC_OBJECTID are removed from the tree root as
 * well.  Without a balance item both deletions are skipped.  In either case
 * the data reloc tree is then reinitialized to an empty root and its root
 * directory is created again.
 *
 * Returns 0 on success, -ENOMEM if no path can be allocated, or the error of
 * the first failing tree operation.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	/* Pending batch of adjacent slots in the current leaf to delete */
	int del_slot, del_nr = 0;
	int ret;
	/* Set once the leaf of the current search yielded at least one item */
	int found = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		/* ret > 0: no balance item, only the data reloc tree is reset */
		if (ret > 0)
			ret = 0;
		if (!ret)
			goto reinit_data_reloc;
		else
			goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;
	btrfs_release_path(path);

	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			/* A fresh search that starts past the leaf end: done */
			if (!found)
				break;

			/* Flush the batch before leaving this leaf */
			if (del_nr) {
				ret = btrfs_del_items(trans, root, path,
						      del_slot, del_nr);
				del_nr = 0;
				if (ret)
					goto out;
			}
			/* Search again right behind the last key looked at */
			key.offset++;
			btrfs_release_path(path);

			found = 0;
			ret = btrfs_search_slot(trans, root, &key, path,
						-1, 1);
			if (ret < 0)
				goto out;
			continue;
		}
		found = 1;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* Past the tree reloc objectid, nothing left to delete */
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
			break;
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			path->slots[0]++;
			continue;
		}
		/* Start a new batch or extend the running one */
		if (!del_nr) {
			del_slot = path->slots[0];
			del_nr = 1;
		} else {
			del_nr++;
		}
		path->slots[0]++;
	}

	/* Delete whatever is still batched when the loop was left */
	if (del_nr) {
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			goto out;
	}
	btrfs_release_path(path);

reinit_data_reloc:
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
		goto out;
	}
	record_root_in_trans(trans, root);
	/* overwrite == 0: try to allocate a new root block first */
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	if (ret)
		goto out;
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
	btrfs_free_path(path);
	return ret;
}
10437
10438 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10439                               struct btrfs_fs_info *fs_info)
10440 {
10441         u64 start = 0;
10442         int ret;
10443
10444         /*
10445          * The only reason we don't do this is because right now we're just
10446          * walking the trees we find and pinning down their bytes, we don't look
10447          * at any of the leaves.  In order to do mixed groups we'd have to check
10448          * the leaves of any fs roots and pin down the bytes for any file
10449          * extents we find.  Not hard but why do it if we don't have to?
10450          */
10451         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10452                 fprintf(stderr, "We don't support re-initing the extent tree "
10453                         "for mixed block groups yet, please notify a btrfs "
10454                         "developer you want to do this so they can add this "
10455                         "functionality.\n");
10456                 return -EINVAL;
10457         }
10458
10459         /*
10460          * first we need to walk all of the trees except the extent tree and pin
10461          * down the bytes that are in use so we don't overwrite any existing
10462          * metadata.
10463          */
10464         ret = pin_metadata_blocks(fs_info);
10465         if (ret) {
10466                 fprintf(stderr, "error pinning down used bytes\n");
10467                 return ret;
10468         }
10469
10470         /*
10471          * Need to drop all the block groups since we're going to recreate all
10472          * of them again.
10473          */
10474         btrfs_free_block_groups(fs_info);
10475         ret = reset_block_groups(fs_info);
10476         if (ret) {
10477                 fprintf(stderr, "error resetting the block groups\n");
10478                 return ret;
10479         }
10480
10481         /* Ok we can allocate now, reinit the extent root */
10482         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10483         if (ret) {
10484                 fprintf(stderr, "extent root initialization failed\n");
10485                 /*
10486                  * When the transaction code is updated we should end the
10487                  * transaction, but for now progs only knows about commit so
10488                  * just return an error.
10489                  */
10490                 return ret;
10491         }
10492
10493         /*
10494          * Now we have all the in-memory block groups setup so we can make
10495          * allocations properly, and the metadata we care about is safe since we
10496          * pinned all of it above.
10497          */
10498         while (1) {
10499                 struct btrfs_block_group_cache *cache;
10500
10501                 cache = btrfs_lookup_first_block_group(fs_info, start);
10502                 if (!cache)
10503                         break;
10504                 start = cache->key.objectid + cache->key.offset;
10505                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10506                                         &cache->key, &cache->item,
10507                                         sizeof(cache->item));
10508                 if (ret) {
10509                         fprintf(stderr, "Error adding block group\n");
10510                         return ret;
10511                 }
10512                 btrfs_extent_post_op(trans, fs_info->extent_root);
10513         }
10514
10515         ret = reset_balance(trans, fs_info);
10516         if (ret)
10517                 fprintf(stderr, "error resetting the pending balance\n");
10518
10519         return ret;
10520 }
10521
10522 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10523 {
10524         struct btrfs_path *path;
10525         struct btrfs_trans_handle *trans;
10526         struct btrfs_key key;
10527         int ret;
10528
10529         printf("Recowing metadata block %llu\n", eb->start);
10530         key.objectid = btrfs_header_owner(eb);
10531         key.type = BTRFS_ROOT_ITEM_KEY;
10532         key.offset = (u64)-1;
10533
10534         root = btrfs_read_fs_root(root->fs_info, &key);
10535         if (IS_ERR(root)) {
10536                 fprintf(stderr, "Couldn't find owner root %llu\n",
10537                         key.objectid);
10538                 return PTR_ERR(root);
10539         }
10540
10541         path = btrfs_alloc_path();
10542         if (!path)
10543                 return -ENOMEM;
10544
10545         trans = btrfs_start_transaction(root, 1);
10546         if (IS_ERR(trans)) {
10547                 btrfs_free_path(path);
10548                 return PTR_ERR(trans);
10549         }
10550
10551         path->lowest_level = btrfs_header_level(eb);
10552         if (path->lowest_level)
10553                 btrfs_node_key_to_cpu(eb, &key, 0);
10554         else
10555                 btrfs_item_key_to_cpu(eb, &key, 0);
10556
10557         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10558         btrfs_commit_transaction(trans, root);
10559         btrfs_free_path(path);
10560         return ret;
10561 }
10562
10563 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10564 {
10565         struct btrfs_path *path;
10566         struct btrfs_trans_handle *trans;
10567         struct btrfs_key key;
10568         int ret;
10569
10570         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10571                bad->key.type, bad->key.offset);
10572         key.objectid = bad->root_id;
10573         key.type = BTRFS_ROOT_ITEM_KEY;
10574         key.offset = (u64)-1;
10575
10576         root = btrfs_read_fs_root(root->fs_info, &key);
10577         if (IS_ERR(root)) {
10578                 fprintf(stderr, "Couldn't find owner root %llu\n",
10579                         key.objectid);
10580                 return PTR_ERR(root);
10581         }
10582
10583         path = btrfs_alloc_path();
10584         if (!path)
10585                 return -ENOMEM;
10586
10587         trans = btrfs_start_transaction(root, 1);
10588         if (IS_ERR(trans)) {
10589                 btrfs_free_path(path);
10590                 return PTR_ERR(trans);
10591         }
10592
10593         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10594         if (ret) {
10595                 if (ret > 0)
10596                         ret = 0;
10597                 goto out;
10598         }
10599         ret = btrfs_del_item(trans, root, path);
10600 out:
10601         btrfs_commit_transaction(trans, root);
10602         btrfs_free_path(path);
10603         return ret;
10604 }
10605
10606 static int zero_log_tree(struct btrfs_root *root)
10607 {
10608         struct btrfs_trans_handle *trans;
10609         int ret;
10610
10611         trans = btrfs_start_transaction(root, 1);
10612         if (IS_ERR(trans)) {
10613                 ret = PTR_ERR(trans);
10614                 return ret;
10615         }
10616         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10617         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10618         ret = btrfs_commit_transaction(trans, root);
10619         return ret;
10620 }
10621
10622 static int populate_csum(struct btrfs_trans_handle *trans,
10623                          struct btrfs_root *csum_root, char *buf, u64 start,
10624                          u64 len)
10625 {
10626         u64 offset = 0;
10627         u64 sectorsize;
10628         int ret = 0;
10629
10630         while (offset < len) {
10631                 sectorsize = csum_root->sectorsize;
10632                 ret = read_extent_data(csum_root, buf, start + offset,
10633                                        &sectorsize, 0);
10634                 if (ret)
10635                         break;
10636                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10637                                             start + offset, buf, sectorsize);
10638                 if (ret)
10639                         break;
10640                 offset += sectorsize;
10641         }
10642         return ret;
10643 }
10644
10645 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10646                                       struct btrfs_root *csum_root,
10647                                       struct btrfs_root *cur_root)
10648 {
10649         struct btrfs_path *path;
10650         struct btrfs_key key;
10651         struct extent_buffer *node;
10652         struct btrfs_file_extent_item *fi;
10653         char *buf = NULL;
10654         u64 start = 0;
10655         u64 len = 0;
10656         int slot = 0;
10657         int ret = 0;
10658
10659         path = btrfs_alloc_path();
10660         if (!path)
10661                 return -ENOMEM;
10662         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10663         if (!buf) {
10664                 ret = -ENOMEM;
10665                 goto out;
10666         }
10667
10668         key.objectid = 0;
10669         key.offset = 0;
10670         key.type = 0;
10671
10672         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10673         if (ret < 0)
10674                 goto out;
10675         /* Iterate all regular file extents and fill its csum */
10676         while (1) {
10677                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10678
10679                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10680                         goto next;
10681                 node = path->nodes[0];
10682                 slot = path->slots[0];
10683                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10684                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10685                         goto next;
10686                 start = btrfs_file_extent_disk_bytenr(node, fi);
10687                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10688
10689                 ret = populate_csum(trans, csum_root, buf, start, len);
10690                 if (ret == -EEXIST)
10691                         ret = 0;
10692                 if (ret < 0)
10693                         goto out;
10694 next:
10695                 /*
10696                  * TODO: if next leaf is corrupted, jump to nearest next valid
10697                  * leaf.
10698                  */
10699                 ret = btrfs_next_item(cur_root, path);
10700                 if (ret < 0)
10701                         goto out;
10702                 if (ret > 0) {
10703                         ret = 0;
10704                         goto out;
10705                 }
10706         }
10707
10708 out:
10709         btrfs_free_path(path);
10710         free(buf);
10711         return ret;
10712 }
10713
10714 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10715                                   struct btrfs_root *csum_root)
10716 {
10717         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10718         struct btrfs_path *path;
10719         struct btrfs_root *tree_root = fs_info->tree_root;
10720         struct btrfs_root *cur_root;
10721         struct extent_buffer *node;
10722         struct btrfs_key key;
10723         int slot = 0;
10724         int ret = 0;
10725
10726         path = btrfs_alloc_path();
10727         if (!path)
10728                 return -ENOMEM;
10729
10730         key.objectid = BTRFS_FS_TREE_OBJECTID;
10731         key.offset = 0;
10732         key.type = BTRFS_ROOT_ITEM_KEY;
10733
10734         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10735         if (ret < 0)
10736                 goto out;
10737         if (ret > 0) {
10738                 ret = -ENOENT;
10739                 goto out;
10740         }
10741
10742         while (1) {
10743                 node = path->nodes[0];
10744                 slot = path->slots[0];
10745                 btrfs_item_key_to_cpu(node, &key, slot);
10746                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10747                         goto out;
10748                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10749                         goto next;
10750                 if (!is_fstree(key.objectid))
10751                         goto next;
10752                 key.offset = (u64)-1;
10753
10754                 cur_root = btrfs_read_fs_root(fs_info, &key);
10755                 if (IS_ERR(cur_root) || !cur_root) {
10756                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10757                                 key.objectid);
10758                         goto out;
10759                 }
10760                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10761                                 cur_root);
10762                 if (ret < 0)
10763                         goto out;
10764 next:
10765                 ret = btrfs_next_item(tree_root, path);
10766                 if (ret > 0) {
10767                         ret = 0;
10768                         goto out;
10769                 }
10770                 if (ret < 0)
10771                         goto out;
10772         }
10773
10774 out:
10775         btrfs_free_path(path);
10776         return ret;
10777 }
10778
10779 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10780                                       struct btrfs_root *csum_root)
10781 {
10782         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10783         struct btrfs_path *path;
10784         struct btrfs_extent_item *ei;
10785         struct extent_buffer *leaf;
10786         char *buf;
10787         struct btrfs_key key;
10788         int ret;
10789
10790         path = btrfs_alloc_path();
10791         if (!path)
10792                 return -ENOMEM;
10793
10794         key.objectid = 0;
10795         key.type = BTRFS_EXTENT_ITEM_KEY;
10796         key.offset = 0;
10797
10798         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10799         if (ret < 0) {
10800                 btrfs_free_path(path);
10801                 return ret;
10802         }
10803
10804         buf = malloc(csum_root->sectorsize);
10805         if (!buf) {
10806                 btrfs_free_path(path);
10807                 return -ENOMEM;
10808         }
10809
10810         while (1) {
10811                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10812                         ret = btrfs_next_leaf(extent_root, path);
10813                         if (ret < 0)
10814                                 break;
10815                         if (ret) {
10816                                 ret = 0;
10817                                 break;
10818                         }
10819                 }
10820                 leaf = path->nodes[0];
10821
10822                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10823                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10824                         path->slots[0]++;
10825                         continue;
10826                 }
10827
10828                 ei = btrfs_item_ptr(leaf, path->slots[0],
10829                                     struct btrfs_extent_item);
10830                 if (!(btrfs_extent_flags(leaf, ei) &
10831                       BTRFS_EXTENT_FLAG_DATA)) {
10832                         path->slots[0]++;
10833                         continue;
10834                 }
10835
10836                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10837                                     key.offset);
10838                 if (ret)
10839                         break;
10840                 path->slots[0]++;
10841         }
10842
10843         btrfs_free_path(path);
10844         free(buf);
10845         return ret;
10846 }
10847
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes all extent information, so in that case
 * the extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
10864
10865 static void free_roots_info_cache(void)
10866 {
10867         if (!roots_info_cache)
10868                 return;
10869
10870         while (!cache_tree_empty(roots_info_cache)) {
10871                 struct cache_extent *entry;
10872                 struct root_item_info *rii;
10873
10874                 entry = first_cache_extent(roots_info_cache);
10875                 if (!entry)
10876                         break;
10877                 remove_cache_extent(roots_info_cache, entry);
10878                 rii = container_of(entry, struct root_item_info, cache_extent);
10879                 free(rii);
10880         }
10881
10882         free(roots_info_cache);
10883         roots_info_cache = NULL;
10884 }
10885
10886 static int build_roots_info_cache(struct btrfs_fs_info *info)
10887 {
10888         int ret = 0;
10889         struct btrfs_key key;
10890         struct extent_buffer *leaf;
10891         struct btrfs_path *path;
10892
10893         if (!roots_info_cache) {
10894                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10895                 if (!roots_info_cache)
10896                         return -ENOMEM;
10897                 cache_tree_init(roots_info_cache);
10898         }
10899
10900         path = btrfs_alloc_path();
10901         if (!path)
10902                 return -ENOMEM;
10903
10904         key.objectid = 0;
10905         key.type = BTRFS_EXTENT_ITEM_KEY;
10906         key.offset = 0;
10907
10908         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10909         if (ret < 0)
10910                 goto out;
10911         leaf = path->nodes[0];
10912
10913         while (1) {
10914                 struct btrfs_key found_key;
10915                 struct btrfs_extent_item *ei;
10916                 struct btrfs_extent_inline_ref *iref;
10917                 int slot = path->slots[0];
10918                 int type;
10919                 u64 flags;
10920                 u64 root_id;
10921                 u8 level;
10922                 struct cache_extent *entry;
10923                 struct root_item_info *rii;
10924
10925                 if (slot >= btrfs_header_nritems(leaf)) {
10926                         ret = btrfs_next_leaf(info->extent_root, path);
10927                         if (ret < 0) {
10928                                 break;
10929                         } else if (ret) {
10930                                 ret = 0;
10931                                 break;
10932                         }
10933                         leaf = path->nodes[0];
10934                         slot = path->slots[0];
10935                 }
10936
10937                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10938
10939                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10940                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10941                         goto next;
10942
10943                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10944                 flags = btrfs_extent_flags(leaf, ei);
10945
10946                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10947                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10948                         goto next;
10949
10950                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10951                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10952                         level = found_key.offset;
10953                 } else {
10954                         struct btrfs_tree_block_info *binfo;
10955
10956                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10957                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10958                         level = btrfs_tree_block_level(leaf, binfo);
10959                 }
10960
10961                 /*
10962                  * For a root extent, it must be of the following type and the
10963                  * first (and only one) iref in the item.
10964                  */
10965                 type = btrfs_extent_inline_ref_type(leaf, iref);
10966                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10967                         goto next;
10968
10969                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10970                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10971                 if (!entry) {
10972                         rii = malloc(sizeof(struct root_item_info));
10973                         if (!rii) {
10974                                 ret = -ENOMEM;
10975                                 goto out;
10976                         }
10977                         rii->cache_extent.start = root_id;
10978                         rii->cache_extent.size = 1;
10979                         rii->level = (u8)-1;
10980                         entry = &rii->cache_extent;
10981                         ret = insert_cache_extent(roots_info_cache, entry);
10982                         ASSERT(ret == 0);
10983                 } else {
10984                         rii = container_of(entry, struct root_item_info,
10985                                            cache_extent);
10986                 }
10987
10988                 ASSERT(rii->cache_extent.start == root_id);
10989                 ASSERT(rii->cache_extent.size == 1);
10990
10991                 if (level > rii->level || rii->level == (u8)-1) {
10992                         rii->level = level;
10993                         rii->bytenr = found_key.objectid;
10994                         rii->gen = btrfs_extent_generation(leaf, ei);
10995                         rii->node_count = 1;
10996                 } else if (level == rii->level) {
10997                         rii->node_count++;
10998                 }
10999 next:
11000                 path->slots[0]++;
11001         }
11002
11003 out:
11004         btrfs_free_path(path);
11005
11006         return ret;
11007 }
11008
11009 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11010                                   struct btrfs_path *path,
11011                                   const struct btrfs_key *root_key,
11012                                   const int read_only_mode)
11013 {
11014         const u64 root_id = root_key->objectid;
11015         struct cache_extent *entry;
11016         struct root_item_info *rii;
11017         struct btrfs_root_item ri;
11018         unsigned long offset;
11019
11020         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11021         if (!entry) {
11022                 fprintf(stderr,
11023                         "Error: could not find extent items for root %llu\n",
11024                         root_key->objectid);
11025                 return -ENOENT;
11026         }
11027
11028         rii = container_of(entry, struct root_item_info, cache_extent);
11029         ASSERT(rii->cache_extent.start == root_id);
11030         ASSERT(rii->cache_extent.size == 1);
11031
11032         if (rii->node_count != 1) {
11033                 fprintf(stderr,
11034                         "Error: could not find btree root extent for root %llu\n",
11035                         root_id);
11036                 return -ENOENT;
11037         }
11038
11039         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11040         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11041
11042         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11043             btrfs_root_level(&ri) != rii->level ||
11044             btrfs_root_generation(&ri) != rii->gen) {
11045
11046                 /*
11047                  * If we're in repair mode but our caller told us to not update
11048                  * the root item, i.e. just check if it needs to be updated, don't
11049                  * print this message, since the caller will call us again shortly
11050                  * for the same root item without read only mode (the caller will
11051                  * open a transaction first).
11052                  */
11053                 if (!(read_only_mode && repair))
11054                         fprintf(stderr,
11055                                 "%sroot item for root %llu,"
11056                                 " current bytenr %llu, current gen %llu, current level %u,"
11057                                 " new bytenr %llu, new gen %llu, new level %u\n",
11058                                 (read_only_mode ? "" : "fixing "),
11059                                 root_id,
11060                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11061                                 btrfs_root_level(&ri),
11062                                 rii->bytenr, rii->gen, rii->level);
11063
11064                 if (btrfs_root_generation(&ri) > rii->gen) {
11065                         fprintf(stderr,
11066                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11067                                 root_id, btrfs_root_generation(&ri), rii->gen);
11068                         return -EINVAL;
11069                 }
11070
11071                 if (!read_only_mode) {
11072                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11073                         btrfs_set_root_level(&ri, rii->level);
11074                         btrfs_set_root_generation(&ri, rii->gen);
11075                         write_extent_buffer(path->nodes[0], &ri,
11076                                             offset, sizeof(ri));
11077                 }
11078
11079                 return 1;
11080         }
11081
11082         return 0;
11083 }
11084
11085 /*
11086  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11087  * caused read-only snapshots to be corrupted if they were created at a moment
11088  * when the source subvolume/snapshot had orphan items. The issue was that the
11089  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11090  * node instead of the post orphan cleanup root node.
11091  * So this function, and its callees, just detects and fixes those cases. Even
11092  * though the regression was for read-only snapshots, this function applies to
11093  * any snapshot/subvolume root.
11094  * This must be run before any other repair code - not doing it so, makes other
11095  * repair code delete or modify backrefs in the extent tree for example, which
11096  * will result in an inconsistent fs after repairing the root items.
11097  */
11098 static int repair_root_items(struct btrfs_fs_info *info)
11099 {
11100         struct btrfs_path *path = NULL;
11101         struct btrfs_key key;
11102         struct extent_buffer *leaf;
11103         struct btrfs_trans_handle *trans = NULL;
11104         int ret = 0;
11105         int bad_roots = 0;
11106         int need_trans = 0;
11107
11108         ret = build_roots_info_cache(info);
11109         if (ret)
11110                 goto out;
11111
11112         path = btrfs_alloc_path();
11113         if (!path) {
11114                 ret = -ENOMEM;
11115                 goto out;
11116         }
11117
11118         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11119         key.type = BTRFS_ROOT_ITEM_KEY;
11120         key.offset = 0;
11121
11122 again:
11123         /*
11124          * Avoid opening and committing transactions if a leaf doesn't have
11125          * any root items that need to be fixed, so that we avoid rotating
11126          * backup roots unnecessarily.
11127          */
11128         if (need_trans) {
11129                 trans = btrfs_start_transaction(info->tree_root, 1);
11130                 if (IS_ERR(trans)) {
11131                         ret = PTR_ERR(trans);
11132                         goto out;
11133                 }
11134         }
11135
11136         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11137                                 0, trans ? 1 : 0);
11138         if (ret < 0)
11139                 goto out;
11140         leaf = path->nodes[0];
11141
11142         while (1) {
11143                 struct btrfs_key found_key;
11144
11145                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11146                         int no_more_keys = find_next_key(path, &key);
11147
11148                         btrfs_release_path(path);
11149                         if (trans) {
11150                                 ret = btrfs_commit_transaction(trans,
11151                                                                info->tree_root);
11152                                 trans = NULL;
11153                                 if (ret < 0)
11154                                         goto out;
11155                         }
11156                         need_trans = 0;
11157                         if (no_more_keys)
11158                                 break;
11159                         goto again;
11160                 }
11161
11162                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11163
11164                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11165                         goto next;
11166                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11167                         goto next;
11168
11169                 ret = maybe_repair_root_item(info, path, &found_key,
11170                                              trans ? 0 : 1);
11171                 if (ret < 0)
11172                         goto out;
11173                 if (ret) {
11174                         if (!trans && repair) {
11175                                 need_trans = 1;
11176                                 key = found_key;
11177                                 btrfs_release_path(path);
11178                                 goto again;
11179                         }
11180                         bad_roots++;
11181                 }
11182 next:
11183                 path->slots[0]++;
11184         }
11185         ret = 0;
11186 out:
11187         free_roots_info_cache();
11188         btrfs_free_path(path);
11189         if (trans)
11190                 btrfs_commit_transaction(trans, info->tree_root);
11191         if (ret < 0)
11192                 return ret;
11193
11194         return bad_roots;
11195 }
11196
/*
 * Usage text for "btrfs check": synopsis, one-line summary, long description,
 * an empty separator string, then one or more lines per option.  The array is
 * NULL terminated.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and short summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               select mode, allows to make some memory/IO",
	"                            trade-offs, where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
11226
11227 int cmd_check(int argc, char **argv)
11228 {
11229         struct cache_tree root_cache;
11230         struct btrfs_root *root;
11231         struct btrfs_fs_info *info;
11232         u64 bytenr = 0;
11233         u64 subvolid = 0;
11234         u64 tree_root_bytenr = 0;
11235         u64 chunk_root_bytenr = 0;
11236         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11237         int ret;
11238         u64 num;
11239         int init_csum_tree = 0;
11240         int readonly = 0;
11241         int qgroup_report = 0;
11242         int qgroups_repaired = 0;
11243         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11244
11245         while(1) {
11246                 int c;
11247                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11248                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11249                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11250                         GETOPT_VAL_MODE };
11251                 static const struct option long_options[] = {
11252                         { "super", required_argument, NULL, 's' },
11253                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11254                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11255                         { "init-csum-tree", no_argument, NULL,
11256                                 GETOPT_VAL_INIT_CSUM },
11257                         { "init-extent-tree", no_argument, NULL,
11258                                 GETOPT_VAL_INIT_EXTENT },
11259                         { "check-data-csum", no_argument, NULL,
11260                                 GETOPT_VAL_CHECK_CSUM },
11261                         { "backup", no_argument, NULL, 'b' },
11262                         { "subvol-extents", required_argument, NULL, 'E' },
11263                         { "qgroup-report", no_argument, NULL, 'Q' },
11264                         { "tree-root", required_argument, NULL, 'r' },
11265                         { "chunk-root", required_argument, NULL,
11266                                 GETOPT_VAL_CHUNK_TREE },
11267                         { "progress", no_argument, NULL, 'p' },
11268                         { "mode", required_argument, NULL,
11269                                 GETOPT_VAL_MODE },
11270                         { NULL, 0, NULL, 0}
11271                 };
11272
11273                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11274                 if (c < 0)
11275                         break;
11276                 switch(c) {
11277                         case 'a': /* ignored */ break;
11278                         case 'b':
11279                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11280                                 break;
11281                         case 's':
11282                                 num = arg_strtou64(optarg);
11283                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11284                                         fprintf(stderr,
11285                                                 "ERROR: super mirror should be less than: %d\n",
11286                                                 BTRFS_SUPER_MIRROR_MAX);
11287                                         exit(1);
11288                                 }
11289                                 bytenr = btrfs_sb_offset(((int)num));
11290                                 printf("using SB copy %llu, bytenr %llu\n", num,
11291                                        (unsigned long long)bytenr);
11292                                 break;
11293                         case 'Q':
11294                                 qgroup_report = 1;
11295                                 break;
11296                         case 'E':
11297                                 subvolid = arg_strtou64(optarg);
11298                                 break;
11299                         case 'r':
11300                                 tree_root_bytenr = arg_strtou64(optarg);
11301                                 break;
11302                         case GETOPT_VAL_CHUNK_TREE:
11303                                 chunk_root_bytenr = arg_strtou64(optarg);
11304                                 break;
11305                         case 'p':
11306                                 ctx.progress_enabled = true;
11307                                 break;
11308                         case '?':
11309                         case 'h':
11310                                 usage(cmd_check_usage);
11311                         case GETOPT_VAL_REPAIR:
11312                                 printf("enabling repair mode\n");
11313                                 repair = 1;
11314                                 ctree_flags |= OPEN_CTREE_WRITES;
11315                                 break;
11316                         case GETOPT_VAL_READONLY:
11317                                 readonly = 1;
11318                                 break;
11319                         case GETOPT_VAL_INIT_CSUM:
11320                                 printf("Creating a new CRC tree\n");
11321                                 init_csum_tree = 1;
11322                                 repair = 1;
11323                                 ctree_flags |= OPEN_CTREE_WRITES;
11324                                 break;
11325                         case GETOPT_VAL_INIT_EXTENT:
11326                                 init_extent_tree = 1;
11327                                 ctree_flags |= (OPEN_CTREE_WRITES |
11328                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11329                                 repair = 1;
11330                                 break;
11331                         case GETOPT_VAL_CHECK_CSUM:
11332                                 check_data_csum = 1;
11333                                 break;
11334                         case GETOPT_VAL_MODE:
11335                                 check_mode = parse_check_mode(optarg);
11336                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11337                                         error("unknown mode: %s", optarg);
11338                                         exit(1);
11339                                 }
11340                                 break;
11341                 }
11342         }
11343
11344         if (check_argc_exact(argc - optind, 1))
11345                 usage(cmd_check_usage);
11346
11347         if (ctx.progress_enabled) {
11348                 ctx.tp = TASK_NOTHING;
11349                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11350         }
11351
11352         /* This check is the only reason for --readonly to exist */
11353         if (readonly && repair) {
11354                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11355                 exit(1);
11356         }
11357
11358         /*
11359          * Not supported yet
11360          */
11361         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11362                 error("Low memory mode doesn't support repair yet");
11363                 exit(1);
11364         }
11365
11366         radix_tree_init();
11367         cache_tree_init(&root_cache);
11368
11369         if((ret = check_mounted(argv[optind])) < 0) {
11370                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11371                 goto err_out;
11372         } else if(ret) {
11373                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11374                 ret = -EBUSY;
11375                 goto err_out;
11376         }
11377
11378         /* only allow partial opening under repair mode */
11379         if (repair)
11380                 ctree_flags |= OPEN_CTREE_PARTIAL;
11381
11382         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11383                                   chunk_root_bytenr, ctree_flags);
11384         if (!info) {
11385                 fprintf(stderr, "Couldn't open file system\n");
11386                 ret = -EIO;
11387                 goto err_out;
11388         }
11389
11390         global_info = info;
11391         root = info->fs_root;
11392
11393         /*
11394          * repair mode will force us to commit transaction which
11395          * will make us fail to load log tree when mounting.
11396          */
11397         if (repair && btrfs_super_log_root(info->super_copy)) {
11398                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11399                 if (!ret) {
11400                         ret = 1;
11401                         goto close_out;
11402                 }
11403                 ret = zero_log_tree(root);
11404                 if (ret) {
11405                         fprintf(stderr, "fail to zero log tree\n");
11406                         goto close_out;
11407                 }
11408         }
11409
11410         uuid_unparse(info->super_copy->fsid, uuidbuf);
11411         if (qgroup_report) {
11412                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11413                        uuidbuf);
11414                 ret = qgroup_verify_all(info);
11415                 if (ret == 0)
11416                         report_qgroups(1);
11417                 goto close_out;
11418         }
11419         if (subvolid) {
11420                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11421                        subvolid, argv[optind], uuidbuf);
11422                 ret = print_extent_state(info, subvolid);
11423                 goto close_out;
11424         }
11425         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11426
11427         if (!extent_buffer_uptodate(info->tree_root->node) ||
11428             !extent_buffer_uptodate(info->dev_root->node) ||
11429             !extent_buffer_uptodate(info->chunk_root->node)) {
11430                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11431                 ret = -EIO;
11432                 goto close_out;
11433         }
11434
11435         if (init_extent_tree || init_csum_tree) {
11436                 struct btrfs_trans_handle *trans;
11437
11438                 trans = btrfs_start_transaction(info->extent_root, 0);
11439                 if (IS_ERR(trans)) {
11440                         fprintf(stderr, "Error starting transaction\n");
11441                         ret = PTR_ERR(trans);
11442                         goto close_out;
11443                 }
11444
11445                 if (init_extent_tree) {
11446                         printf("Creating a new extent tree\n");
11447                         ret = reinit_extent_tree(trans, info);
11448                         if (ret)
11449                                 goto close_out;
11450                 }
11451
11452                 if (init_csum_tree) {
11453                         fprintf(stderr, "Reinit crc root\n");
11454                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11455                         if (ret) {
11456                                 fprintf(stderr, "crc root initialization failed\n");
11457                                 ret = -EIO;
11458                                 goto close_out;
11459                         }
11460
11461                         ret = fill_csum_tree(trans, info->csum_root,
11462                                              init_extent_tree);
11463                         if (ret) {
11464                                 fprintf(stderr, "crc refilling failed\n");
11465                                 return -EIO;
11466                         }
11467                 }
11468                 /*
11469                  * Ok now we commit and run the normal fsck, which will add
11470                  * extent entries for all of the items it finds.
11471                  */
11472                 ret = btrfs_commit_transaction(trans, info->extent_root);
11473                 if (ret)
11474                         goto close_out;
11475         }
11476         if (!extent_buffer_uptodate(info->extent_root->node)) {
11477                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11478                 ret = -EIO;
11479                 goto close_out;
11480         }
11481         if (!extent_buffer_uptodate(info->csum_root->node)) {
11482                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11483                 ret = -EIO;
11484                 goto close_out;
11485         }
11486
11487         if (!ctx.progress_enabled)
11488                 fprintf(stderr, "checking extents\n");
11489         if (check_mode == CHECK_MODE_LOWMEM)
11490                 ret = check_chunks_and_extents_v2(root);
11491         else
11492                 ret = check_chunks_and_extents(root);
11493         if (ret)
11494                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11495
11496         ret = repair_root_items(info);
11497         if (ret < 0)
11498                 goto close_out;
11499         if (repair) {
11500                 fprintf(stderr, "Fixed %d roots.\n", ret);
11501                 ret = 0;
11502         } else if (ret > 0) {
11503                 fprintf(stderr,
11504                        "Found %d roots with an outdated root item.\n",
11505                        ret);
11506                 fprintf(stderr,
11507                         "Please run a filesystem check with the option --repair to fix them.\n");
11508                 ret = 1;
11509                 goto close_out;
11510         }
11511
11512         if (!ctx.progress_enabled) {
11513                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11514                         fprintf(stderr, "checking free space tree\n");
11515                 else
11516                         fprintf(stderr, "checking free space cache\n");
11517         }
11518         ret = check_space_cache(root);
11519         if (ret)
11520                 goto out;
11521
11522         /*
11523          * We used to have to have these hole extents in between our real
11524          * extents so if we don't have this flag set we need to make sure there
11525          * are no gaps in the file extents for inodes, otherwise we can just
11526          * ignore it when this happens.
11527          */
11528         no_holes = btrfs_fs_incompat(root->fs_info,
11529                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11530         if (!ctx.progress_enabled)
11531                 fprintf(stderr, "checking fs roots\n");
11532         ret = check_fs_roots(root, &root_cache);
11533         if (ret)
11534                 goto out;
11535
11536         fprintf(stderr, "checking csums\n");
11537         ret = check_csums(root);
11538         if (ret)
11539                 goto out;
11540
11541         fprintf(stderr, "checking root refs\n");
11542         ret = check_root_refs(root, &root_cache);
11543         if (ret)
11544                 goto out;
11545
11546         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11547                 struct extent_buffer *eb;
11548
11549                 eb = list_first_entry(&root->fs_info->recow_ebs,
11550                                       struct extent_buffer, recow);
11551                 list_del_init(&eb->recow);
11552                 ret = recow_extent_buffer(root, eb);
11553                 if (ret)
11554                         break;
11555         }
11556
11557         while (!list_empty(&delete_items)) {
11558                 struct bad_item *bad;
11559
11560                 bad = list_first_entry(&delete_items, struct bad_item, list);
11561                 list_del_init(&bad->list);
11562                 if (repair)
11563                         ret = delete_bad_item(root, bad);
11564                 free(bad);
11565         }
11566
11567         if (info->quota_enabled) {
11568                 int err;
11569                 fprintf(stderr, "checking quota groups\n");
11570                 err = qgroup_verify_all(info);
11571                 if (err)
11572                         goto out;
11573                 report_qgroups(0);
11574                 err = repair_qgroups(info, &qgroups_repaired);
11575                 if (err)
11576                         goto out;
11577         }
11578
11579         if (!list_empty(&root->fs_info->recow_ebs)) {
11580                 fprintf(stderr, "Transid errors in file system\n");
11581                 ret = 1;
11582         }
11583 out:
11584         /* Don't override original ret */
11585         if (!ret && qgroups_repaired)
11586                 ret = qgroups_repaired;
11587
11588         if (found_old_backref) { /*
11589                  * there was a disk format change when mixed
11590                  * backref was in testing tree. The old format
11591                  * existed about one week.
11592                  */
11593                 printf("\n * Found old mixed backref format. "
11594                        "The old format is not supported! *"
11595                        "\n * Please mount the FS in readonly mode, "
11596                        "backup data and re-format the FS. *\n\n");
11597                 ret = 1;
11598         }
11599         printf("found %llu bytes used err is %d\n",
11600                (unsigned long long)bytes_used, ret);
11601         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11602         printf("total tree bytes: %llu\n",
11603                (unsigned long long)total_btree_bytes);
11604         printf("total fs tree bytes: %llu\n",
11605                (unsigned long long)total_fs_tree_bytes);
11606         printf("total extent tree bytes: %llu\n",
11607                (unsigned long long)total_extent_tree_bytes);
11608         printf("btree space waste bytes: %llu\n",
11609                (unsigned long long)btree_space_waste);
11610         printf("file data blocks allocated: %llu\n referenced %llu\n",
11611                 (unsigned long long)data_bytes_allocated,
11612                 (unsigned long long)data_bytes_referenced);
11613
11614         free_qgroup_counts();
11615         free_root_recs_tree(&root_cache);
11616 close_out:
11617         close_ctree(root);
11618 err_out:
11619         if (ctx.progress_enabled)
11620                 task_deinit(ctx.info);
11621
11622         return ret;
11623 }