btrfs-progs: print help test to stdout
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase currently reported by the progress indicator.  Every value other
 * than TASK_NOTHING is used as an index into task_position_string[] in
 * print_status_check().
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* Context handed to print_status_check() as its opaque argument. */
struct task_ctx {
        int progress_enabled;
        enum task_position tp;  /* phase whose label is printed */

        struct task_info *info; /* passed to task_period_start()/task_period_wait() */
};
58
/*
 * File-scope state of the check command.  All of it has static storage
 * duration and starts out zeroed / empty.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation selected by the user.
 *
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it
 * does not recognise.  CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL, not a separate value.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common header of a back reference.  Both data_backref and tree_backref
 * embed it as their first member, named "node".
 */
struct extent_backref {
        struct rb_node node;            /* rb-tree linkage, see rb_node_to_extent_backref() */
        unsigned int is_data:1;         /* selects the comparator in compare_extent_backref() */
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;    /* also a sort key in compare_extent_backref() */
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
96 {
97         return rb_entry(node, struct extent_backref, node);
98 }
99
/*
 * Back reference to a data extent.  The field comparison order used for
 * sorting is defined by compare_data_backref().
 */
struct data_backref {
        struct extent_backref node;     /* common header, see to_data_backref() */
        union {
                /* parent and root share the same storage */
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
121 {
122         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124         struct data_backref *back1 = to_data_backref(ext1);
125         struct data_backref *back2 = to_data_backref(ext2);
126
127         WARN_ON(!ext1->is_data);
128         WARN_ON(!ext2->is_data);
129
130         /* parent and root are a union, so this covers both */
131         if (back1->parent > back2->parent)
132                 return 1;
133         if (back1->parent < back2->parent)
134                 return -1;
135
136         /* This is a full backref and the parents match. */
137         if (back1->node.full_backref)
138                 return 0;
139
140         if (back1->owner > back2->owner)
141                 return 1;
142         if (back1->owner < back2->owner)
143                 return -1;
144
145         if (back1->offset > back2->offset)
146                 return 1;
147         if (back1->offset < back2->offset)
148                 return -1;
149
150         if (back1->bytes > back2->bytes)
151                 return 1;
152         if (back1->bytes < back2->bytes)
153                 return -1;
154
155         if (back1->found_ref && back2->found_ref) {
156                 if (back1->disk_bytenr > back2->disk_bytenr)
157                         return 1;
158                 if (back1->disk_bytenr < back2->disk_bytenr)
159                         return -1;
160
161                 if (back1->found_ref > back2->found_ref)
162                         return 1;
163                 if (back1->found_ref < back2->found_ref)
164                         return -1;
165         }
166
167         return 0;
168 }
169
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb-tree node.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* printed as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
184
/*
 * Back reference to a tree block.  Sorted by parent/root only, see
 * compare_tree_backref().
 */
struct tree_backref {
        struct extent_backref node;     /* common header, see to_tree_backref() */
        union {
                /* parent and root share the same storage */
                u64 parent;
                u64 root;
        };
};
192
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
194 {
195         return container_of(back, struct tree_backref, node);
196 }
197
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
199 {
200         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202         struct tree_backref *back1 = to_tree_backref(ext1);
203         struct tree_backref *back2 = to_tree_backref(ext2);
204
205         WARN_ON(ext1->is_data);
206         WARN_ON(ext2->is_data);
207
208         /* parent and root are a union, so this covers both */
209         if (back1->parent > back2->parent)
210                 return 1;
211         if (back1->parent < back2->parent)
212                 return -1;
213
214         return 0;
215 }
216
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
218 {
219         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
221
222         if (ext1->is_data > ext2->is_data)
223                 return 1;
224
225         if (ext1->is_data < ext2->is_data)
226                 return -1;
227
228         if (ext1->full_backref > ext2->full_backref)
229                 return 1;
230         if (ext1->full_backref < ext2->full_backref)
231                 return -1;
232
233         if (ext1->is_data)
234                 return compare_data_backref(node1, node2);
235         else
236                 return compare_tree_backref(node1, node2);
237 }
238
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 fits in that field because it is declared 2 bits wide.
 */
enum { FLAG_UNSET = 2 };
241
/*
 * Per-extent bookkeeping record.  Back references are kept both on a
 * list (backrefs) and in an rb-tree (backref_tree).
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct rb_root backref_tree;
        struct list_head list;          /* linkage used by to_extent_record() */
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* 2 bits wide so that it can also hold FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
269
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
271 {
272         return container_of(entry, struct extent_record, list);
273 }
274
/*
 * One name referring to an inode.  The name bytes are stored directly
 * behind the structure; clone_inode_rec() copies
 * sizeof(struct) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;          /* linked into inode_record::backrefs */
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        unsigned int filetype:8;
        int errors;                     /* presumably a mask of REF_ERR_* bits -- TODO confirm */
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];                   /* trailing name storage */
};
288
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
290 {
291         return list_entry(entry, struct inode_backref, list);
292 }
293
/*
 * List-linked summary of a root item.
 * NOTE(review): the field names suggest these mirror the on-disk root
 * item -- confirm against the code that fills the record in.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
304
/*
 * Reference error bits.  Each one is translated into a message by
 * print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
318
/*
 * One hole in a file, kept in an rb-tree ordered by compare_hole().
 * compare_hole_range() treats it as the half-open range
 * [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
324
/*
 * Everything collected about one inode.  Records can be shared: a record
 * with refs > 1 is cloned by get_inode_rec() before it is modified.
 */
struct inode_record {
        struct list_head backrefs;      /* list of struct inode_backref */
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        int errors;                     /* I_ERR_* bits, decoded by print_inode_error() */

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;               /* (u64)-1 in a freshly created record */
        u64 extent_end;
        struct rb_root holes;           /* rb-tree of struct file_extent_hole */
        struct list_head orphan_extents; /* list of struct orphan_data_extent */

        u32 refs;                       /* reset to 1 in a clone, see clone_inode_rec() */
};
352
/*
 * Inode error bits stored in inode_record::errors.  Each one is
 * translated into a message by print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
368
/*
 * One reference to a root, linked through "list".  As with
 * inode_backref, the structure ends in a zero-length name array, so the
 * name bytes are expected to be allocated directly behind it.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;                     /* presumably a mask of REF_ERR_* bits -- TODO confirm */
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];                   /* trailing name storage */
};
383
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
385 {
386         return list_entry(entry, struct root_backref, list);
387 }
388
/* Per-root record: carries a list of backrefs and sits in a cache tree. */
struct root_record {
        struct list_head backrefs;      /* presumably struct root_backref entries -- TODO confirm */
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
396
/*
 * Cache tree entry carrying an opaque pointer.  get_inode_rec() uses it
 * to store a struct inode_record in "data".
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
401
/*
 * Cache tree entry that owns its own root and inode cache trees.
 * NOTE(review): presumably one per shared tree block -- confirm against
 * the tree walking code.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
409
/* Start and size of one block. */
struct block_info {
        u64 start;
        u32 size;
};
414
/*
 * Tree walk state: a cache tree named "shared" plus one shared_node slot
 * for each of the BTRFS_MAX_LEVEL tree levels.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
421
/* List entry identifying an item by its key and the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
427
/*
 * List entry describing one (bytenr, bytes) extent candidate.
 * NOTE(review): count and broken look like tallies of how often this
 * candidate was seen -- confirm against the users of this struct.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
435
/* Cache tree entry with what is known about one tree root. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
445
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that a combined error mask can be
 * decoded unambiguously.  CROSSING_STRIPE_BOUNDARY used to share bit 4
 * with REFERENCER_MISMATCH and now uses the first free bit instead.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
462
463 static void *print_status_check(void *p)
464 {
465         struct task_ctx *priv = p;
466         const char work_indicator[] = { '.', 'o', 'O', 'o' };
467         uint32_t count = 0;
468         static char *task_position_string[] = {
469                 "checking extents",
470                 "checking free space cache",
471                 "checking fs roots",
472         };
473
474         task_period_start(priv->info, 1000 /* 1s */);
475
476         if (priv->tp == TASK_NOTHING)
477                 return NULL;
478
479         while (1) {
480                 printf("%s [%c]\r", task_position_string[priv->tp],
481                                 work_indicator[count % 4]);
482                 count++;
483                 fflush(stdout);
484                 task_period_wait(priv->info);
485         }
486         return NULL;
487 }
488
/*
 * Progress task epilogue: emit a newline on stdout and flush it.
 * The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputc('\n', stdout);
        fflush(stdout);

        return 0;
}
496
497 static enum btrfs_check_mode parse_check_mode(const char *str)
498 {
499         if (strcmp(str, "lowmem") == 0)
500                 return CHECK_MODE_LOWMEM;
501         if (strcmp(str, "orig") == 0)
502                 return CHECK_MODE_ORIGINAL;
503         if (strcmp(str, "original") == 0)
504                 return CHECK_MODE_ORIGINAL;
505
506         return CHECK_MODE_UNKNOWN;
507 }
508
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
511 {
512         struct file_extent_hole *hole;
513
514         if (RB_EMPTY_ROOT(holes))
515                 return (u64)-1;
516
517         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
518         return hole->start;
519 }
520
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
522 {
523         struct file_extent_hole *hole1;
524         struct file_extent_hole *hole2;
525
526         hole1 = rb_entry(node1, struct file_extent_hole, node);
527         hole2 = rb_entry(node2, struct file_extent_hole, node);
528
529         if (hole1->start > hole2->start)
530                 return -1;
531         if (hole1->start < hole2->start)
532                 return 1;
533         /* Now hole1->start == hole2->start */
534         if (hole1->len >= hole2->len)
535                 /*
536                  * Hole 1 will be merge center
537                  * Same hole will be merged later
538                  */
539                 return -1;
540         /* Hole 2 will be merge center */
541         return 1;
542 }
543
544 /*
545  * Add a hole to the record
546  *
547  * This will do hole merge for copy_file_extent_holes(),
548  * which will ensure there won't be continuous holes.
549  */
550 static int add_file_extent_hole(struct rb_root *holes,
551                                 u64 start, u64 len)
552 {
553         struct file_extent_hole *hole;
554         struct file_extent_hole *prev = NULL;
555         struct file_extent_hole *next = NULL;
556
557         hole = malloc(sizeof(*hole));
558         if (!hole)
559                 return -ENOMEM;
560         hole->start = start;
561         hole->len = len;
562         /* Since compare will not return 0, no -EEXIST will happen */
563         rb_insert(holes, &hole->node, compare_hole);
564
565         /* simple merge with previous hole */
566         if (rb_prev(&hole->node))
567                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
568                                 node);
569         if (prev && prev->start + prev->len >= hole->start) {
570                 hole->len = hole->start + hole->len - prev->start;
571                 hole->start = prev->start;
572                 rb_erase(&prev->node, holes);
573                 free(prev);
574                 prev = NULL;
575         }
576
577         /* iterate merge with next holes */
578         while (1) {
579                 if (!rb_next(&hole->node))
580                         break;
581                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
582                                         node);
583                 if (hole->start + hole->len >= next->start) {
584                         if (hole->start + hole->len <= next->start + next->len)
585                                 hole->len = next->start + next->len -
586                                             hole->start;
587                         rb_erase(&next->node, holes);
588                         free(next);
589                         next = NULL;
590                 } else
591                         break;
592         }
593         return 0;
594 }
595
596 static int compare_hole_range(struct rb_node *node, void *data)
597 {
598         struct file_extent_hole *hole;
599         u64 start;
600
601         hole = (struct file_extent_hole *)data;
602         start = hole->start;
603
604         hole = rb_entry(node, struct file_extent_hole, node);
605         if (start < hole->start)
606                 return -1;
607         if (start >= hole->start && start < hole->start + hole->len)
608                 return 0;
609         return 1;
610 }
611
612 /*
613  * Delete a hole in the record
614  *
615  * This will do the hole split and is much restrict than add.
616  */
617 static int del_file_extent_hole(struct rb_root *holes,
618                                 u64 start, u64 len)
619 {
620         struct file_extent_hole *hole;
621         struct file_extent_hole tmp;
622         u64 prev_start = 0;
623         u64 prev_len = 0;
624         u64 next_start = 0;
625         u64 next_len = 0;
626         struct rb_node *node;
627         int have_prev = 0;
628         int have_next = 0;
629         int ret = 0;
630
631         tmp.start = start;
632         tmp.len = len;
633         node = rb_search(holes, &tmp, compare_hole_range, NULL);
634         if (!node)
635                 return -EEXIST;
636         hole = rb_entry(node, struct file_extent_hole, node);
637         if (start + len > hole->start + hole->len)
638                 return -EEXIST;
639
640         /*
641          * Now there will be no overlap, delete the hole and re-add the
642          * split(s) if they exists.
643          */
644         if (start > hole->start) {
645                 prev_start = hole->start;
646                 prev_len = start - hole->start;
647                 have_prev = 1;
648         }
649         if (hole->start + hole->len > start + len) {
650                 next_start = start + len;
651                 next_len = hole->start + hole->len - start - len;
652                 have_next = 1;
653         }
654         rb_erase(node, holes);
655         free(hole);
656         if (have_prev) {
657                 ret = add_file_extent_hole(holes, prev_start, prev_len);
658                 if (ret < 0)
659                         return ret;
660         }
661         if (have_next) {
662                 ret = add_file_extent_hole(holes, next_start, next_len);
663                 if (ret < 0)
664                         return ret;
665         }
666         return 0;
667 }
668
669 static int copy_file_extent_holes(struct rb_root *dst,
670                                   struct rb_root *src)
671 {
672         struct file_extent_hole *hole;
673         struct rb_node *node;
674         int ret = 0;
675
676         node = rb_first(src);
677         while (node) {
678                 hole = rb_entry(node, struct file_extent_hole, node);
679                 ret = add_file_extent_hole(dst, hole->start, hole->len);
680                 if (ret)
681                         break;
682                 node = rb_next(node);
683         }
684         return ret;
685 }
686
687 static void free_file_extent_holes(struct rb_root *holes)
688 {
689         struct rb_node *node;
690         struct file_extent_hole *hole;
691
692         node = rb_first(holes);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 rb_erase(node, holes);
696                 free(hole);
697                 node = rb_first(holes);
698         }
699 }
700
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
702
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704                                  struct btrfs_root *root)
705 {
706         if (root->last_trans != trans->transid) {
707                 root->track_dirty = 1;
708                 root->last_trans = trans->transid;
709                 root->commit_root = root->node;
710                 extent_buffer_get(root->node);
711         }
712 }
713
714 static u8 imode_to_type(u32 imode)
715 {
716 #define S_SHIFT 12
717         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
719                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
720                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
721                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
722                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
723                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
724                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
725         };
726
727         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
728 #undef S_SHIFT
729 }
730
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
732 {
733         struct device_record *rec1;
734         struct device_record *rec2;
735
736         rec1 = rb_entry(node1, struct device_record, node);
737         rec2 = rb_entry(node2, struct device_record, node);
738         if (rec1->devid > rec2->devid)
739                 return -1;
740         else if (rec1->devid < rec2->devid)
741                 return 1;
742         else
743                 return 0;
744 }
745
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
747 {
748         struct inode_record *rec;
749         struct inode_backref *backref;
750         struct inode_backref *orig;
751         struct inode_backref *tmp;
752         struct orphan_data_extent *src_orphan;
753         struct orphan_data_extent *dst_orphan;
754         size_t size;
755         int ret;
756
757         rec = malloc(sizeof(*rec));
758         if (!rec)
759                 return ERR_PTR(-ENOMEM);
760         memcpy(rec, orig_rec, sizeof(*rec));
761         rec->refs = 1;
762         INIT_LIST_HEAD(&rec->backrefs);
763         INIT_LIST_HEAD(&rec->orphan_extents);
764         rec->holes = RB_ROOT;
765
766         list_for_each_entry(orig, &orig_rec->backrefs, list) {
767                 size = sizeof(*orig) + orig->namelen + 1;
768                 backref = malloc(size);
769                 if (!backref) {
770                         ret = -ENOMEM;
771                         goto cleanup;
772                 }
773                 memcpy(backref, orig, size);
774                 list_add_tail(&backref->list, &rec->backrefs);
775         }
776         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777                 dst_orphan = malloc(sizeof(*dst_orphan));
778                 if (!dst_orphan) {
779                         ret = -ENOMEM;
780                         goto cleanup;
781                 }
782                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
784         }
785         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
786         BUG_ON(ret < 0);
787
788         return rec;
789
790 cleanup:
791         if (!list_empty(&rec->backrefs))
792                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793                         list_del(&orig->list);
794                         free(orig);
795                 }
796
797         if (!list_empty(&rec->orphan_extents))
798                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799                         list_del(&orig->list);
800                         free(orig);
801                 }
802
803         free(rec);
804
805         return ERR_PTR(ret);
806 }
807
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
809                                       u64 objectid)
810 {
811         struct orphan_data_extent *orphan;
812
813         if (list_empty(orphan_extents))
814                 return;
815         printf("The following data extent is lost in tree %llu:\n",
816                objectid);
817         list_for_each_entry(orphan, orphan_extents, list) {
818                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
820                        orphan->disk_len);
821         }
822 }
823
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
825 {
826         u64 root_objectid = root->root_key.objectid;
827         int errors = rec->errors;
828
829         if (!errors)
830                 return;
831         /* reloc root errors, we print its corresponding fs root objectid*/
832         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833                 root_objectid = root->root_key.offset;
834                 fprintf(stderr, "reloc");
835         }
836         fprintf(stderr, "root %llu inode %llu errors %x",
837                 (unsigned long long) root_objectid,
838                 (unsigned long long) rec->ino, rec->errors);
839
840         if (errors & I_ERR_NO_INODE_ITEM)
841                 fprintf(stderr, ", no inode item");
842         if (errors & I_ERR_NO_ORPHAN_ITEM)
843                 fprintf(stderr, ", no orphan item");
844         if (errors & I_ERR_DUP_INODE_ITEM)
845                 fprintf(stderr, ", dup inode item");
846         if (errors & I_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & I_ERR_ODD_DIR_ITEM)
849                 fprintf(stderr, ", odd dir item");
850         if (errors & I_ERR_ODD_FILE_EXTENT)
851                 fprintf(stderr, ", odd file extent");
852         if (errors & I_ERR_BAD_FILE_EXTENT)
853                 fprintf(stderr, ", bad file extent");
854         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855                 fprintf(stderr, ", file extent overlap");
856         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857                 fprintf(stderr, ", file extent discount");
858         if (errors & I_ERR_DIR_ISIZE_WRONG)
859                 fprintf(stderr, ", dir isize wrong");
860         if (errors & I_ERR_FILE_NBYTES_WRONG)
861                 fprintf(stderr, ", nbytes wrong");
862         if (errors & I_ERR_ODD_CSUM_ITEM)
863                 fprintf(stderr, ", odd csum item");
864         if (errors & I_ERR_SOME_CSUM_MISSING)
865                 fprintf(stderr, ", some csum missing");
866         if (errors & I_ERR_LINK_COUNT_WRONG)
867                 fprintf(stderr, ", link count wrong");
868         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869                 fprintf(stderr, ", orphan file extent");
870         fprintf(stderr, "\n");
871         /* Print the orphan extents if needed */
872         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
874
875         /* Print the holes if needed */
876         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877                 struct file_extent_hole *hole;
878                 struct rb_node *node;
879                 int found = 0;
880
881                 node = rb_first(&rec->holes);
882                 fprintf(stderr, "Found file extent holes:\n");
883                 while (node) {
884                         found = 1;
885                         hole = rb_entry(node, struct file_extent_hole, node);
886                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
887                                 hole->start, hole->len);
888                         node = rb_next(node);
889                 }
890                 if (!found)
891                         fprintf(stderr, "\tstart: 0, len: %llu\n",
892                                 round_up(rec->isize, root->sectorsize));
893         }
894 }
895
896 static void print_ref_error(int errors)
897 {
898         if (errors & REF_ERR_NO_DIR_ITEM)
899                 fprintf(stderr, ", no dir item");
900         if (errors & REF_ERR_NO_DIR_INDEX)
901                 fprintf(stderr, ", no dir index");
902         if (errors & REF_ERR_NO_INODE_REF)
903                 fprintf(stderr, ", no inode ref");
904         if (errors & REF_ERR_DUP_DIR_ITEM)
905                 fprintf(stderr, ", dup dir item");
906         if (errors & REF_ERR_DUP_DIR_INDEX)
907                 fprintf(stderr, ", dup dir index");
908         if (errors & REF_ERR_DUP_INODE_REF)
909                 fprintf(stderr, ", dup inode ref");
910         if (errors & REF_ERR_INDEX_UNMATCH)
911                 fprintf(stderr, ", index mismatch");
912         if (errors & REF_ERR_FILETYPE_UNMATCH)
913                 fprintf(stderr, ", filetype mismatch");
914         if (errors & REF_ERR_NAME_TOO_LONG)
915                 fprintf(stderr, ", name too long");
916         if (errors & REF_ERR_NO_ROOT_REF)
917                 fprintf(stderr, ", no root ref");
918         if (errors & REF_ERR_NO_ROOT_BACKREF)
919                 fprintf(stderr, ", no root backref");
920         if (errors & REF_ERR_DUP_ROOT_REF)
921                 fprintf(stderr, ", dup root ref");
922         if (errors & REF_ERR_DUP_ROOT_BACKREF)
923                 fprintf(stderr, ", dup root backref");
924         fprintf(stderr, "\n");
925 }
926
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
928                                           u64 ino, int mod)
929 {
930         struct ptr_node *node;
931         struct cache_extent *cache;
932         struct inode_record *rec = NULL;
933         int ret;
934
935         cache = lookup_cache_extent(inode_cache, ino, 1);
936         if (cache) {
937                 node = container_of(cache, struct ptr_node, cache);
938                 rec = node->data;
939                 if (mod && rec->refs > 1) {
940                         node->data = clone_inode_rec(rec);
941                         if (IS_ERR(node->data))
942                                 return node->data;
943                         rec->refs--;
944                         rec = node->data;
945                 }
946         } else if (mod) {
947                 rec = calloc(1, sizeof(*rec));
948                 if (!rec)
949                         return ERR_PTR(-ENOMEM);
950                 rec->ino = ino;
951                 rec->extent_start = (u64)-1;
952                 rec->refs = 1;
953                 INIT_LIST_HEAD(&rec->backrefs);
954                 INIT_LIST_HEAD(&rec->orphan_extents);
955                 rec->holes = RB_ROOT;
956
957                 node = malloc(sizeof(*node));
958                 if (!node) {
959                         free(rec);
960                         return ERR_PTR(-ENOMEM);
961                 }
962                 node->cache.start = ino;
963                 node->cache.size = 1;
964                 node->data = rec;
965
966                 if (ino == BTRFS_FREE_INO_OBJECTID)
967                         rec->found_link = 1;
968
969                 ret = insert_cache_extent(inode_cache, &node->cache);
970                 if (ret)
971                         return ERR_PTR(-EEXIST);
972         }
973         return rec;
974 }
975
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
977 {
978         struct orphan_data_extent *orphan;
979
980         while (!list_empty(orphan_extents)) {
981                 orphan = list_entry(orphan_extents->next,
982                                     struct orphan_data_extent, list);
983                 list_del(&orphan->list);
984                 free(orphan);
985         }
986 }
987
988 static void free_inode_rec(struct inode_record *rec)
989 {
990         struct inode_backref *backref;
991
992         if (--rec->refs > 0)
993                 return;
994
995         while (!list_empty(&rec->backrefs)) {
996                 backref = to_inode_backref(rec->backrefs.next);
997                 list_del(&backref->list);
998                 free(backref);
999         }
1000         free_orphan_data_extents(&rec->orphan_extents);
1001         free_file_extent_holes(&rec->holes);
1002         free(rec);
1003 }
1004
1005 static int can_free_inode_rec(struct inode_record *rec)
1006 {
1007         if (!rec->errors && rec->checked && rec->found_inode_item &&
1008             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1009                 return 1;
1010         return 0;
1011 }
1012
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014                                  struct inode_record *rec)
1015 {
1016         struct cache_extent *cache;
1017         struct inode_backref *tmp, *backref;
1018         struct ptr_node *node;
1019         unsigned char filetype;
1020
1021         if (!rec->found_inode_item)
1022                 return;
1023
1024         filetype = imode_to_type(rec->imode);
1025         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026                 if (backref->found_dir_item && backref->found_dir_index) {
1027                         if (backref->filetype != filetype)
1028                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029                         if (!backref->errors && backref->found_inode_ref &&
1030                             rec->nlink == rec->found_link) {
1031                                 list_del(&backref->list);
1032                                 free(backref);
1033                         }
1034                 }
1035         }
1036
1037         if (!rec->checked || rec->merging)
1038                 return;
1039
1040         if (S_ISDIR(rec->imode)) {
1041                 if (rec->found_size != rec->isize)
1042                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043                 if (rec->found_file_extent)
1044                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046                 if (rec->found_dir_item)
1047                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1048                 if (rec->found_size != rec->nbytes)
1049                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050                 if (rec->nlink > 0 && !no_holes &&
1051                     (rec->extent_end < rec->isize ||
1052                      first_extent_gap(&rec->holes) < rec->isize))
1053                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1054         }
1055
1056         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057                 if (rec->found_csum_item && rec->nodatasum)
1058                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059                 if (rec->some_csum_missing && !rec->nodatasum)
1060                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1061         }
1062
1063         BUG_ON(rec->refs != 1);
1064         if (can_free_inode_rec(rec)) {
1065                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066                 node = container_of(cache, struct ptr_node, cache);
1067                 BUG_ON(node->data != rec);
1068                 remove_cache_extent(inode_cache, &node->cache);
1069                 free(node);
1070                 free_inode_rec(rec);
1071         }
1072 }
1073
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1075 {
1076         struct btrfs_path path;
1077         struct btrfs_key key;
1078         int ret;
1079
1080         key.objectid = BTRFS_ORPHAN_OBJECTID;
1081         key.type = BTRFS_ORPHAN_ITEM_KEY;
1082         key.offset = ino;
1083
1084         btrfs_init_path(&path);
1085         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086         btrfs_release_path(&path);
1087         if (ret > 0)
1088                 ret = -ENOENT;
1089         return ret;
1090 }
1091
1092 static int process_inode_item(struct extent_buffer *eb,
1093                               int slot, struct btrfs_key *key,
1094                               struct shared_node *active_node)
1095 {
1096         struct inode_record *rec;
1097         struct btrfs_inode_item *item;
1098
1099         rec = active_node->current;
1100         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101         if (rec->found_inode_item) {
1102                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1103                 return 1;
1104         }
1105         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106         rec->nlink = btrfs_inode_nlink(eb, item);
1107         rec->isize = btrfs_inode_size(eb, item);
1108         rec->nbytes = btrfs_inode_nbytes(eb, item);
1109         rec->imode = btrfs_inode_mode(eb, item);
1110         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1111                 rec->nodatasum = 1;
1112         rec->found_inode_item = 1;
1113         if (rec->nlink == 0)
1114                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115         maybe_free_inode_rec(&active_node->inode_cache, rec);
1116         return 0;
1117 }
1118
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1120                                                 const char *name,
1121                                                 int namelen, u64 dir)
1122 {
1123         struct inode_backref *backref;
1124
1125         list_for_each_entry(backref, &rec->backrefs, list) {
1126                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1127                         break;
1128                 if (backref->dir != dir || backref->namelen != namelen)
1129                         continue;
1130                 if (memcmp(name, backref->name, namelen))
1131                         continue;
1132                 return backref;
1133         }
1134
1135         backref = malloc(sizeof(*backref) + namelen + 1);
1136         if (!backref)
1137                 return NULL;
1138         memset(backref, 0, sizeof(*backref));
1139         backref->dir = dir;
1140         backref->namelen = namelen;
1141         memcpy(backref->name, name, namelen);
1142         backref->name[namelen] = '\0';
1143         list_add_tail(&backref->list, &rec->backrefs);
1144         return backref;
1145 }
1146
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148                              u64 ino, u64 dir, u64 index,
1149                              const char *name, int namelen,
1150                              int filetype, int itemtype, int errors)
1151 {
1152         struct inode_record *rec;
1153         struct inode_backref *backref;
1154
1155         rec = get_inode_rec(inode_cache, ino, 1);
1156         BUG_ON(IS_ERR(rec));
1157         backref = get_inode_backref(rec, name, namelen, dir);
1158         BUG_ON(!backref);
1159         if (errors)
1160                 backref->errors |= errors;
1161         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162                 if (backref->found_dir_index)
1163                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164                 if (backref->found_inode_ref && backref->index != index)
1165                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1166                 if (backref->found_dir_item && backref->filetype != filetype)
1167                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1168
1169                 backref->index = index;
1170                 backref->filetype = filetype;
1171                 backref->found_dir_index = 1;
1172         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1173                 rec->found_link++;
1174                 if (backref->found_dir_item)
1175                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176                 if (backref->found_dir_index && backref->filetype != filetype)
1177                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1178
1179                 backref->filetype = filetype;
1180                 backref->found_dir_item = 1;
1181         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183                 if (backref->found_inode_ref)
1184                         backref->errors |= REF_ERR_DUP_INODE_REF;
1185                 if (backref->found_dir_index && backref->index != index)
1186                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1187                 else
1188                         backref->index = index;
1189
1190                 backref->ref_type = itemtype;
1191                 backref->found_inode_ref = 1;
1192         } else {
1193                 BUG_ON(1);
1194         }
1195
1196         maybe_free_inode_rec(inode_cache, rec);
1197         return 0;
1198 }
1199
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201                             struct cache_tree *dst_cache)
1202 {
1203         struct inode_backref *backref;
1204         u32 dir_count = 0;
1205         int ret = 0;
1206
1207         dst->merging = 1;
1208         list_for_each_entry(backref, &src->backrefs, list) {
1209                 if (backref->found_dir_index) {
1210                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1211                                         backref->index, backref->name,
1212                                         backref->namelen, backref->filetype,
1213                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1214                 }
1215                 if (backref->found_dir_item) {
1216                         dir_count++;
1217                         add_inode_backref(dst_cache, dst->ino,
1218                                         backref->dir, 0, backref->name,
1219                                         backref->namelen, backref->filetype,
1220                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1221                 }
1222                 if (backref->found_inode_ref) {
1223                         add_inode_backref(dst_cache, dst->ino,
1224                                         backref->dir, backref->index,
1225                                         backref->name, backref->namelen, 0,
1226                                         backref->ref_type, backref->errors);
1227                 }
1228         }
1229
1230         if (src->found_dir_item)
1231                 dst->found_dir_item = 1;
1232         if (src->found_file_extent)
1233                 dst->found_file_extent = 1;
1234         if (src->found_csum_item)
1235                 dst->found_csum_item = 1;
1236         if (src->some_csum_missing)
1237                 dst->some_csum_missing = 1;
1238         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1240                 if (ret < 0)
1241                         return ret;
1242         }
1243
1244         BUG_ON(src->found_link < dir_count);
1245         dst->found_link += src->found_link - dir_count;
1246         dst->found_size += src->found_size;
1247         if (src->extent_start != (u64)-1) {
1248                 if (dst->extent_start == (u64)-1) {
1249                         dst->extent_start = src->extent_start;
1250                         dst->extent_end = src->extent_end;
1251                 } else {
1252                         if (dst->extent_end > src->extent_start)
1253                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254                         else if (dst->extent_end < src->extent_start) {
1255                                 ret = add_file_extent_hole(&dst->holes,
1256                                         dst->extent_end,
1257                                         src->extent_start - dst->extent_end);
1258                         }
1259                         if (dst->extent_end < src->extent_end)
1260                                 dst->extent_end = src->extent_end;
1261                 }
1262         }
1263
1264         dst->errors |= src->errors;
1265         if (src->found_inode_item) {
1266                 if (!dst->found_inode_item) {
1267                         dst->nlink = src->nlink;
1268                         dst->isize = src->isize;
1269                         dst->nbytes = src->nbytes;
1270                         dst->imode = src->imode;
1271                         dst->nodatasum = src->nodatasum;
1272                         dst->found_inode_item = 1;
1273                 } else {
1274                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1275                 }
1276         }
1277         dst->merging = 0;
1278
1279         return 0;
1280 }
1281
1282 static int splice_shared_node(struct shared_node *src_node,
1283                               struct shared_node *dst_node)
1284 {
1285         struct cache_extent *cache;
1286         struct ptr_node *node, *ins;
1287         struct cache_tree *src, *dst;
1288         struct inode_record *rec, *conflict;
1289         u64 current_ino = 0;
1290         int splice = 0;
1291         int ret;
1292
1293         if (--src_node->refs == 0)
1294                 splice = 1;
1295         if (src_node->current)
1296                 current_ino = src_node->current->ino;
1297
1298         src = &src_node->root_cache;
1299         dst = &dst_node->root_cache;
1300 again:
1301         cache = search_cache_extent(src, 0);
1302         while (cache) {
1303                 node = container_of(cache, struct ptr_node, cache);
1304                 rec = node->data;
1305                 cache = next_cache_extent(cache);
1306
1307                 if (splice) {
1308                         remove_cache_extent(src, &node->cache);
1309                         ins = node;
1310                 } else {
1311                         ins = malloc(sizeof(*ins));
1312                         BUG_ON(!ins);
1313                         ins->cache.start = node->cache.start;
1314                         ins->cache.size = node->cache.size;
1315                         ins->data = rec;
1316                         rec->refs++;
1317                 }
1318                 ret = insert_cache_extent(dst, &ins->cache);
1319                 if (ret == -EEXIST) {
1320                         conflict = get_inode_rec(dst, rec->ino, 1);
1321                         BUG_ON(IS_ERR(conflict));
1322                         merge_inode_recs(rec, conflict, dst);
1323                         if (rec->checked) {
1324                                 conflict->checked = 1;
1325                                 if (dst_node->current == conflict)
1326                                         dst_node->current = NULL;
1327                         }
1328                         maybe_free_inode_rec(dst, conflict);
1329                         free_inode_rec(rec);
1330                         free(ins);
1331                 } else {
1332                         BUG_ON(ret);
1333                 }
1334         }
1335
1336         if (src == &src_node->root_cache) {
1337                 src = &src_node->inode_cache;
1338                 dst = &dst_node->inode_cache;
1339                 goto again;
1340         }
1341
1342         if (current_ino > 0 && (!dst_node->current ||
1343             current_ino > dst_node->current->ino)) {
1344                 if (dst_node->current) {
1345                         dst_node->current->checked = 1;
1346                         maybe_free_inode_rec(dst, dst_node->current);
1347                 }
1348                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349                 BUG_ON(IS_ERR(dst_node->current));
1350         }
1351         return 0;
1352 }
1353
1354 static void free_inode_ptr(struct cache_extent *cache)
1355 {
1356         struct ptr_node *node;
1357         struct inode_record *rec;
1358
1359         node = container_of(cache, struct ptr_node, cache);
1360         rec = node->data;
1361         free_inode_rec(rec);
1362         free(node);
1363 }
1364
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1366
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1368                                             u64 bytenr)
1369 {
1370         struct cache_extent *cache;
1371         struct shared_node *node;
1372
1373         cache = lookup_cache_extent(shared, bytenr, 1);
1374         if (cache) {
1375                 node = container_of(cache, struct shared_node, cache);
1376                 return node;
1377         }
1378         return NULL;
1379 }
1380
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1382 {
1383         int ret;
1384         struct shared_node *node;
1385
1386         node = calloc(1, sizeof(*node));
1387         if (!node)
1388                 return -ENOMEM;
1389         node->cache.start = bytenr;
1390         node->cache.size = 1;
1391         cache_tree_init(&node->root_cache);
1392         cache_tree_init(&node->inode_cache);
1393         node->refs = refs;
1394
1395         ret = insert_cache_extent(shared, &node->cache);
1396
1397         return ret;
1398 }
1399
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401                              struct walk_control *wc, int level)
1402 {
1403         struct shared_node *node;
1404         struct shared_node *dest;
1405         int ret;
1406
1407         if (level == wc->active_node)
1408                 return 0;
1409
1410         BUG_ON(wc->active_node <= level);
1411         node = find_shared_node(&wc->shared, bytenr);
1412         if (!node) {
1413                 ret = add_shared_node(&wc->shared, bytenr, refs);
1414                 BUG_ON(ret);
1415                 node = find_shared_node(&wc->shared, bytenr);
1416                 wc->nodes[level] = node;
1417                 wc->active_node = level;
1418                 return 0;
1419         }
1420
1421         if (wc->root_level == wc->active_node &&
1422             btrfs_root_refs(&root->root_item) == 0) {
1423                 if (--node->refs == 0) {
1424                         free_inode_recs_tree(&node->root_cache);
1425                         free_inode_recs_tree(&node->inode_cache);
1426                         remove_cache_extent(&wc->shared, &node->cache);
1427                         free(node);
1428                 }
1429                 return 1;
1430         }
1431
1432         dest = wc->nodes[wc->active_node];
1433         splice_shared_node(node, dest);
1434         if (node->refs == 0) {
1435                 remove_cache_extent(&wc->shared, &node->cache);
1436                 free(node);
1437         }
1438         return 1;
1439 }
1440
1441 static int leave_shared_node(struct btrfs_root *root,
1442                              struct walk_control *wc, int level)
1443 {
1444         struct shared_node *node;
1445         struct shared_node *dest;
1446         int i;
1447
1448         if (level == wc->root_level)
1449                 return 0;
1450
1451         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1452                 if (wc->nodes[i])
1453                         break;
1454         }
1455         BUG_ON(i >= BTRFS_MAX_LEVEL);
1456
1457         node = wc->nodes[wc->active_node];
1458         wc->nodes[wc->active_node] = NULL;
1459         wc->active_node = i;
1460
1461         dest = wc->nodes[wc->active_node];
1462         if (wc->active_node < wc->root_level ||
1463             btrfs_root_refs(&root->root_item) > 0) {
1464                 BUG_ON(node->refs <= 1);
1465                 splice_shared_node(node, dest);
1466         } else {
1467                 BUG_ON(node->refs < 2);
1468                 node->refs--;
1469         }
1470         return 0;
1471 }
1472
1473 /*
1474  * Returns:
1475  * < 0 - on error
1476  * 1   - if the root with id child_root_id is a child of root parent_root_id
1477  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1478  *       has other root(s) as parent(s)
1479  * 2   - if the root child_root_id doesn't have any parent roots
1480  */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1482                          u64 child_root_id)
1483 {
1484         struct btrfs_path path;
1485         struct btrfs_key key;
1486         struct extent_buffer *leaf;
1487         int has_parent = 0;
1488         int ret;
1489
1490         btrfs_init_path(&path);
1491
1492         key.objectid = parent_root_id;
1493         key.type = BTRFS_ROOT_REF_KEY;
1494         key.offset = child_root_id;
1495         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1496                                 0, 0);
1497         if (ret < 0)
1498                 return ret;
1499         btrfs_release_path(&path);
1500         if (!ret)
1501                 return 1;
1502
1503         key.objectid = child_root_id;
1504         key.type = BTRFS_ROOT_BACKREF_KEY;
1505         key.offset = 0;
1506         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1507                                 0, 0);
1508         if (ret < 0)
1509                 goto out;
1510
1511         while (1) {
1512                 leaf = path.nodes[0];
1513                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1515                         if (ret)
1516                                 break;
1517                         leaf = path.nodes[0];
1518                 }
1519
1520                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521                 if (key.objectid != child_root_id ||
1522                     key.type != BTRFS_ROOT_BACKREF_KEY)
1523                         break;
1524
1525                 has_parent = 1;
1526
1527                 if (key.offset == parent_root_id) {
1528                         btrfs_release_path(&path);
1529                         return 1;
1530                 }
1531
1532                 path.slots[0]++;
1533         }
1534 out:
1535         btrfs_release_path(&path);
1536         if (ret < 0)
1537                 return ret;
1538         return has_parent ? 0 : 2;
1539 }
1540
1541 static int process_dir_item(struct btrfs_root *root,
1542                             struct extent_buffer *eb,
1543                             int slot, struct btrfs_key *key,
1544                             struct shared_node *active_node)
1545 {
1546         u32 total;
1547         u32 cur = 0;
1548         u32 len;
1549         u32 name_len;
1550         u32 data_len;
1551         int error;
1552         int nritems = 0;
1553         int filetype;
1554         struct btrfs_dir_item *di;
1555         struct inode_record *rec;
1556         struct cache_tree *root_cache;
1557         struct cache_tree *inode_cache;
1558         struct btrfs_key location;
1559         char namebuf[BTRFS_NAME_LEN];
1560
1561         root_cache = &active_node->root_cache;
1562         inode_cache = &active_node->inode_cache;
1563         rec = active_node->current;
1564         rec->found_dir_item = 1;
1565
1566         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567         total = btrfs_item_size_nr(eb, slot);
1568         while (cur < total) {
1569                 nritems++;
1570                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571                 name_len = btrfs_dir_name_len(eb, di);
1572                 data_len = btrfs_dir_data_len(eb, di);
1573                 filetype = btrfs_dir_type(eb, di);
1574
1575                 rec->found_size += name_len;
1576                 if (name_len <= BTRFS_NAME_LEN) {
1577                         len = name_len;
1578                         error = 0;
1579                 } else {
1580                         len = BTRFS_NAME_LEN;
1581                         error = REF_ERR_NAME_TOO_LONG;
1582                 }
1583                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1584
1585                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586                         add_inode_backref(inode_cache, location.objectid,
1587                                           key->objectid, key->offset, namebuf,
1588                                           len, filetype, key->type, error);
1589                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590                         add_inode_backref(root_cache, location.objectid,
1591                                           key->objectid, key->offset,
1592                                           namebuf, len, filetype,
1593                                           key->type, error);
1594                 } else {
1595                         fprintf(stderr, "invalid location in dir item %u\n",
1596                                 location.type);
1597                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598                                           key->objectid, key->offset, namebuf,
1599                                           len, filetype, key->type, error);
1600                 }
1601
1602                 len = sizeof(*di) + name_len + data_len;
1603                 di = (struct btrfs_dir_item *)((char *)di + len);
1604                 cur += len;
1605         }
1606         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1608
1609         return 0;
1610 }
1611
1612 static int process_inode_ref(struct extent_buffer *eb,
1613                              int slot, struct btrfs_key *key,
1614                              struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         int error;
1622         struct cache_tree *inode_cache;
1623         struct btrfs_inode_ref *ref;
1624         char namebuf[BTRFS_NAME_LEN];
1625
1626         inode_cache = &active_node->inode_cache;
1627
1628         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629         total = btrfs_item_size_nr(eb, slot);
1630         while (cur < total) {
1631                 name_len = btrfs_inode_ref_name_len(eb, ref);
1632                 index = btrfs_inode_ref_index(eb, ref);
1633                 if (name_len <= BTRFS_NAME_LEN) {
1634                         len = name_len;
1635                         error = 0;
1636                 } else {
1637                         len = BTRFS_NAME_LEN;
1638                         error = REF_ERR_NAME_TOO_LONG;
1639                 }
1640                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641                 add_inode_backref(inode_cache, key->objectid, key->offset,
1642                                   index, namebuf, len, 0, key->type, error);
1643
1644                 len = sizeof(*ref) + name_len;
1645                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1646                 cur += len;
1647         }
1648         return 0;
1649 }
1650
1651 static int process_inode_extref(struct extent_buffer *eb,
1652                                 int slot, struct btrfs_key *key,
1653                                 struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         u64 parent;
1661         int error;
1662         struct cache_tree *inode_cache;
1663         struct btrfs_inode_extref *extref;
1664         char namebuf[BTRFS_NAME_LEN];
1665
1666         inode_cache = &active_node->inode_cache;
1667
1668         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669         total = btrfs_item_size_nr(eb, slot);
1670         while (cur < total) {
1671                 name_len = btrfs_inode_extref_name_len(eb, extref);
1672                 index = btrfs_inode_extref_index(eb, extref);
1673                 parent = btrfs_inode_extref_parent(eb, extref);
1674                 if (name_len <= BTRFS_NAME_LEN) {
1675                         len = name_len;
1676                         error = 0;
1677                 } else {
1678                         len = BTRFS_NAME_LEN;
1679                         error = REF_ERR_NAME_TOO_LONG;
1680                 }
1681                 read_extent_buffer(eb, namebuf,
1682                                    (unsigned long)(extref + 1), len);
1683                 add_inode_backref(inode_cache, key->objectid, parent,
1684                                   index, namebuf, len, 0, key->type, error);
1685
1686                 len = sizeof(*extref) + name_len;
1687                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1688                 cur += len;
1689         }
1690         return 0;
1691
1692 }
1693
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695                             u64 len, u64 *found)
1696 {
1697         struct btrfs_key key;
1698         struct btrfs_path path;
1699         struct extent_buffer *leaf;
1700         int ret;
1701         size_t size;
1702         *found = 0;
1703         u64 csum_end;
1704         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1705
1706         btrfs_init_path(&path);
1707
1708         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1709         key.offset = start;
1710         key.type = BTRFS_EXTENT_CSUM_KEY;
1711
1712         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1713                                 &key, &path, 0, 0);
1714         if (ret < 0)
1715                 goto out;
1716         if (ret > 0 && path.slots[0] > 0) {
1717                 leaf = path.nodes[0];
1718                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720                     key.type == BTRFS_EXTENT_CSUM_KEY)
1721                         path.slots[0]--;
1722         }
1723
1724         while (len > 0) {
1725                 leaf = path.nodes[0];
1726                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1728                         if (ret > 0)
1729                                 break;
1730                         else if (ret < 0)
1731                                 goto out;
1732                         leaf = path.nodes[0];
1733                 }
1734
1735                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737                     key.type != BTRFS_EXTENT_CSUM_KEY)
1738                         break;
1739
1740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741                 if (key.offset >= start + len)
1742                         break;
1743
1744                 if (key.offset > start)
1745                         start = key.offset;
1746
1747                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749                 if (csum_end > start) {
1750                         size = min(csum_end - start, len);
1751                         len -= size;
1752                         start += size;
1753                         *found += size;
1754                 }
1755
1756                 path.slots[0]++;
1757         }
1758 out:
1759         btrfs_release_path(&path);
1760         if (ret < 0)
1761                 return ret;
1762         return 0;
1763 }
1764
1765 static int process_file_extent(struct btrfs_root *root,
1766                                 struct extent_buffer *eb,
1767                                 int slot, struct btrfs_key *key,
1768                                 struct shared_node *active_node)
1769 {
1770         struct inode_record *rec;
1771         struct btrfs_file_extent_item *fi;
1772         u64 num_bytes = 0;
1773         u64 disk_bytenr = 0;
1774         u64 extent_offset = 0;
1775         u64 mask = root->sectorsize - 1;
1776         int extent_type;
1777         int ret;
1778
1779         rec = active_node->current;
1780         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781         rec->found_file_extent = 1;
1782
1783         if (rec->extent_start == (u64)-1) {
1784                 rec->extent_start = key->offset;
1785                 rec->extent_end = key->offset;
1786         }
1787
1788         if (rec->extent_end > key->offset)
1789                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790         else if (rec->extent_end < key->offset) {
1791                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792                                            key->offset - rec->extent_end);
1793                 if (ret < 0)
1794                         return ret;
1795         }
1796
1797         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798         extent_type = btrfs_file_extent_type(eb, fi);
1799
1800         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1802                 if (num_bytes == 0)
1803                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804                 rec->found_size += num_bytes;
1805                 num_bytes = (num_bytes + mask) & ~mask;
1806         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810                 extent_offset = btrfs_file_extent_offset(eb, fi);
1811                 if (num_bytes == 0 || (num_bytes & mask))
1812                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813                 if (num_bytes + extent_offset >
1814                     btrfs_file_extent_ram_bytes(eb, fi))
1815                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817                     (btrfs_file_extent_compression(eb, fi) ||
1818                      btrfs_file_extent_encryption(eb, fi) ||
1819                      btrfs_file_extent_other_encoding(eb, fi)))
1820                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821                 if (disk_bytenr > 0)
1822                         rec->found_size += num_bytes;
1823         } else {
1824                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1825         }
1826         rec->extent_end = key->offset + num_bytes;
1827
1828         /*
1829          * The data reloc tree will copy full extents into its inode and then
1830          * copy the corresponding csums.  Because the extent it copied could be
1831          * a preallocated extent that hasn't been written to yet there may be no
1832          * csums to copy, ergo we won't have csums for our file extent.  This is
1833          * ok so just don't bother checking csums if the inode belongs to the
1834          * data reloc tree.
1835          */
1836         if (disk_bytenr > 0 &&
1837             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1838                 u64 found;
1839                 if (btrfs_file_extent_compression(eb, fi))
1840                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1841                 else
1842                         disk_bytenr += extent_offset;
1843
1844                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1845                 if (ret < 0)
1846                         return ret;
1847                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1848                         if (found > 0)
1849                                 rec->found_csum_item = 1;
1850                         if (found < num_bytes)
1851                                 rec->some_csum_missing = 1;
1852                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1853                         if (found > 0)
1854                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1855                 }
1856         }
1857         return 0;
1858 }
1859
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861                             struct walk_control *wc)
1862 {
1863         struct btrfs_key key;
1864         u32 nritems;
1865         int i;
1866         int ret = 0;
1867         struct cache_tree *inode_cache;
1868         struct shared_node *active_node;
1869
1870         if (wc->root_level == wc->active_node &&
1871             btrfs_root_refs(&root->root_item) == 0)
1872                 return 0;
1873
1874         active_node = wc->nodes[wc->active_node];
1875         inode_cache = &active_node->inode_cache;
1876         nritems = btrfs_header_nritems(eb);
1877         for (i = 0; i < nritems; i++) {
1878                 btrfs_item_key_to_cpu(eb, &key, i);
1879
1880                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1881                         continue;
1882                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1883                         continue;
1884
1885                 if (active_node->current == NULL ||
1886                     active_node->current->ino < key.objectid) {
1887                         if (active_node->current) {
1888                                 active_node->current->checked = 1;
1889                                 maybe_free_inode_rec(inode_cache,
1890                                                      active_node->current);
1891                         }
1892                         active_node->current = get_inode_rec(inode_cache,
1893                                                              key.objectid, 1);
1894                         BUG_ON(IS_ERR(active_node->current));
1895                 }
1896                 switch (key.type) {
1897                 case BTRFS_DIR_ITEM_KEY:
1898                 case BTRFS_DIR_INDEX_KEY:
1899                         ret = process_dir_item(root, eb, i, &key, active_node);
1900                         break;
1901                 case BTRFS_INODE_REF_KEY:
1902                         ret = process_inode_ref(eb, i, &key, active_node);
1903                         break;
1904                 case BTRFS_INODE_EXTREF_KEY:
1905                         ret = process_inode_extref(eb, i, &key, active_node);
1906                         break;
1907                 case BTRFS_INODE_ITEM_KEY:
1908                         ret = process_inode_item(eb, i, &key, active_node);
1909                         break;
1910                 case BTRFS_EXTENT_DATA_KEY:
1911                         ret = process_file_extent(root, eb, i, &key,
1912                                                   active_node);
1913                         break;
1914                 default:
1915                         break;
1916                 };
1917         }
1918         return ret;
1919 }
1920
1921 static void reada_walk_down(struct btrfs_root *root,
1922                             struct extent_buffer *node, int slot)
1923 {
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         u32 nritems;
1927         u32 blocksize;
1928         int i;
1929         int level;
1930
1931         level = btrfs_header_level(node);
1932         if (level != 1)
1933                 return;
1934
1935         nritems = btrfs_header_nritems(node);
1936         blocksize = root->nodesize;
1937         for (i = slot; i < nritems; i++) {
1938                 bytenr = btrfs_node_blockptr(node, i);
1939                 ptr_gen = btrfs_node_ptr_generation(node, i);
1940                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1941         }
1942 }
1943
1944 /*
1945  * Check the child node/leaf by the following condition:
1946  * 1. the first item key of the node/leaf should be the same with the one
1947  *    in parent.
1948  * 2. block in parent node should match the child node/leaf.
1949  * 3. generation of parent node and child's header should be consistent.
1950  *
1951  * Or the child node/leaf pointed by the key in parent is not valid.
1952  *
1953  * We hope to check leaf owner too, but since subvol may share leaves,
1954  * which makes leaf owner check not so strong, key check should be
1955  * sufficient enough for that case.
1956  */
1957 static int check_child_node(struct btrfs_root *root,
1958                             struct extent_buffer *parent, int slot,
1959                             struct extent_buffer *child)
1960 {
1961         struct btrfs_key parent_key;
1962         struct btrfs_key child_key;
1963         int ret = 0;
1964
1965         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966         if (btrfs_header_level(child) == 0)
1967                 btrfs_item_key_to_cpu(child, &child_key, 0);
1968         else
1969                 btrfs_node_key_to_cpu(child, &child_key, 0);
1970
1971         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1972                 ret = -EINVAL;
1973                 fprintf(stderr,
1974                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975                         parent_key.objectid, parent_key.type, parent_key.offset,
1976                         child_key.objectid, child_key.type, child_key.offset);
1977         }
1978         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1979                 ret = -EINVAL;
1980                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981                         btrfs_node_blockptr(parent, slot),
1982                         btrfs_header_bytenr(child));
1983         }
1984         if (btrfs_node_ptr_generation(parent, slot) !=
1985             btrfs_header_generation(child)) {
1986                 ret = -EINVAL;
1987                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988                         btrfs_header_generation(child),
1989                         btrfs_node_ptr_generation(parent, slot));
1990         }
1991         return ret;
1992 }
1993
1994 struct node_refs {
1995         u64 bytenr[BTRFS_MAX_LEVEL];
1996         u64 refs[BTRFS_MAX_LEVEL];
1997 };
1998
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000                           struct walk_control *wc, int *level,
2001                           struct node_refs *nrefs)
2002 {
2003         enum btrfs_tree_block_status status;
2004         u64 bytenr;
2005         u64 ptr_gen;
2006         struct extent_buffer *next;
2007         struct extent_buffer *cur;
2008         u32 blocksize;
2009         int ret, err = 0;
2010         u64 refs;
2011
2012         WARN_ON(*level < 0);
2013         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014
2015         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016                 refs = nrefs->refs[*level];
2017                 ret = 0;
2018         } else {
2019                 ret = btrfs_lookup_extent_info(NULL, root,
2020                                        path->nodes[*level]->start,
2021                                        *level, 1, &refs, NULL);
2022                 if (ret < 0) {
2023                         err = ret;
2024                         goto out;
2025                 }
2026                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027                 nrefs->refs[*level] = refs;
2028         }
2029
2030         if (refs > 1) {
2031                 ret = enter_shared_node(root, path->nodes[*level]->start,
2032                                         refs, wc, *level);
2033                 if (ret > 0) {
2034                         err = ret;
2035                         goto out;
2036                 }
2037         }
2038
2039         while (*level >= 0) {
2040                 WARN_ON(*level < 0);
2041                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042                 cur = path->nodes[*level];
2043
2044                 if (btrfs_header_level(cur) != *level)
2045                         WARN_ON(1);
2046
2047                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2048                         break;
2049                 if (*level == 0) {
2050                         ret = process_one_leaf(root, cur, wc);
2051                         if (ret < 0)
2052                                 err = ret;
2053                         break;
2054                 }
2055                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057                 blocksize = root->nodesize;
2058
2059                 if (bytenr == nrefs->bytenr[*level - 1]) {
2060                         refs = nrefs->refs[*level - 1];
2061                 } else {
2062                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063                                         *level - 1, 1, &refs, NULL);
2064                         if (ret < 0) {
2065                                 refs = 0;
2066                         } else {
2067                                 nrefs->bytenr[*level - 1] = bytenr;
2068                                 nrefs->refs[*level - 1] = refs;
2069                         }
2070                 }
2071
2072                 if (refs > 1) {
2073                         ret = enter_shared_node(root, bytenr, refs,
2074                                                 wc, *level - 1);
2075                         if (ret > 0) {
2076                                 path->slots[*level]++;
2077                                 continue;
2078                         }
2079                 }
2080
2081                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083                         free_extent_buffer(next);
2084                         reada_walk_down(root, cur, path->slots[*level]);
2085                         next = read_tree_block(root, bytenr, blocksize,
2086                                                ptr_gen);
2087                         if (!extent_buffer_uptodate(next)) {
2088                                 struct btrfs_key node_key;
2089
2090                                 btrfs_node_key_to_cpu(path->nodes[*level],
2091                                                       &node_key,
2092                                                       path->slots[*level]);
2093                                 btrfs_add_corrupt_extent_record(root->fs_info,
2094                                                 &node_key,
2095                                                 path->nodes[*level]->start,
2096                                                 root->nodesize, *level);
2097                                 err = -EIO;
2098                                 goto out;
2099                         }
2100                 }
2101
2102                 ret = check_child_node(root, cur, path->slots[*level], next);
2103                 if (ret) {
2104                         err = ret;
2105                         goto out;
2106                 }
2107
2108                 if (btrfs_is_leaf(next))
2109                         status = btrfs_check_leaf(root, NULL, next);
2110                 else
2111                         status = btrfs_check_node(root, NULL, next);
2112                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113                         free_extent_buffer(next);
2114                         err = -EIO;
2115                         goto out;
2116                 }
2117
2118                 *level = *level - 1;
2119                 free_extent_buffer(path->nodes[*level]);
2120                 path->nodes[*level] = next;
2121                 path->slots[*level] = 0;
2122         }
2123 out:
2124         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2125         return err;
2126 }
2127
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129                         struct walk_control *wc, int *level)
2130 {
2131         int i;
2132         struct extent_buffer *leaf;
2133
2134         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135                 leaf = path->nodes[i];
2136                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2137                         path->slots[i]++;
2138                         *level = i;
2139                         return 0;
2140                 } else {
2141                         free_extent_buffer(path->nodes[*level]);
2142                         path->nodes[*level] = NULL;
2143                         BUG_ON(*level > wc->active_node);
2144                         if (*level == wc->active_node)
2145                                 leave_shared_node(root, wc, *level);
2146                         *level = i + 1;
2147                 }
2148         }
2149         return 1;
2150 }
2151
2152 static int check_root_dir(struct inode_record *rec)
2153 {
2154         struct inode_backref *backref;
2155         int ret = -1;
2156
2157         if (!rec->found_inode_item || rec->errors)
2158                 goto out;
2159         if (rec->nlink != 1 || rec->found_link != 0)
2160                 goto out;
2161         if (list_empty(&rec->backrefs))
2162                 goto out;
2163         backref = to_inode_backref(rec->backrefs.next);
2164         if (!backref->found_inode_ref)
2165                 goto out;
2166         if (backref->index != 0 || backref->namelen != 2 ||
2167             memcmp(backref->name, "..", 2))
2168                 goto out;
2169         if (backref->found_dir_index || backref->found_dir_item)
2170                 goto out;
2171         ret = 0;
2172 out:
2173         return ret;
2174 }
2175
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177                               struct btrfs_root *root, struct btrfs_path *path,
2178                               struct inode_record *rec)
2179 {
2180         struct btrfs_inode_item *ei;
2181         struct btrfs_key key;
2182         int ret;
2183
2184         key.objectid = rec->ino;
2185         key.type = BTRFS_INODE_ITEM_KEY;
2186         key.offset = (u64)-1;
2187
2188         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2189         if (ret < 0)
2190                 goto out;
2191         if (ret) {
2192                 if (!path->slots[0]) {
2193                         ret = -ENOENT;
2194                         goto out;
2195                 }
2196                 path->slots[0]--;
2197                 ret = 0;
2198         }
2199         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200         if (key.objectid != rec->ino) {
2201                 ret = -ENOENT;
2202                 goto out;
2203         }
2204
2205         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206                             struct btrfs_inode_item);
2207         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208         btrfs_mark_buffer_dirty(path->nodes[0]);
2209         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211                root->root_key.objectid);
2212 out:
2213         btrfs_release_path(path);
2214         return ret;
2215 }
2216
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218                                     struct btrfs_root *root,
2219                                     struct btrfs_path *path,
2220                                     struct inode_record *rec)
2221 {
2222         int ret;
2223
2224         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225         btrfs_release_path(path);
2226         if (!ret)
2227                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2228         return ret;
2229 }
2230
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         struct btrfs_inode_item *ei;
2237         struct btrfs_key key;
2238         int ret = 0;
2239
2240         key.objectid = rec->ino;
2241         key.type = BTRFS_INODE_ITEM_KEY;
2242         key.offset = 0;
2243
2244         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2245         if (ret) {
2246                 if (ret > 0)
2247                         ret = -ENOENT;
2248                 goto out;
2249         }
2250
2251         /* Since ret == 0, no need to check anything */
2252         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253                             struct btrfs_inode_item);
2254         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255         btrfs_mark_buffer_dirty(path->nodes[0]);
2256         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257         printf("reset nbytes for ino %llu root %llu\n",
2258                rec->ino, root->root_key.objectid);
2259 out:
2260         btrfs_release_path(path);
2261         return ret;
2262 }
2263
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265                                  struct cache_tree *inode_cache,
2266                                  struct inode_record *rec,
2267                                  struct inode_backref *backref)
2268 {
2269         struct btrfs_path *path;
2270         struct btrfs_trans_handle *trans;
2271         struct btrfs_dir_item *dir_item;
2272         struct extent_buffer *leaf;
2273         struct btrfs_key key;
2274         struct btrfs_disk_key disk_key;
2275         struct inode_record *dir_rec;
2276         unsigned long name_ptr;
2277         u32 data_size = sizeof(*dir_item) + backref->namelen;
2278         int ret;
2279
2280         path = btrfs_alloc_path();
2281         if (!path)
2282                 return -ENOMEM;
2283
2284         trans = btrfs_start_transaction(root, 1);
2285         if (IS_ERR(trans)) {
2286                 btrfs_free_path(path);
2287                 return PTR_ERR(trans);
2288         }
2289
2290         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291                 (unsigned long long)rec->ino);
2292         key.objectid = backref->dir;
2293         key.type = BTRFS_DIR_INDEX_KEY;
2294         key.offset = backref->index;
2295
2296         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2297         BUG_ON(ret);
2298
2299         leaf = path->nodes[0];
2300         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2301
2302         disk_key.objectid = cpu_to_le64(rec->ino);
2303         disk_key.type = BTRFS_INODE_ITEM_KEY;
2304         disk_key.offset = 0;
2305
2306         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308         btrfs_set_dir_data_len(leaf, dir_item, 0);
2309         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310         name_ptr = (unsigned long)(dir_item + 1);
2311         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312         btrfs_mark_buffer_dirty(leaf);
2313         btrfs_free_path(path);
2314         btrfs_commit_transaction(trans, root);
2315
2316         backref->found_dir_index = 1;
2317         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318         BUG_ON(IS_ERR(dir_rec));
2319         if (!dir_rec)
2320                 return 0;
2321         dir_rec->found_size += backref->namelen;
2322         if (dir_rec->found_size == dir_rec->isize &&
2323             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325         if (dir_rec->found_size != dir_rec->isize)
2326                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2327
2328         return 0;
2329 }
2330
2331 static int delete_dir_index(struct btrfs_root *root,
2332                             struct cache_tree *inode_cache,
2333                             struct inode_record *rec,
2334                             struct inode_backref *backref)
2335 {
2336         struct btrfs_trans_handle *trans;
2337         struct btrfs_dir_item *di;
2338         struct btrfs_path *path;
2339         int ret = 0;
2340
2341         path = btrfs_alloc_path();
2342         if (!path)
2343                 return -ENOMEM;
2344
2345         trans = btrfs_start_transaction(root, 1);
2346         if (IS_ERR(trans)) {
2347                 btrfs_free_path(path);
2348                 return PTR_ERR(trans);
2349         }
2350
2351
2352         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353                 (unsigned long long)backref->dir,
2354                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355                 (unsigned long long)root->objectid);
2356
2357         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358                                     backref->name, backref->namelen,
2359                                     backref->index, -1);
2360         if (IS_ERR(di)) {
2361                 ret = PTR_ERR(di);
2362                 btrfs_free_path(path);
2363                 btrfs_commit_transaction(trans, root);
2364                 if (ret == -ENOENT)
2365                         return 0;
2366                 return ret;
2367         }
2368
2369         if (!di)
2370                 ret = btrfs_del_item(trans, root, path);
2371         else
2372                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2373         BUG_ON(ret);
2374         btrfs_free_path(path);
2375         btrfs_commit_transaction(trans, root);
2376         return ret;
2377 }
2378
2379 static int create_inode_item(struct btrfs_root *root,
2380                              struct inode_record *rec,
2381                              struct inode_backref *backref, int root_dir)
2382 {
2383         struct btrfs_trans_handle *trans;
2384         struct btrfs_inode_item inode_item;
2385         time_t now = time(NULL);
2386         int ret;
2387
2388         trans = btrfs_start_transaction(root, 1);
2389         if (IS_ERR(trans)) {
2390                 ret = PTR_ERR(trans);
2391                 return ret;
2392         }
2393
2394         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395                 "be incomplete, please check permissions and content after "
2396                 "the fsck completes.\n", (unsigned long long)root->objectid,
2397                 (unsigned long long)rec->ino);
2398
2399         memset(&inode_item, 0, sizeof(inode_item));
2400         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2401         if (root_dir)
2402                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2403         else
2404                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406         if (rec->found_dir_item) {
2407                 if (rec->found_file_extent)
2408                         fprintf(stderr, "root %llu inode %llu has both a dir "
2409                                 "item and extents, unsure if it is a dir or a "
2410                                 "regular file so setting it as a directory\n",
2411                                 (unsigned long long)root->objectid,
2412                                 (unsigned long long)rec->ino);
2413                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415         } else if (!rec->found_dir_item) {
2416                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2418         }
2419         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2427
2428         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2429         BUG_ON(ret);
2430         btrfs_commit_transaction(trans, root);
2431         return 0;
2432 }
2433
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435                                  struct inode_record *rec,
2436                                  struct cache_tree *inode_cache,
2437                                  int delete)
2438 {
2439         struct inode_backref *tmp, *backref;
2440         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2441         int ret = 0;
2442         int repaired = 0;
2443
2444         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445                 if (!delete && rec->ino == root_dirid) {
2446                         if (!rec->found_inode_item) {
2447                                 ret = create_inode_item(root, rec, backref, 1);
2448                                 if (ret)
2449                                         break;
2450                                 repaired++;
2451                         }
2452                 }
2453
2454                 /* Index 0 for root dir's are special, don't mess with it */
2455                 if (rec->ino == root_dirid && backref->index == 0)
2456                         continue;
2457
2458                 if (delete &&
2459                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2460                      (backref->found_dir_index && backref->found_inode_ref &&
2461                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462                         ret = delete_dir_index(root, inode_cache, rec, backref);
2463                         if (ret)
2464                                 break;
2465                         repaired++;
2466                         list_del(&backref->list);
2467                         free(backref);
2468                 }
2469
2470                 if (!delete && !backref->found_dir_index &&
2471                     backref->found_dir_item && backref->found_inode_ref) {
2472                         ret = add_missing_dir_index(root, inode_cache, rec,
2473                                                     backref);
2474                         if (ret)
2475                                 break;
2476                         repaired++;
2477                         if (backref->found_dir_item &&
2478                             backref->found_dir_index &&
2479                             backref->found_dir_index) {
2480                                 if (!backref->errors &&
2481                                     backref->found_inode_ref) {
2482                                         list_del(&backref->list);
2483                                         free(backref);
2484                                 }
2485                         }
2486                 }
2487
2488                 if (!delete && (!backref->found_dir_index &&
2489                                 !backref->found_dir_item &&
2490                                 backref->found_inode_ref)) {
2491                         struct btrfs_trans_handle *trans;
2492                         struct btrfs_key location;
2493
2494                         ret = check_dir_conflict(root, backref->name,
2495                                                  backref->namelen,
2496                                                  backref->dir,
2497                                                  backref->index);
2498                         if (ret) {
2499                                 /*
2500                                  * let nlink fixing routine to handle it,
2501                                  * which can do it better.
2502                                  */
2503                                 ret = 0;
2504                                 break;
2505                         }
2506                         location.objectid = rec->ino;
2507                         location.type = BTRFS_INODE_ITEM_KEY;
2508                         location.offset = 0;
2509
2510                         trans = btrfs_start_transaction(root, 1);
2511                         if (IS_ERR(trans)) {
2512                                 ret = PTR_ERR(trans);
2513                                 break;
2514                         }
2515                         fprintf(stderr, "adding missing dir index/item pair "
2516                                 "for inode %llu\n",
2517                                 (unsigned long long)rec->ino);
2518                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2519                                                     backref->namelen,
2520                                                     backref->dir, &location,
2521                                                     imode_to_type(rec->imode),
2522                                                     backref->index);
2523                         BUG_ON(ret);
2524                         btrfs_commit_transaction(trans, root);
2525                         repaired++;
2526                 }
2527
2528                 if (!delete && (backref->found_inode_ref &&
2529                                 backref->found_dir_index &&
2530                                 backref->found_dir_item &&
2531                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532                                 !rec->found_inode_item)) {
2533                         ret = create_inode_item(root, rec, backref, 0);
2534                         if (ret)
2535                                 break;
2536                         repaired++;
2537                 }
2538
2539         }
2540         return ret ? ret : repaired;
2541 }
2542
2543 /*
2544  * To determine the file type for nlink/inode_item repair
2545  *
2546  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547  * Return -ENOENT if file type is not found.
2548  */
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2550 {
2551         struct inode_backref *backref;
2552
2553         /* For inode item recovered case */
2554         if (rec->found_inode_item) {
2555                 *type = imode_to_type(rec->imode);
2556                 return 0;
2557         }
2558
2559         list_for_each_entry(backref, &rec->backrefs, list) {
2560                 if (backref->found_dir_index || backref->found_dir_item) {
2561                         *type = backref->filetype;
2562                         return 0;
2563                 }
2564         }
2565         return -ENOENT;
2566 }
2567
2568 /*
2569  * To determine the file name for nlink repair
2570  *
2571  * Return 0 if file name is found, set name and namelen.
2572  * Return -ENOENT if file name is not found.
2573  */
2574 static int find_file_name(struct inode_record *rec,
2575                           char *name, int *namelen)
2576 {
2577         struct inode_backref *backref;
2578
2579         list_for_each_entry(backref, &rec->backrefs, list) {
2580                 if (backref->found_dir_index || backref->found_dir_item ||
2581                     backref->found_inode_ref) {
2582                         memcpy(name, backref->name, backref->namelen);
2583                         *namelen = backref->namelen;
2584                         return 0;
2585                 }
2586         }
2587         return -ENOENT;
2588 }
2589
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592                        struct btrfs_root *root,
2593                        struct btrfs_path *path,
2594                        struct inode_record *rec)
2595 {
2596         struct inode_backref *backref;
2597         struct inode_backref *tmp;
2598         struct btrfs_key key;
2599         struct btrfs_inode_item *inode_item;
2600         int ret = 0;
2601
2602         /* We don't believe this either, reset it and iterate backref */
2603         rec->found_link = 0;
2604
2605         /* Remove all backref including the valid ones */
2606         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608                                    backref->index, backref->name,
2609                                    backref->namelen, 0);
2610                 if (ret < 0)
2611                         goto out;
2612
2613                 /* remove invalid backref, so it won't be added back */
2614                 if (!(backref->found_dir_index &&
2615                       backref->found_dir_item &&
2616                       backref->found_inode_ref)) {
2617                         list_del(&backref->list);
2618                         free(backref);
2619                 } else {
2620                         rec->found_link++;
2621                 }
2622         }
2623
2624         /* Set nlink to 0 */
2625         key.objectid = rec->ino;
2626         key.type = BTRFS_INODE_ITEM_KEY;
2627         key.offset = 0;
2628         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2629         if (ret < 0)
2630                 goto out;
2631         if (ret > 0) {
2632                 ret = -ENOENT;
2633                 goto out;
2634         }
2635         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636                                     struct btrfs_inode_item);
2637         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638         btrfs_mark_buffer_dirty(path->nodes[0]);
2639         btrfs_release_path(path);
2640
2641         /*
2642          * Add back valid inode_ref/dir_item/dir_index,
2643          * add_link() will handle the nlink inc, so new nlink must be correct
2644          */
2645         list_for_each_entry(backref, &rec->backrefs, list) {
2646                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647                                      backref->name, backref->namelen,
2648                                      backref->filetype, &backref->index, 1);
2649                 if (ret < 0)
2650                         goto out;
2651         }
2652 out:
2653         btrfs_release_path(path);
2654         return ret;
2655 }
2656
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658                                struct btrfs_root *root,
2659                                struct btrfs_path *path,
2660                                struct inode_record *rec)
2661 {
2662         char *dir_name = "lost+found";
2663         char namebuf[BTRFS_NAME_LEN] = {0};
2664         u64 lost_found_ino;
2665         u32 mode = 0700;
2666         u8 type = 0;
2667         int namelen = 0;
2668         int name_recovered = 0;
2669         int type_recovered = 0;
2670         int ret = 0;
2671
2672         /*
2673          * Get file name and type first before these invalid inode ref
2674          * are deleted by remove_all_invalid_backref()
2675          */
2676         name_recovered = !find_file_name(rec, namebuf, &namelen);
2677         type_recovered = !find_file_type(rec, &type);
2678
2679         if (!name_recovered) {
2680                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681                        rec->ino, rec->ino);
2682                 namelen = count_digits(rec->ino);
2683                 sprintf(namebuf, "%llu", rec->ino);
2684                 name_recovered = 1;
2685         }
2686         if (!type_recovered) {
2687                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2688                        rec->ino);
2689                 type = BTRFS_FT_REG_FILE;
2690                 type_recovered = 1;
2691         }
2692
2693         ret = reset_nlink(trans, root, path, rec);
2694         if (ret < 0) {
2695                 fprintf(stderr,
2696                         "Failed to reset nlink for inode %llu: %s\n",
2697                         rec->ino, strerror(-ret));
2698                 goto out;
2699         }
2700
2701         if (rec->found_link == 0) {
2702                 lost_found_ino = root->highest_inode;
2703                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2704                         ret = -EOVERFLOW;
2705                         goto out;
2706                 }
2707                 lost_found_ino++;
2708                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2710                                   mode);
2711                 if (ret < 0) {
2712                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713                                 dir_name, strerror(-ret));
2714                         goto out;
2715                 }
2716                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717                                      namebuf, namelen, type, NULL, 1);
2718                 /*
2719                  * Add ".INO" suffix several times to handle case where
2720                  * "FILENAME.INO" is already taken by another file.
2721                  */
2722                 while (ret == -EEXIST) {
2723                         /*
2724                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2725                          */
2726                         if (namelen + count_digits(rec->ino) + 1 >
2727                             BTRFS_NAME_LEN) {
2728                                 ret = -EFBIG;
2729                                 goto out;
2730                         }
2731                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2732                                  ".%llu", rec->ino);
2733                         namelen += count_digits(rec->ino) + 1;
2734                         ret = btrfs_add_link(trans, root, rec->ino,
2735                                              lost_found_ino, namebuf,
2736                                              namelen, type, NULL, 1);
2737                 }
2738                 if (ret < 0) {
2739                         fprintf(stderr,
2740                                 "Failed to link the inode %llu to %s dir: %s\n",
2741                                 rec->ino, dir_name, strerror(-ret));
2742                         goto out;
2743                 }
2744                 /*
2745                  * Just increase the found_link, don't actually add the
2746                  * backref. This will make things easier and this inode
2747                  * record will be freed after the repair is done.
2748                  * So fsck will not report problem about this inode.
2749                  */
2750                 rec->found_link++;
2751                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752                        namelen, namebuf, dir_name);
2753         }
2754         printf("Fixed the nlink of inode %llu\n", rec->ino);
2755 out:
2756         /*
2757          * Clear the flag anyway, or we will loop forever for the same inode
2758          * as it will not be removed from the bad inode list and the dead loop
2759          * happens.
2760          */
2761         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762         btrfs_release_path(path);
2763         return ret;
2764 }
2765
2766 /*
2767  * Check if there is any normal(reg or prealloc) file extent for given
2768  * ino.
2769  * This is used to determine the file type when neither its dir_index/item or
2770  * inode_item exists.
2771  *
2772  * This will *NOT* report error, if any error happens, just consider it does
2773  * not have any normal file extent.
2774  */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2776 {
2777         struct btrfs_path *path;
2778         struct btrfs_key key;
2779         struct btrfs_key found_key;
2780         struct btrfs_file_extent_item *fi;
2781         u8 type;
2782         int ret = 0;
2783
2784         path = btrfs_alloc_path();
2785         if (!path)
2786                 goto out;
2787         key.objectid = ino;
2788         key.type = BTRFS_EXTENT_DATA_KEY;
2789         key.offset = 0;
2790
2791         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2792         if (ret < 0) {
2793                 ret = 0;
2794                 goto out;
2795         }
2796         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797                 ret = btrfs_next_leaf(root, path);
2798                 if (ret) {
2799                         ret = 0;
2800                         goto out;
2801                 }
2802         }
2803         while (1) {
2804                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2805                                       path->slots[0]);
2806                 if (found_key.objectid != ino ||
2807                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2808                         break;
2809                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810                                     struct btrfs_file_extent_item);
2811                 type = btrfs_file_extent_type(path->nodes[0], fi);
2812                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2813                         ret = 1;
2814                         goto out;
2815                 }
2816         }
2817 out:
2818         btrfs_free_path(path);
2819         return ret;
2820 }
2821
2822 static u32 btrfs_type_to_imode(u8 type)
2823 {
2824         static u32 imode_by_btrfs_type[] = {
2825                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2826                 [BTRFS_FT_DIR]          = S_IFDIR,
2827                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2828                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2829                 [BTRFS_FT_FIFO]         = S_IFIFO,
2830                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2831                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2832         };
2833
2834         return imode_by_btrfs_type[(type)];
2835 }
2836
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838                                 struct btrfs_root *root,
2839                                 struct btrfs_path *path,
2840                                 struct inode_record *rec)
2841 {
2842         u8 filetype;
2843         u32 mode = 0700;
2844         int type_recovered = 0;
2845         int ret = 0;
2846
2847         printf("Trying to rebuild inode:%llu\n", rec->ino);
2848
2849         type_recovered = !find_file_type(rec, &filetype);
2850
2851         /*
2852          * Try to determine inode type if type not found.
2853          *
2854          * For found regular file extent, it must be FILE.
2855          * For found dir_item/index, it must be DIR.
2856          *
2857          * For undetermined one, use FILE as fallback.
2858          *
2859          * TODO:
2860          * 1. If found backref(inode_index/item is already handled) to it,
2861          *    it must be DIR.
2862          *    Need new inode-inode ref structure to allow search for that.
2863          */
2864         if (!type_recovered) {
2865                 if (rec->found_file_extent &&
2866                     find_normal_file_extent(root, rec->ino)) {
2867                         type_recovered = 1;
2868                         filetype = BTRFS_FT_REG_FILE;
2869                 } else if (rec->found_dir_item) {
2870                         type_recovered = 1;
2871                         filetype = BTRFS_FT_DIR;
2872                 } else if (!list_empty(&rec->orphan_extents)) {
2873                         type_recovered = 1;
2874                         filetype = BTRFS_FT_REG_FILE;
2875                 } else{
2876                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2877                                rec->ino);
2878                         type_recovered = 1;
2879                         filetype = BTRFS_FT_REG_FILE;
2880                 }
2881         }
2882
2883         ret = btrfs_new_inode(trans, root, rec->ino,
2884                               mode | btrfs_type_to_imode(filetype));
2885         if (ret < 0)
2886                 goto out;
2887
2888         /*
2889          * Here inode rebuild is done, we only rebuild the inode item,
2890          * don't repair the nlink(like move to lost+found).
2891          * That is the job of nlink repair.
2892          *
2893          * We just fill the record and return
2894          */
2895         rec->found_dir_item = 1;
2896         rec->imode = mode | btrfs_type_to_imode(filetype);
2897         rec->nlink = 0;
2898         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899         /* Ensure the inode_nlinks repair function will be called */
2900         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2901 out:
2902         return ret;
2903 }
2904
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906                                       struct btrfs_root *root,
2907                                       struct btrfs_path *path,
2908                                       struct inode_record *rec)
2909 {
2910         struct orphan_data_extent *orphan;
2911         struct orphan_data_extent *tmp;
2912         int ret = 0;
2913
2914         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2915                 /*
2916                  * Check for conflicting file extents
2917                  *
2918                  * Here we don't know whether the extents is compressed or not,
2919                  * so we can only assume it not compressed nor data offset,
2920                  * and use its disk_len as extent length.
2921                  */
2922                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923                                        orphan->offset, orphan->disk_len, 0);
2924                 btrfs_release_path(path);
2925                 if (ret < 0)
2926                         goto out;
2927                 if (!ret) {
2928                         fprintf(stderr,
2929                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930                                 orphan->disk_bytenr, orphan->disk_len);
2931                         ret = btrfs_free_extent(trans,
2932                                         root->fs_info->extent_root,
2933                                         orphan->disk_bytenr, orphan->disk_len,
2934                                         0, root->objectid, orphan->objectid,
2935                                         orphan->offset);
2936                         if (ret < 0)
2937                                 goto out;
2938                 }
2939                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940                                 orphan->offset, orphan->disk_bytenr,
2941                                 orphan->disk_len, orphan->disk_len);
2942                 if (ret < 0)
2943                         goto out;
2944
2945                 /* Update file size info */
2946                 rec->found_size += orphan->disk_len;
2947                 if (rec->found_size == rec->nbytes)
2948                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2949
2950                 /* Update the file extent hole info too */
2951                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2952                                            orphan->disk_len);
2953                 if (ret < 0)
2954                         goto out;
2955                 if (RB_EMPTY_ROOT(&rec->holes))
2956                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2957
2958                 list_del(&orphan->list);
2959                 free(orphan);
2960         }
2961         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2962 out:
2963         return ret;
2964 }
2965
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967                                         struct btrfs_root *root,
2968                                         struct btrfs_path *path,
2969                                         struct inode_record *rec)
2970 {
2971         struct rb_node *node;
2972         struct file_extent_hole *hole;
2973         int found = 0;
2974         int ret = 0;
2975
2976         node = rb_first(&rec->holes);
2977
2978         while (node) {
2979                 found = 1;
2980                 hole = rb_entry(node, struct file_extent_hole, node);
2981                 ret = btrfs_punch_hole(trans, root, rec->ino,
2982                                        hole->start, hole->len);
2983                 if (ret < 0)
2984                         goto out;
2985                 ret = del_file_extent_hole(&rec->holes, hole->start,
2986                                            hole->len);
2987                 if (ret < 0)
2988                         goto out;
2989                 if (RB_EMPTY_ROOT(&rec->holes))
2990                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991                 node = rb_first(&rec->holes);
2992         }
2993         /* special case for a file losing all its file extent */
2994         if (!found) {
2995                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996                                        round_up(rec->isize, root->sectorsize));
2997                 if (ret < 0)
2998                         goto out;
2999         }
3000         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001                rec->ino, root->objectid);
3002 out:
3003         return ret;
3004 }
3005
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3007 {
3008         struct btrfs_trans_handle *trans;
3009         struct btrfs_path *path;
3010         int ret = 0;
3011
3012         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013                              I_ERR_NO_ORPHAN_ITEM |
3014                              I_ERR_LINK_COUNT_WRONG |
3015                              I_ERR_NO_INODE_ITEM |
3016                              I_ERR_FILE_EXTENT_ORPHAN |
3017                              I_ERR_FILE_EXTENT_DISCOUNT|
3018                              I_ERR_FILE_NBYTES_WRONG)))
3019                 return rec->errors;
3020
3021         path = btrfs_alloc_path();
3022         if (!path)
3023                 return -ENOMEM;
3024
3025         /*
3026          * For nlink repair, it may create a dir and add link, so
3027          * 2 for parent(256)'s dir_index and dir_item
3028          * 2 for lost+found dir's inode_item and inode_ref
3029          * 1 for the new inode_ref of the file
3030          * 2 for lost+found dir's dir_index and dir_item for the file
3031          */
3032         trans = btrfs_start_transaction(root, 7);
3033         if (IS_ERR(trans)) {
3034                 btrfs_free_path(path);
3035                 return PTR_ERR(trans);
3036         }
3037
3038         if (rec->errors & I_ERR_NO_INODE_ITEM)
3039                 ret = repair_inode_no_item(trans, root, path, rec);
3040         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043                 ret = repair_inode_discount_extent(trans, root, path, rec);
3044         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045                 ret = repair_inode_isize(trans, root, path, rec);
3046         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047                 ret = repair_inode_orphan_item(trans, root, path, rec);
3048         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049                 ret = repair_inode_nlinks(trans, root, path, rec);
3050         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051                 ret = repair_inode_nbytes(trans, root, path, rec);
3052         btrfs_commit_transaction(trans, root);
3053         btrfs_free_path(path);
3054         return ret;
3055 }
3056
3057 static int check_inode_recs(struct btrfs_root *root,
3058                             struct cache_tree *inode_cache)
3059 {
3060         struct cache_extent *cache;
3061         struct ptr_node *node;
3062         struct inode_record *rec;
3063         struct inode_backref *backref;
3064         int stage = 0;
3065         int ret = 0;
3066         int err = 0;
3067         u64 error = 0;
3068         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3069
3070         if (btrfs_root_refs(&root->root_item) == 0) {
3071                 if (!cache_tree_empty(inode_cache))
3072                         fprintf(stderr, "warning line %d\n", __LINE__);
3073                 return 0;
3074         }
3075
3076         /*
3077          * We need to record the highest inode number for later 'lost+found'
3078          * dir creation.
3079          * We must select an ino not used/referred by any existing inode, or
3080          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081          * this may cause 'lost+found' dir has wrong nlinks.
3082          */
3083         cache = last_cache_extent(inode_cache);
3084         if (cache) {
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 if (rec->ino > root->highest_inode)
3088                         root->highest_inode = rec->ino;
3089         }
3090
3091         /*
3092          * We need to repair backrefs first because we could change some of the
3093          * errors in the inode recs.
3094          *
3095          * We also need to go through and delete invalid backrefs first and then
3096          * add the correct ones second.  We do this because we may get EEXIST
3097          * when adding back the correct index because we hadn't yet deleted the
3098          * invalid index.
3099          *
3100          * For example, if we were missing a dir index then the directories
3101          * isize would be wrong, so if we fixed the isize to what we thought it
3102          * would be and then fixed the backref we'd still have a invalid fs, so
3103          * we need to add back the dir index and then check to see if the isize
3104          * is still wrong.
3105          */
3106         while (stage < 3) {
3107                 stage++;
3108                 if (stage == 3 && !err)
3109                         break;
3110
3111                 cache = search_cache_extent(inode_cache, 0);
3112                 while (repair && cache) {
3113                         node = container_of(cache, struct ptr_node, cache);
3114                         rec = node->data;
3115                         cache = next_cache_extent(cache);
3116
3117                         /* Need to free everything up and rescan */
3118                         if (stage == 3) {
3119                                 remove_cache_extent(inode_cache, &node->cache);
3120                                 free(node);
3121                                 free_inode_rec(rec);
3122                                 continue;
3123                         }
3124
3125                         if (list_empty(&rec->backrefs))
3126                                 continue;
3127
3128                         ret = repair_inode_backrefs(root, rec, inode_cache,
3129                                                     stage == 1);
3130                         if (ret < 0) {
3131                                 err = ret;
3132                                 stage = 2;
3133                                 break;
3134                         } if (ret > 0) {
3135                                 err = -EAGAIN;
3136                         }
3137                 }
3138         }
3139         if (err)
3140                 return err;
3141
3142         rec = get_inode_rec(inode_cache, root_dirid, 0);
3143         BUG_ON(IS_ERR(rec));
3144         if (rec) {
3145                 ret = check_root_dir(rec);
3146                 if (ret) {
3147                         fprintf(stderr, "root %llu root dir %llu error\n",
3148                                 (unsigned long long)root->root_key.objectid,
3149                                 (unsigned long long)root_dirid);
3150                         print_inode_error(root, rec);
3151                         error++;
3152                 }
3153         } else {
3154                 if (repair) {
3155                         struct btrfs_trans_handle *trans;
3156
3157                         trans = btrfs_start_transaction(root, 1);
3158                         if (IS_ERR(trans)) {
3159                                 err = PTR_ERR(trans);
3160                                 return err;
3161                         }
3162
3163                         fprintf(stderr,
3164                                 "root %llu missing its root dir, recreating\n",
3165                                 (unsigned long long)root->objectid);
3166
3167                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3168                         BUG_ON(ret);
3169
3170                         btrfs_commit_transaction(trans, root);
3171                         return -EAGAIN;
3172                 }
3173
3174                 fprintf(stderr, "root %llu root dir %llu not found\n",
3175                         (unsigned long long)root->root_key.objectid,
3176                         (unsigned long long)root_dirid);
3177         }
3178
3179         while (1) {
3180                 cache = search_cache_extent(inode_cache, 0);
3181                 if (!cache)
3182                         break;
3183                 node = container_of(cache, struct ptr_node, cache);
3184                 rec = node->data;
3185                 remove_cache_extent(inode_cache, &node->cache);
3186                 free(node);
3187                 if (rec->ino == root_dirid ||
3188                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189                         free_inode_rec(rec);
3190                         continue;
3191                 }
3192
3193                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194                         ret = check_orphan_item(root, rec->ino);
3195                         if (ret == 0)
3196                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197                         if (can_free_inode_rec(rec)) {
3198                                 free_inode_rec(rec);
3199                                 continue;
3200                         }
3201                 }
3202
3203                 if (!rec->found_inode_item)
3204                         rec->errors |= I_ERR_NO_INODE_ITEM;
3205                 if (rec->found_link != rec->nlink)
3206                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3207                 if (repair) {
3208                         ret = try_repair_inode(root, rec);
3209                         if (ret == 0 && can_free_inode_rec(rec)) {
3210                                 free_inode_rec(rec);
3211                                 continue;
3212                         }
3213                         ret = 0;
3214                 }
3215
3216                 if (!(repair && ret == 0))
3217                         error++;
3218                 print_inode_error(root, rec);
3219                 list_for_each_entry(backref, &rec->backrefs, list) {
3220                         if (!backref->found_dir_item)
3221                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222                         if (!backref->found_dir_index)
3223                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224                         if (!backref->found_inode_ref)
3225                                 backref->errors |= REF_ERR_NO_INODE_REF;
3226                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227                                 " namelen %u name %s filetype %d errors %x",
3228                                 (unsigned long long)backref->dir,
3229                                 (unsigned long long)backref->index,
3230                                 backref->namelen, backref->name,
3231                                 backref->filetype, backref->errors);
3232                         print_ref_error(backref->errors);
3233                 }
3234                 free_inode_rec(rec);
3235         }
3236         return (error > 0) ? -1 : 0;
3237 }
3238
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3240                                         u64 objectid)
3241 {
3242         struct cache_extent *cache;
3243         struct root_record *rec = NULL;
3244         int ret;
3245
3246         cache = lookup_cache_extent(root_cache, objectid, 1);
3247         if (cache) {
3248                 rec = container_of(cache, struct root_record, cache);
3249         } else {
3250                 rec = calloc(1, sizeof(*rec));
3251                 if (!rec)
3252                         return ERR_PTR(-ENOMEM);
3253                 rec->objectid = objectid;
3254                 INIT_LIST_HEAD(&rec->backrefs);
3255                 rec->cache.start = objectid;
3256                 rec->cache.size = 1;
3257
3258                 ret = insert_cache_extent(root_cache, &rec->cache);
3259                 if (ret)
3260                         return ERR_PTR(-EEXIST);
3261         }
3262         return rec;
3263 }
3264
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266                                              u64 ref_root, u64 dir, u64 index,
3267                                              const char *name, int namelen)
3268 {
3269         struct root_backref *backref;
3270
3271         list_for_each_entry(backref, &rec->backrefs, list) {
3272                 if (backref->ref_root != ref_root || backref->dir != dir ||
3273                     backref->namelen != namelen)
3274                         continue;
3275                 if (memcmp(name, backref->name, namelen))
3276                         continue;
3277                 return backref;
3278         }
3279
3280         backref = calloc(1, sizeof(*backref) + namelen + 1);
3281         if (!backref)
3282                 return NULL;
3283         backref->ref_root = ref_root;
3284         backref->dir = dir;
3285         backref->index = index;
3286         backref->namelen = namelen;
3287         memcpy(backref->name, name, namelen);
3288         backref->name[namelen] = '\0';
3289         list_add_tail(&backref->list, &rec->backrefs);
3290         return backref;
3291 }
3292
3293 static void free_root_record(struct cache_extent *cache)
3294 {
3295         struct root_record *rec;
3296         struct root_backref *backref;
3297
3298         rec = container_of(cache, struct root_record, cache);
3299         while (!list_empty(&rec->backrefs)) {
3300                 backref = to_root_backref(rec->backrefs.next);
3301                 list_del(&backref->list);
3302                 free(backref);
3303         }
3304
3305         kfree(rec);
3306 }
3307
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3309
3310 static int add_root_backref(struct cache_tree *root_cache,
3311                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3312                             const char *name, int namelen,
3313                             int item_type, int errors)
3314 {
3315         struct root_record *rec;
3316         struct root_backref *backref;
3317
3318         rec = get_root_rec(root_cache, root_id);
3319         BUG_ON(IS_ERR(rec));
3320         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3321         BUG_ON(!backref);
3322
3323         backref->errors |= errors;
3324
3325         if (item_type != BTRFS_DIR_ITEM_KEY) {
3326                 if (backref->found_dir_index || backref->found_back_ref ||
3327                     backref->found_forward_ref) {
3328                         if (backref->index != index)
3329                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3330                 } else {
3331                         backref->index = index;
3332                 }
3333         }
3334
3335         if (item_type == BTRFS_DIR_ITEM_KEY) {
3336                 if (backref->found_forward_ref)
3337                         rec->found_ref++;
3338                 backref->found_dir_item = 1;
3339         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340                 backref->found_dir_index = 1;
3341         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342                 if (backref->found_forward_ref)
3343                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3344                 else if (backref->found_dir_item)
3345                         rec->found_ref++;
3346                 backref->found_forward_ref = 1;
3347         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348                 if (backref->found_back_ref)
3349                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350                 backref->found_back_ref = 1;
3351         } else {
3352                 BUG_ON(1);
3353         }
3354
3355         if (backref->found_forward_ref && backref->found_dir_item)
3356                 backref->reachable = 1;
3357         return 0;
3358 }
3359
3360 static int merge_root_recs(struct btrfs_root *root,
3361                            struct cache_tree *src_cache,
3362                            struct cache_tree *dst_cache)
3363 {
3364         struct cache_extent *cache;
3365         struct ptr_node *node;
3366         struct inode_record *rec;
3367         struct inode_backref *backref;
3368         int ret = 0;
3369
3370         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371                 free_inode_recs_tree(src_cache);
3372                 return 0;
3373         }
3374
3375         while (1) {
3376                 cache = search_cache_extent(src_cache, 0);
3377                 if (!cache)
3378                         break;
3379                 node = container_of(cache, struct ptr_node, cache);
3380                 rec = node->data;
3381                 remove_cache_extent(src_cache, &node->cache);
3382                 free(node);
3383
3384                 ret = is_child_root(root, root->objectid, rec->ino);
3385                 if (ret < 0)
3386                         break;
3387                 else if (ret == 0)
3388                         goto skip;
3389
3390                 list_for_each_entry(backref, &rec->backrefs, list) {
3391                         BUG_ON(backref->found_inode_ref);
3392                         if (backref->found_dir_item)
3393                                 add_root_backref(dst_cache, rec->ino,
3394                                         root->root_key.objectid, backref->dir,
3395                                         backref->index, backref->name,
3396                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3397                                         backref->errors);
3398                         if (backref->found_dir_index)
3399                                 add_root_backref(dst_cache, rec->ino,
3400                                         root->root_key.objectid, backref->dir,
3401                                         backref->index, backref->name,
3402                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3403                                         backref->errors);
3404                 }
3405 skip:
3406                 free_inode_rec(rec);
3407         }
3408         if (ret < 0)
3409                 return ret;
3410         return 0;
3411 }
3412
3413 static int check_root_refs(struct btrfs_root *root,
3414                            struct cache_tree *root_cache)
3415 {
3416         struct root_record *rec;
3417         struct root_record *ref_root;
3418         struct root_backref *backref;
3419         struct cache_extent *cache;
3420         int loop = 1;
3421         int ret;
3422         int error;
3423         int errors = 0;
3424
3425         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426         BUG_ON(IS_ERR(rec));
3427         rec->found_ref = 1;
3428
3429         /* fixme: this can not detect circular references */
3430         while (loop) {
3431                 loop = 0;
3432                 cache = search_cache_extent(root_cache, 0);
3433                 while (1) {
3434                         if (!cache)
3435                                 break;
3436                         rec = container_of(cache, struct root_record, cache);
3437                         cache = next_cache_extent(cache);
3438
3439                         if (rec->found_ref == 0)
3440                                 continue;
3441
3442                         list_for_each_entry(backref, &rec->backrefs, list) {
3443                                 if (!backref->reachable)
3444                                         continue;
3445
3446                                 ref_root = get_root_rec(root_cache,
3447                                                         backref->ref_root);
3448                                 BUG_ON(IS_ERR(ref_root));
3449                                 if (ref_root->found_ref > 0)
3450                                         continue;
3451
3452                                 backref->reachable = 0;
3453                                 rec->found_ref--;
3454                                 if (rec->found_ref == 0)
3455                                         loop = 1;
3456                         }
3457                 }
3458         }
3459
3460         cache = search_cache_extent(root_cache, 0);
3461         while (1) {
3462                 if (!cache)
3463                         break;
3464                 rec = container_of(cache, struct root_record, cache);
3465                 cache = next_cache_extent(cache);
3466
3467                 if (rec->found_ref == 0 &&
3468                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470                         ret = check_orphan_item(root->fs_info->tree_root,
3471                                                 rec->objectid);
3472                         if (ret == 0)
3473                                 continue;
3474
3475                         /*
3476                          * If we don't have a root item then we likely just have
3477                          * a dir item in a snapshot for this root but no actual
3478                          * ref key or anything so it's meaningless.
3479                          */
3480                         if (!rec->found_root_item)
3481                                 continue;
3482                         errors++;
3483                         fprintf(stderr, "fs tree %llu not referenced\n",
3484                                 (unsigned long long)rec->objectid);
3485                 }
3486
3487                 error = 0;
3488                 if (rec->found_ref > 0 && !rec->found_root_item)
3489                         error = 1;
3490                 list_for_each_entry(backref, &rec->backrefs, list) {
3491                         if (!backref->found_dir_item)
3492                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493                         if (!backref->found_dir_index)
3494                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495                         if (!backref->found_back_ref)
3496                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497                         if (!backref->found_forward_ref)
3498                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3499                         if (backref->reachable && backref->errors)
3500                                 error = 1;
3501                 }
3502                 if (!error)
3503                         continue;
3504
3505                 errors++;
3506                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507                         (unsigned long long)rec->objectid, rec->found_ref,
3508                          rec->found_root_item ? "" : "not found");
3509
3510                 list_for_each_entry(backref, &rec->backrefs, list) {
3511                         if (!backref->reachable)
3512                                 continue;
3513                         if (!backref->errors && rec->found_root_item)
3514                                 continue;
3515                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516                                 " index %llu namelen %u name %s errors %x\n",
3517                                 (unsigned long long)backref->ref_root,
3518                                 (unsigned long long)backref->dir,
3519                                 (unsigned long long)backref->index,
3520                                 backref->namelen, backref->name,
3521                                 backref->errors);
3522                         print_ref_error(backref->errors);
3523                 }
3524         }
3525         return errors > 0 ? 1 : 0;
3526 }
3527
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529                             struct btrfs_key *key,
3530                             struct cache_tree *root_cache)
3531 {
3532         u64 dirid;
3533         u64 index;
3534         u32 len;
3535         u32 name_len;
3536         struct btrfs_root_ref *ref;
3537         char namebuf[BTRFS_NAME_LEN];
3538         int error;
3539
3540         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3541
3542         dirid = btrfs_root_ref_dirid(eb, ref);
3543         index = btrfs_root_ref_sequence(eb, ref);
3544         name_len = btrfs_root_ref_name_len(eb, ref);
3545
3546         if (name_len <= BTRFS_NAME_LEN) {
3547                 len = name_len;
3548                 error = 0;
3549         } else {
3550                 len = BTRFS_NAME_LEN;
3551                 error = REF_ERR_NAME_TOO_LONG;
3552         }
3553         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3554
3555         if (key->type == BTRFS_ROOT_REF_KEY) {
3556                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557                                  index, namebuf, len, key->type, error);
3558         } else {
3559                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560                                  index, namebuf, len, key->type, error);
3561         }
3562         return 0;
3563 }
3564
3565 static void free_corrupt_block(struct cache_extent *cache)
3566 {
3567         struct btrfs_corrupt_block *corrupt;
3568
3569         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3570         free(corrupt);
3571 }
3572
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3574
3575 /*
3576  * Repair the btree of the given root.
3577  *
3578  * The fix is to remove the node key in corrupt_blocks cache_tree.
3579  * and rebalance the tree.
3580  * After the fix, the btree should be writeable.
3581  */
3582 static int repair_btree(struct btrfs_root *root,
3583                         struct cache_tree *corrupt_blocks)
3584 {
3585         struct btrfs_trans_handle *trans;
3586         struct btrfs_path *path;
3587         struct btrfs_corrupt_block *corrupt;
3588         struct cache_extent *cache;
3589         struct btrfs_key key;
3590         u64 offset;
3591         int level;
3592         int ret = 0;
3593
3594         if (cache_tree_empty(corrupt_blocks))
3595                 return 0;
3596
3597         path = btrfs_alloc_path();
3598         if (!path)
3599                 return -ENOMEM;
3600
3601         trans = btrfs_start_transaction(root, 1);
3602         if (IS_ERR(trans)) {
3603                 ret = PTR_ERR(trans);
3604                 fprintf(stderr, "Error starting transaction: %s\n",
3605                         strerror(-ret));
3606                 goto out_free_path;
3607         }
3608         cache = first_cache_extent(corrupt_blocks);
3609         while (cache) {
3610                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3611                                        cache);
3612                 level = corrupt->level;
3613                 path->lowest_level = level;
3614                 key.objectid = corrupt->key.objectid;
3615                 key.type = corrupt->key.type;
3616                 key.offset = corrupt->key.offset;
3617
3618                 /*
3619                  * Here we don't want to do any tree balance, since it may
3620                  * cause a balance with corrupted brother leaf/node,
3621                  * so ins_len set to 0 here.
3622                  * Balance will be done after all corrupt node/leaf is deleted.
3623                  */
3624                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3625                 if (ret < 0)
3626                         goto out;
3627                 offset = btrfs_node_blockptr(path->nodes[level],
3628                                              path->slots[level]);
3629
3630                 /* Remove the ptr */
3631                 ret = btrfs_del_ptr(trans, root, path, level,
3632                                     path->slots[level]);
3633                 if (ret < 0)
3634                         goto out;
3635                 /*
3636                  * Remove the corresponding extent
3637                  * return value is not concerned.
3638                  */
3639                 btrfs_release_path(path);
3640                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641                                         0, root->root_key.objectid,
3642                                         level - 1, 0);
3643                 cache = next_cache_extent(cache);
3644         }
3645
3646         /* Balance the btree using btrfs_search_slot() */
3647         cache = first_cache_extent(corrupt_blocks);
3648         while (cache) {
3649                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3650                                        cache);
3651                 memcpy(&key, &corrupt->key, sizeof(key));
3652                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3653                 if (ret < 0)
3654                         goto out;
3655                 /* return will always >0 since it won't find the item */
3656                 ret = 0;
3657                 btrfs_release_path(path);
3658                 cache = next_cache_extent(cache);
3659         }
3660 out:
3661         btrfs_commit_transaction(trans, root);
3662 out_free_path:
3663         btrfs_free_path(path);
3664         return ret;
3665 }
3666
3667 static int check_fs_root(struct btrfs_root *root,
3668                          struct cache_tree *root_cache,
3669                          struct walk_control *wc)
3670 {
3671         int ret = 0;
3672         int err = 0;
3673         int wret;
3674         int level;
3675         struct btrfs_path path;
3676         struct shared_node root_node;
3677         struct root_record *rec;
3678         struct btrfs_root_item *root_item = &root->root_item;
3679         struct cache_tree corrupt_blocks;
3680         struct orphan_data_extent *orphan;
3681         struct orphan_data_extent *tmp;
3682         enum btrfs_tree_block_status status;
3683         struct node_refs nrefs;
3684
3685         /*
3686          * Reuse the corrupt_block cache tree to record corrupted tree block
3687          *
3688          * Unlike the usage in extent tree check, here we do it in a per
3689          * fs/subvol tree base.
3690          */
3691         cache_tree_init(&corrupt_blocks);
3692         root->fs_info->corrupt_blocks = &corrupt_blocks;
3693
3694         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695                 rec = get_root_rec(root_cache, root->root_key.objectid);
3696                 BUG_ON(IS_ERR(rec));
3697                 if (btrfs_root_refs(root_item) > 0)
3698                         rec->found_root_item = 1;
3699         }
3700
3701         btrfs_init_path(&path);
3702         memset(&root_node, 0, sizeof(root_node));
3703         cache_tree_init(&root_node.root_cache);
3704         cache_tree_init(&root_node.inode_cache);
3705         memset(&nrefs, 0, sizeof(nrefs));
3706
3707         /* Move the orphan extent record to corresponding inode_record */
3708         list_for_each_entry_safe(orphan, tmp,
3709                                  &root->orphan_data_extents, list) {
3710                 struct inode_record *inode;
3711
3712                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3713                                       1);
3714                 BUG_ON(IS_ERR(inode));
3715                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716                 list_move(&orphan->list, &inode->orphan_extents);
3717         }
3718
3719         level = btrfs_header_level(root->node);
3720         memset(wc->nodes, 0, sizeof(wc->nodes));
3721         wc->nodes[level] = &root_node;
3722         wc->active_node = level;
3723         wc->root_level = level;
3724
3725         /* We may not have checked the root block, lets do that now */
3726         if (btrfs_is_leaf(root->node))
3727                 status = btrfs_check_leaf(root, NULL, root->node);
3728         else
3729                 status = btrfs_check_node(root, NULL, root->node);
3730         if (status != BTRFS_TREE_BLOCK_CLEAN)
3731                 return -EIO;
3732
3733         if (btrfs_root_refs(root_item) > 0 ||
3734             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735                 path.nodes[level] = root->node;
3736                 extent_buffer_get(root->node);
3737                 path.slots[level] = 0;
3738         } else {
3739                 struct btrfs_key key;
3740                 struct btrfs_disk_key found_key;
3741
3742                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743                 level = root_item->drop_level;
3744                 path.lowest_level = level;
3745                 if (level > btrfs_header_level(root->node) ||
3746                     level >= BTRFS_MAX_LEVEL) {
3747                         error("ignoring invalid drop level: %u", level);
3748                         goto skip_walking;
3749                 }
3750                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3751                 if (wret < 0)
3752                         goto skip_walking;
3753                 btrfs_node_key(path.nodes[level], &found_key,
3754                                 path.slots[level]);
3755                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3756                                         sizeof(found_key)));
3757         }
3758
3759         while (1) {
3760                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3761                 if (wret < 0)
3762                         ret = wret;
3763                 if (wret != 0)
3764                         break;
3765
3766                 wret = walk_up_tree(root, &path, wc, &level);
3767                 if (wret < 0)
3768                         ret = wret;
3769                 if (wret != 0)
3770                         break;
3771         }
3772 skip_walking:
3773         btrfs_release_path(&path);
3774
3775         if (!cache_tree_empty(&corrupt_blocks)) {
3776                 struct cache_extent *cache;
3777                 struct btrfs_corrupt_block *corrupt;
3778
3779                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3780                        root->root_key.objectid);
3781                 cache = first_cache_extent(&corrupt_blocks);
3782                 while (cache) {
3783                         corrupt = container_of(cache,
3784                                                struct btrfs_corrupt_block,
3785                                                cache);
3786                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3787                                cache->start, corrupt->level,
3788                                corrupt->key.objectid, corrupt->key.type,
3789                                corrupt->key.offset);
3790                         cache = next_cache_extent(cache);
3791                 }
3792                 if (repair) {
3793                         printf("Try to repair the btree for root %llu\n",
3794                                root->root_key.objectid);
3795                         ret = repair_btree(root, &corrupt_blocks);
3796                         if (ret < 0)
3797                                 fprintf(stderr, "Failed to repair btree: %s\n",
3798                                         strerror(-ret));
3799                         if (!ret)
3800                                 printf("Btree for root %llu is fixed\n",
3801                                        root->root_key.objectid);
3802                 }
3803         }
3804
3805         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3806         if (err < 0)
3807                 ret = err;
3808
3809         if (root_node.current) {
3810                 root_node.current->checked = 1;
3811                 maybe_free_inode_rec(&root_node.inode_cache,
3812                                 root_node.current);
3813         }
3814
3815         err = check_inode_recs(root, &root_node.inode_cache);
3816         if (!ret)
3817                 ret = err;
3818
3819         free_corrupt_blocks_tree(&corrupt_blocks);
3820         root->fs_info->corrupt_blocks = NULL;
3821         free_orphan_data_extents(&root->orphan_data_extents);
3822         return ret;
3823 }
3824
3825 static int fs_root_objectid(u64 objectid)
3826 {
3827         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3828             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3829                 return 1;
3830         return is_fstree(objectid);
3831 }
3832
3833 static int check_fs_roots(struct btrfs_root *root,
3834                           struct cache_tree *root_cache)
3835 {
3836         struct btrfs_path path;
3837         struct btrfs_key key;
3838         struct walk_control wc;
3839         struct extent_buffer *leaf, *tree_node;
3840         struct btrfs_root *tmp_root;
3841         struct btrfs_root *tree_root = root->fs_info->tree_root;
3842         int ret;
3843         int err = 0;
3844
3845         if (ctx.progress_enabled) {
3846                 ctx.tp = TASK_FS_ROOTS;
3847                 task_start(ctx.info);
3848         }
3849
3850         /*
3851          * Just in case we made any changes to the extent tree that weren't
3852          * reflected into the free space cache yet.
3853          */
3854         if (repair)
3855                 reset_cached_block_groups(root->fs_info);
3856         memset(&wc, 0, sizeof(wc));
3857         cache_tree_init(&wc.shared);
3858         btrfs_init_path(&path);
3859
3860 again:
3861         key.offset = 0;
3862         key.objectid = 0;
3863         key.type = BTRFS_ROOT_ITEM_KEY;
3864         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3865         if (ret < 0) {
3866                 err = 1;
3867                 goto out;
3868         }
3869         tree_node = tree_root->node;
3870         while (1) {
3871                 if (tree_node != tree_root->node) {
3872                         free_root_recs_tree(root_cache);
3873                         btrfs_release_path(&path);
3874                         goto again;
3875                 }
3876                 leaf = path.nodes[0];
3877                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3878                         ret = btrfs_next_leaf(tree_root, &path);
3879                         if (ret) {
3880                                 if (ret < 0)
3881                                         err = 1;
3882                                 break;
3883                         }
3884                         leaf = path.nodes[0];
3885                 }
3886                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3887                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3888                     fs_root_objectid(key.objectid)) {
3889                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3890                                 tmp_root = btrfs_read_fs_root_no_cache(
3891                                                 root->fs_info, &key);
3892                         } else {
3893                                 key.offset = (u64)-1;
3894                                 tmp_root = btrfs_read_fs_root(
3895                                                 root->fs_info, &key);
3896                         }
3897                         if (IS_ERR(tmp_root)) {
3898                                 err = 1;
3899                                 goto next;
3900                         }
3901                         ret = check_fs_root(tmp_root, root_cache, &wc);
3902                         if (ret == -EAGAIN) {
3903                                 free_root_recs_tree(root_cache);
3904                                 btrfs_release_path(&path);
3905                                 goto again;
3906                         }
3907                         if (ret)
3908                                 err = 1;
3909                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3910                                 btrfs_free_fs_root(tmp_root);
3911                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3912                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3913                         process_root_ref(leaf, path.slots[0], &key,
3914                                          root_cache);
3915                 }
3916 next:
3917                 path.slots[0]++;
3918         }
3919 out:
3920         btrfs_release_path(&path);
3921         if (err)
3922                 free_extent_cache_tree(&wc.shared);
3923         if (!cache_tree_empty(&wc.shared))
3924                 fprintf(stderr, "warning line %d\n", __LINE__);
3925
3926         task_stop(ctx.info);
3927
3928         return err;
3929 }
3930
3931 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3932 {
3933         struct rb_node *n;
3934         struct extent_backref *back;
3935         struct tree_backref *tback;
3936         struct data_backref *dback;
3937         u64 found = 0;
3938         int err = 0;
3939
3940         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3941                 back = rb_node_to_extent_backref(n);
3942                 if (!back->found_extent_tree) {
3943                         err = 1;
3944                         if (!print_errs)
3945                                 goto out;
3946                         if (back->is_data) {
3947                                 dback = to_data_backref(back);
3948                                 fprintf(stderr, "Backref %llu %s %llu"
3949                                         " owner %llu offset %llu num_refs %lu"
3950                                         " not found in extent tree\n",
3951                                         (unsigned long long)rec->start,
3952                                         back->full_backref ?
3953                                         "parent" : "root",
3954                                         back->full_backref ?
3955                                         (unsigned long long)dback->parent:
3956                                         (unsigned long long)dback->root,
3957                                         (unsigned long long)dback->owner,
3958                                         (unsigned long long)dback->offset,
3959                                         (unsigned long)dback->num_refs);
3960                         } else {
3961                                 tback = to_tree_backref(back);
3962                                 fprintf(stderr, "Backref %llu parent %llu"
3963                                         " root %llu not found in extent tree\n",
3964                                         (unsigned long long)rec->start,
3965                                         (unsigned long long)tback->parent,
3966                                         (unsigned long long)tback->root);
3967                         }
3968                 }
3969                 if (!back->is_data && !back->found_ref) {
3970                         err = 1;
3971                         if (!print_errs)
3972                                 goto out;
3973                         tback = to_tree_backref(back);
3974                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3975                                 (unsigned long long)rec->start,
3976                                 back->full_backref ? "parent" : "root",
3977                                 back->full_backref ?
3978                                 (unsigned long long)tback->parent :
3979                                 (unsigned long long)tback->root, back);
3980                 }
3981                 if (back->is_data) {
3982                         dback = to_data_backref(back);
3983                         if (dback->found_ref != dback->num_refs) {
3984                                 err = 1;
3985                                 if (!print_errs)
3986                                         goto out;
3987                                 fprintf(stderr, "Incorrect local backref count"
3988                                         " on %llu %s %llu owner %llu"
3989                                         " offset %llu found %u wanted %u back %p\n",
3990                                         (unsigned long long)rec->start,
3991                                         back->full_backref ?
3992                                         "parent" : "root",
3993                                         back->full_backref ?
3994                                         (unsigned long long)dback->parent:
3995                                         (unsigned long long)dback->root,
3996                                         (unsigned long long)dback->owner,
3997                                         (unsigned long long)dback->offset,
3998                                         dback->found_ref, dback->num_refs, back);
3999                         }
4000                         if (dback->disk_bytenr != rec->start) {
4001                                 err = 1;
4002                                 if (!print_errs)
4003                                         goto out;
4004                                 fprintf(stderr, "Backref disk bytenr does not"
4005                                         " match extent record, bytenr=%llu, "
4006                                         "ref bytenr=%llu\n",
4007                                         (unsigned long long)rec->start,
4008                                         (unsigned long long)dback->disk_bytenr);
4009                         }
4010
4011                         if (dback->bytes != rec->nr) {
4012                                 err = 1;
4013                                 if (!print_errs)
4014                                         goto out;
4015                                 fprintf(stderr, "Backref bytes do not match "
4016                                         "extent backref, bytenr=%llu, ref "
4017                                         "bytes=%llu, backref bytes=%llu\n",
4018                                         (unsigned long long)rec->start,
4019                                         (unsigned long long)rec->nr,
4020                                         (unsigned long long)dback->bytes);
4021                         }
4022                 }
4023                 if (!back->is_data) {
4024                         found += 1;
4025                 } else {
4026                         dback = to_data_backref(back);
4027                         found += dback->found_ref;
4028                 }
4029         }
4030         if (found != rec->refs) {
4031                 err = 1;
4032                 if (!print_errs)
4033                         goto out;
4034                 fprintf(stderr, "Incorrect global backref count "
4035                         "on %llu found %llu wanted %llu\n",
4036                         (unsigned long long)rec->start,
4037                         (unsigned long long)found,
4038                         (unsigned long long)rec->refs);
4039         }
4040 out:
4041         return err;
4042 }
4043
/*
 * Node destructor: free the extent_backref that rb_node_to_extent_backref()
 * yields for @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
4050
4051 static void free_all_extent_backrefs(struct extent_record *rec)
4052 {
4053         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4054 }
4055
4056 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4057                                      struct cache_tree *extent_cache)
4058 {
4059         struct cache_extent *cache;
4060         struct extent_record *rec;
4061
4062         while (1) {
4063                 cache = first_cache_extent(extent_cache);
4064                 if (!cache)
4065                         break;
4066                 rec = container_of(cache, struct extent_record, cache);
4067                 remove_cache_extent(extent_cache, cache);
4068                 free_all_extent_backrefs(rec);
4069                 free(rec);
4070         }
4071 }
4072
4073 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4074                                  struct extent_record *rec)
4075 {
4076         if (rec->content_checked && rec->owner_ref_checked &&
4077             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4078             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4079             !rec->bad_full_backref && !rec->crossing_stripes &&
4080             !rec->wrong_chunk_type) {
4081                 remove_cache_extent(extent_cache, &rec->cache);
4082                 free_all_extent_backrefs(rec);
4083                 list_del_init(&rec->list);
4084                 free(rec);
4085         }
4086         return 0;
4087 }
4088
4089 static int check_owner_ref(struct btrfs_root *root,
4090                             struct extent_record *rec,
4091                             struct extent_buffer *buf)
4092 {
4093         struct extent_backref *node, *tmp;
4094         struct tree_backref *back;
4095         struct btrfs_root *ref_root;
4096         struct btrfs_key key;
4097         struct btrfs_path path;
4098         struct extent_buffer *parent;
4099         int level;
4100         int found = 0;
4101         int ret;
4102
4103         rbtree_postorder_for_each_entry_safe(node, tmp,
4104                                              &rec->backref_tree, node) {
4105                 if (node->is_data)
4106                         continue;
4107                 if (!node->found_ref)
4108                         continue;
4109                 if (node->full_backref)
4110                         continue;
4111                 back = to_tree_backref(node);
4112                 if (btrfs_header_owner(buf) == back->root)
4113                         return 0;
4114         }
4115         BUG_ON(rec->is_root);
4116
4117         /* try to find the block by search corresponding fs tree */
4118         key.objectid = btrfs_header_owner(buf);
4119         key.type = BTRFS_ROOT_ITEM_KEY;
4120         key.offset = (u64)-1;
4121
4122         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4123         if (IS_ERR(ref_root))
4124                 return 1;
4125
4126         level = btrfs_header_level(buf);
4127         if (level == 0)
4128                 btrfs_item_key_to_cpu(buf, &key, 0);
4129         else
4130                 btrfs_node_key_to_cpu(buf, &key, 0);
4131
4132         btrfs_init_path(&path);
4133         path.lowest_level = level + 1;
4134         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4135         if (ret < 0)
4136                 return 0;
4137
4138         parent = path.nodes[level + 1];
4139         if (parent && buf->start == btrfs_node_blockptr(parent,
4140                                                         path.slots[level + 1]))
4141                 found = 1;
4142
4143         btrfs_release_path(&path);
4144         return found ? 0 : 1;
4145 }
4146
4147 static int is_extent_tree_record(struct extent_record *rec)
4148 {
4149         struct extent_backref *ref, *tmp;
4150         struct tree_backref *back;
4151         int is_extent = 0;
4152
4153         rbtree_postorder_for_each_entry_safe(ref, tmp,
4154                                              &rec->backref_tree, node) {
4155                 if (ref->is_data)
4156                         return 0;
4157                 back = to_tree_backref(ref);
4158                 if (ref->full_backref)
4159                         return 0;
4160                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4161                         is_extent = 1;
4162         }
4163         return is_extent;
4164 }
4165
4166
4167 static int record_bad_block_io(struct btrfs_fs_info *info,
4168                                struct cache_tree *extent_cache,
4169                                u64 start, u64 len)
4170 {
4171         struct extent_record *rec;
4172         struct cache_extent *cache;
4173         struct btrfs_key key;
4174
4175         cache = lookup_cache_extent(extent_cache, start, len);
4176         if (!cache)
4177                 return 0;
4178
4179         rec = container_of(cache, struct extent_record, cache);
4180         if (!is_extent_tree_record(rec))
4181                 return 0;
4182
4183         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4184         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4185 }
4186
4187 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4188                        struct extent_buffer *buf, int slot)
4189 {
4190         if (btrfs_header_level(buf)) {
4191                 struct btrfs_key_ptr ptr1, ptr2;
4192
4193                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4194                                    sizeof(struct btrfs_key_ptr));
4195                 read_extent_buffer(buf, &ptr2,
4196                                    btrfs_node_key_ptr_offset(slot + 1),
4197                                    sizeof(struct btrfs_key_ptr));
4198                 write_extent_buffer(buf, &ptr1,
4199                                     btrfs_node_key_ptr_offset(slot + 1),
4200                                     sizeof(struct btrfs_key_ptr));
4201                 write_extent_buffer(buf, &ptr2,
4202                                     btrfs_node_key_ptr_offset(slot),
4203                                     sizeof(struct btrfs_key_ptr));
4204                 if (slot == 0) {
4205                         struct btrfs_disk_key key;
4206                         btrfs_node_key(buf, &key, 0);
4207                         btrfs_fixup_low_keys(root, path, &key,
4208                                              btrfs_header_level(buf) + 1);
4209                 }
4210         } else {
4211                 struct btrfs_item *item1, *item2;
4212                 struct btrfs_key k1, k2;
4213                 char *item1_data, *item2_data;
4214                 u32 item1_offset, item2_offset, item1_size, item2_size;
4215
4216                 item1 = btrfs_item_nr(slot);
4217                 item2 = btrfs_item_nr(slot + 1);
4218                 btrfs_item_key_to_cpu(buf, &k1, slot);
4219                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4220                 item1_offset = btrfs_item_offset(buf, item1);
4221                 item2_offset = btrfs_item_offset(buf, item2);
4222                 item1_size = btrfs_item_size(buf, item1);
4223                 item2_size = btrfs_item_size(buf, item2);
4224
4225                 item1_data = malloc(item1_size);
4226                 if (!item1_data)
4227                         return -ENOMEM;
4228                 item2_data = malloc(item2_size);
4229                 if (!item2_data) {
4230                         free(item1_data);
4231                         return -ENOMEM;
4232                 }
4233
4234                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4235                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4236
4237                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4238                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4239                 free(item1_data);
4240                 free(item2_data);
4241
4242                 btrfs_set_item_offset(buf, item1, item2_offset);
4243                 btrfs_set_item_offset(buf, item2, item1_offset);
4244                 btrfs_set_item_size(buf, item1, item2_size);
4245                 btrfs_set_item_size(buf, item2, item1_size);
4246
4247                 path->slots[0] = slot;
4248                 btrfs_set_item_key_unsafe(root, path, &k2);
4249                 path->slots[0] = slot + 1;
4250                 btrfs_set_item_key_unsafe(root, path, &k1);
4251         }
4252         return 0;
4253 }
4254
4255 static int fix_key_order(struct btrfs_trans_handle *trans,
4256                          struct btrfs_root *root,
4257                          struct btrfs_path *path)
4258 {
4259         struct extent_buffer *buf;
4260         struct btrfs_key k1, k2;
4261         int i;
4262         int level = path->lowest_level;
4263         int ret = -EIO;
4264
4265         buf = path->nodes[level];
4266         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4267                 if (level) {
4268                         btrfs_node_key_to_cpu(buf, &k1, i);
4269                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4270                 } else {
4271                         btrfs_item_key_to_cpu(buf, &k1, i);
4272                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4273                 }
4274                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4275                         continue;
4276                 ret = swap_values(root, path, buf, i);
4277                 if (ret)
4278                         break;
4279                 btrfs_mark_buffer_dirty(buf);
4280                 i = 0;
4281         }
4282         return ret;
4283 }
4284
4285 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4286                              struct btrfs_root *root,
4287                              struct btrfs_path *path,
4288                              struct extent_buffer *buf, int slot)
4289 {
4290         struct btrfs_key key;
4291         int nritems = btrfs_header_nritems(buf);
4292
4293         btrfs_item_key_to_cpu(buf, &key, slot);
4294
4295         /* These are all the keys we can deal with missing. */
4296         if (key.type != BTRFS_DIR_INDEX_KEY &&
4297             key.type != BTRFS_EXTENT_ITEM_KEY &&
4298             key.type != BTRFS_METADATA_ITEM_KEY &&
4299             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4300             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4301                 return -1;
4302
4303         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4304                (unsigned long long)key.objectid, key.type,
4305                (unsigned long long)key.offset, slot, buf->start);
4306         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4307                               btrfs_item_nr_offset(slot + 1),
4308                               sizeof(struct btrfs_item) *
4309                               (nritems - slot - 1));
4310         btrfs_set_header_nritems(buf, nritems - 1);
4311         if (slot == 0) {
4312                 struct btrfs_disk_key disk_key;
4313
4314                 btrfs_item_key(buf, &disk_key, 0);
4315                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4316         }
4317         btrfs_mark_buffer_dirty(buf);
4318         return 0;
4319 }
4320
4321 static int fix_item_offset(struct btrfs_trans_handle *trans,
4322                            struct btrfs_root *root,
4323                            struct btrfs_path *path)
4324 {
4325         struct extent_buffer *buf;
4326         int i;
4327         int ret = 0;
4328
4329         /* We should only get this for leaves */
4330         BUG_ON(path->lowest_level);
4331         buf = path->nodes[0];
4332 again:
4333         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4334                 unsigned int shift = 0, offset;
4335
4336                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4337                     BTRFS_LEAF_DATA_SIZE(root)) {
4338                         if (btrfs_item_end_nr(buf, i) >
4339                             BTRFS_LEAF_DATA_SIZE(root)) {
4340                                 ret = delete_bogus_item(trans, root, path,
4341                                                         buf, i);
4342                                 if (!ret)
4343                                         goto again;
4344                                 fprintf(stderr, "item is off the end of the "
4345                                         "leaf, can't fix\n");
4346                                 ret = -EIO;
4347                                 break;
4348                         }
4349                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4350                                 btrfs_item_end_nr(buf, i);
4351                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4352                            btrfs_item_offset_nr(buf, i - 1)) {
4353                         if (btrfs_item_end_nr(buf, i) >
4354                             btrfs_item_offset_nr(buf, i - 1)) {
4355                                 ret = delete_bogus_item(trans, root, path,
4356                                                         buf, i);
4357                                 if (!ret)
4358                                         goto again;
4359                                 fprintf(stderr, "items overlap, can't fix\n");
4360                                 ret = -EIO;
4361                                 break;
4362                         }
4363                         shift = btrfs_item_offset_nr(buf, i - 1) -
4364                                 btrfs_item_end_nr(buf, i);
4365                 }
4366                 if (!shift)
4367                         continue;
4368
4369                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4370                        i, shift, (unsigned long long)buf->start);
4371                 offset = btrfs_item_offset_nr(buf, i);
4372                 memmove_extent_buffer(buf,
4373                                       btrfs_leaf_data(buf) + offset + shift,
4374                                       btrfs_leaf_data(buf) + offset,
4375                                       btrfs_item_size_nr(buf, i));
4376                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4377                                       offset + shift);
4378                 btrfs_mark_buffer_dirty(buf);
4379         }
4380
4381         /*
4382          * We may have moved things, in which case we want to exit so we don't
4383          * write those changes out.  Once we have proper abort functionality in
4384          * progs this can be changed to something nicer.
4385          */
4386         BUG_ON(ret);
4387         return ret;
4388 }
4389
4390 /*
4391  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4392  * then just return -EIO.
4393  */
4394 static int try_to_fix_bad_block(struct btrfs_root *root,
4395                                 struct extent_buffer *buf,
4396                                 enum btrfs_tree_block_status status)
4397 {
4398         struct btrfs_trans_handle *trans;
4399         struct ulist *roots;
4400         struct ulist_node *node;
4401         struct btrfs_root *search_root;
4402         struct btrfs_path *path;
4403         struct ulist_iterator iter;
4404         struct btrfs_key root_key, key;
4405         int ret;
4406
4407         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4408             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4409                 return -EIO;
4410
4411         path = btrfs_alloc_path();
4412         if (!path)
4413                 return -EIO;
4414
4415         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4416                                    0, &roots);
4417         if (ret) {
4418                 btrfs_free_path(path);
4419                 return -EIO;
4420         }
4421
4422         ULIST_ITER_INIT(&iter);
4423         while ((node = ulist_next(roots, &iter))) {
4424                 root_key.objectid = node->val;
4425                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4426                 root_key.offset = (u64)-1;
4427
4428                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4429                 if (IS_ERR(root)) {
4430                         ret = -EIO;
4431                         break;
4432                 }
4433
4434
4435                 trans = btrfs_start_transaction(search_root, 0);
4436                 if (IS_ERR(trans)) {
4437                         ret = PTR_ERR(trans);
4438                         break;
4439                 }
4440
4441                 path->lowest_level = btrfs_header_level(buf);
4442                 path->skip_check_block = 1;
4443                 if (path->lowest_level)
4444                         btrfs_node_key_to_cpu(buf, &key, 0);
4445                 else
4446                         btrfs_item_key_to_cpu(buf, &key, 0);
4447                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4448                 if (ret) {
4449                         ret = -EIO;
4450                         btrfs_commit_transaction(trans, search_root);
4451                         break;
4452                 }
4453                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4454                         ret = fix_key_order(trans, search_root, path);
4455                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4456                         ret = fix_item_offset(trans, search_root, path);
4457                 if (ret) {
4458                         btrfs_commit_transaction(trans, search_root);
4459                         break;
4460                 }
4461                 btrfs_release_path(path);
4462                 btrfs_commit_transaction(trans, search_root);
4463         }
4464         ulist_free(roots);
4465         btrfs_free_path(path);
4466         return ret;
4467 }
4468
4469 static int check_block(struct btrfs_root *root,
4470                        struct cache_tree *extent_cache,
4471                        struct extent_buffer *buf, u64 flags)
4472 {
4473         struct extent_record *rec;
4474         struct cache_extent *cache;
4475         struct btrfs_key key;
4476         enum btrfs_tree_block_status status;
4477         int ret = 0;
4478         int level;
4479
4480         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4481         if (!cache)
4482                 return 1;
4483         rec = container_of(cache, struct extent_record, cache);
4484         rec->generation = btrfs_header_generation(buf);
4485
4486         level = btrfs_header_level(buf);
4487         if (btrfs_header_nritems(buf) > 0) {
4488
4489                 if (level == 0)
4490                         btrfs_item_key_to_cpu(buf, &key, 0);
4491                 else
4492                         btrfs_node_key_to_cpu(buf, &key, 0);
4493
4494                 rec->info_objectid = key.objectid;
4495         }
4496         rec->info_level = level;
4497
4498         if (btrfs_is_leaf(buf))
4499                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4500         else
4501                 status = btrfs_check_node(root, &rec->parent_key, buf);
4502
4503         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4504                 if (repair)
4505                         status = try_to_fix_bad_block(root, buf, status);
4506                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4507                         ret = -EIO;
4508                         fprintf(stderr, "bad block %llu\n",
4509                                 (unsigned long long)buf->start);
4510                 } else {
4511                         /*
4512                          * Signal to callers we need to start the scan over
4513                          * again since we'll have cowed blocks.
4514                          */
4515                         ret = -EAGAIN;
4516                 }
4517         } else {
4518                 rec->content_checked = 1;
4519                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4520                         rec->owner_ref_checked = 1;
4521                 else {
4522                         ret = check_owner_ref(root, rec, buf);
4523                         if (!ret)
4524                                 rec->owner_ref_checked = 1;
4525                 }
4526         }
4527         if (!ret)
4528                 maybe_free_extent_rec(extent_cache, rec);
4529         return ret;
4530 }
4531
4532
4533 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4534                                                 u64 parent, u64 root)
4535 {
4536         struct rb_node *node;
4537         struct tree_backref *back = NULL;
4538         struct tree_backref match = {
4539                 .node = {
4540                         .is_data = 0,
4541                 },
4542         };
4543
4544         if (parent) {
4545                 match.parent = parent;
4546                 match.node.full_backref = 1;
4547         } else {
4548                 match.root = root;
4549         }
4550
4551         node = rb_search(&rec->backref_tree, &match.node.node,
4552                          (rb_compare_keys)compare_extent_backref, NULL);
4553         if (node)
4554                 back = to_tree_backref(rb_node_to_extent_backref(node));
4555
4556         return back;
4557 }
4558
4559 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4560                                                 u64 parent, u64 root)
4561 {
4562         struct tree_backref *ref = malloc(sizeof(*ref));
4563
4564         if (!ref)
4565                 return NULL;
4566         memset(&ref->node, 0, sizeof(ref->node));
4567         if (parent > 0) {
4568                 ref->parent = parent;
4569                 ref->node.full_backref = 1;
4570         } else {
4571                 ref->root = root;
4572                 ref->node.full_backref = 0;
4573         }
4574         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4575
4576         return ref;
4577 }
4578
4579 static struct data_backref *find_data_backref(struct extent_record *rec,
4580                                                 u64 parent, u64 root,
4581                                                 u64 owner, u64 offset,
4582                                                 int found_ref,
4583                                                 u64 disk_bytenr, u64 bytes)
4584 {
4585         struct rb_node *node;
4586         struct data_backref *back = NULL;
4587         struct data_backref match = {
4588                 .node = {
4589                         .is_data = 1,
4590                 },
4591                 .owner = owner,
4592                 .offset = offset,
4593                 .bytes = bytes,
4594                 .found_ref = found_ref,
4595                 .disk_bytenr = disk_bytenr,
4596         };
4597
4598         if (parent) {
4599                 match.parent = parent;
4600                 match.node.full_backref = 1;
4601         } else {
4602                 match.root = root;
4603         }
4604
4605         node = rb_search(&rec->backref_tree, &match.node.node,
4606                          (rb_compare_keys)compare_extent_backref, NULL);
4607         if (node)
4608                 back = to_data_backref(rb_node_to_extent_backref(node));
4609
4610         return back;
4611 }
4612
4613 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4614                                                 u64 parent, u64 root,
4615                                                 u64 owner, u64 offset,
4616                                                 u64 max_size)
4617 {
4618         struct data_backref *ref = malloc(sizeof(*ref));
4619
4620         if (!ref)
4621                 return NULL;
4622         memset(&ref->node, 0, sizeof(ref->node));
4623         ref->node.is_data = 1;
4624
4625         if (parent > 0) {
4626                 ref->parent = parent;
4627                 ref->owner = 0;
4628                 ref->offset = 0;
4629                 ref->node.full_backref = 1;
4630         } else {
4631                 ref->root = root;
4632                 ref->owner = owner;
4633                 ref->offset = offset;
4634                 ref->node.full_backref = 0;
4635         }
4636         ref->bytes = max_size;
4637         ref->found_ref = 0;
4638         ref->num_refs = 0;
4639         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4640         if (max_size > rec->max_size)
4641                 rec->max_size = max_size;
4642         return ref;
4643 }
4644
4645 /* Check if the type of extent matches with its chunk */
4646 static void check_extent_type(struct extent_record *rec)
4647 {
4648         struct btrfs_block_group_cache *bg_cache;
4649
4650         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4651         if (!bg_cache)
4652                 return;
4653
4654         /* data extent, check chunk directly*/
4655         if (!rec->metadata) {
4656                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4657                         rec->wrong_chunk_type = 1;
4658                 return;
4659         }
4660
4661         /* metadata extent, check the obvious case first */
4662         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4663                                  BTRFS_BLOCK_GROUP_METADATA))) {
4664                 rec->wrong_chunk_type = 1;
4665                 return;
4666         }
4667
4668         /*
4669          * Check SYSTEM extent, as it's also marked as metadata, we can only
4670          * make sure it's a SYSTEM extent by its backref
4671          */
4672         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4673                 struct extent_backref *node;
4674                 struct tree_backref *tback;
4675                 u64 bg_type;
4676
4677                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4678                 if (node->is_data) {
4679                         /* tree block shouldn't have data backref */
4680                         rec->wrong_chunk_type = 1;
4681                         return;
4682                 }
4683                 tback = container_of(node, struct tree_backref, node);
4684
4685                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4686                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4687                 else
4688                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4689                 if (!(bg_cache->flags & bg_type))
4690                         rec->wrong_chunk_type = 1;
4691         }
4692 }
4693
4694 /*
4695  * Allocate a new extent record, fill default values from @tmpl and insert int
4696  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4697  * the cache, otherwise it fails.
4698  */
4699 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4700                 struct extent_record *tmpl)
4701 {
4702         struct extent_record *rec;
4703         int ret = 0;
4704
4705         rec = malloc(sizeof(*rec));
4706         if (!rec)
4707                 return -ENOMEM;
4708         rec->start = tmpl->start;
4709         rec->max_size = tmpl->max_size;
4710         rec->nr = max(tmpl->nr, tmpl->max_size);
4711         rec->found_rec = tmpl->found_rec;
4712         rec->content_checked = tmpl->content_checked;
4713         rec->owner_ref_checked = tmpl->owner_ref_checked;
4714         rec->num_duplicates = 0;
4715         rec->metadata = tmpl->metadata;
4716         rec->flag_block_full_backref = FLAG_UNSET;
4717         rec->bad_full_backref = 0;
4718         rec->crossing_stripes = 0;
4719         rec->wrong_chunk_type = 0;
4720         rec->is_root = tmpl->is_root;
4721         rec->refs = tmpl->refs;
4722         rec->extent_item_refs = tmpl->extent_item_refs;
4723         rec->parent_generation = tmpl->parent_generation;
4724         INIT_LIST_HEAD(&rec->backrefs);
4725         INIT_LIST_HEAD(&rec->dups);
4726         INIT_LIST_HEAD(&rec->list);
4727         rec->backref_tree = RB_ROOT;
4728         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4729         rec->cache.start = tmpl->start;
4730         rec->cache.size = tmpl->nr;
4731         ret = insert_cache_extent(extent_cache, &rec->cache);
4732         BUG_ON(ret);
4733         bytes_used += rec->nr;
4734
4735         if (tmpl->metadata)
4736                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4737                                 global_info->tree_root->nodesize);
4738         check_extent_type(rec);
4739         return ret;
4740 }
4741
4742 /*
4743  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4744  * some are hints:
4745  * - refs              - if found, increase refs
4746  * - is_root           - if found, set
4747  * - content_checked   - if found, set
4748  * - owner_ref_checked - if found, set
4749  *
4750  * If not found, create a new one, initialize and insert.
4751  */
4752 static int add_extent_rec(struct cache_tree *extent_cache,
4753                 struct extent_record *tmpl)
4754 {
4755         struct extent_record *rec;
4756         struct cache_extent *cache;
4757         int ret = 0;
4758         int dup = 0;
4759
4760         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4761         if (cache) {
4762                 rec = container_of(cache, struct extent_record, cache);
4763                 if (tmpl->refs)
4764                         rec->refs++;
4765                 if (rec->nr == 1)
4766                         rec->nr = max(tmpl->nr, tmpl->max_size);
4767
4768                 /*
4769                  * We need to make sure to reset nr to whatever the extent
4770                  * record says was the real size, this way we can compare it to
4771                  * the backrefs.
4772                  */
4773                 if (tmpl->found_rec) {
4774                         if (tmpl->start != rec->start || rec->found_rec) {
4775                                 struct extent_record *tmp;
4776
4777                                 dup = 1;
4778                                 if (list_empty(&rec->list))
4779                                         list_add_tail(&rec->list,
4780                                                       &duplicate_extents);
4781
4782                                 /*
4783                                  * We have to do this song and dance in case we
4784                                  * find an extent record that falls inside of
4785                                  * our current extent record but does not have
4786                                  * the same objectid.
4787                                  */
4788                                 tmp = malloc(sizeof(*tmp));
4789                                 if (!tmp)
4790                                         return -ENOMEM;
4791                                 tmp->start = tmpl->start;
4792                                 tmp->max_size = tmpl->max_size;
4793                                 tmp->nr = tmpl->nr;
4794                                 tmp->found_rec = 1;
4795                                 tmp->metadata = tmpl->metadata;
4796                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4797                                 INIT_LIST_HEAD(&tmp->list);
4798                                 list_add_tail(&tmp->list, &rec->dups);
4799                                 rec->num_duplicates++;
4800                         } else {
4801                                 rec->nr = tmpl->nr;
4802                                 rec->found_rec = 1;
4803                         }
4804                 }
4805
4806                 if (tmpl->extent_item_refs && !dup) {
4807                         if (rec->extent_item_refs) {
4808                                 fprintf(stderr, "block %llu rec "
4809                                         "extent_item_refs %llu, passed %llu\n",
4810                                         (unsigned long long)tmpl->start,
4811                                         (unsigned long long)
4812                                                         rec->extent_item_refs,
4813                                         (unsigned long long)tmpl->extent_item_refs);
4814                         }
4815                         rec->extent_item_refs = tmpl->extent_item_refs;
4816                 }
4817                 if (tmpl->is_root)
4818                         rec->is_root = 1;
4819                 if (tmpl->content_checked)
4820                         rec->content_checked = 1;
4821                 if (tmpl->owner_ref_checked)
4822                         rec->owner_ref_checked = 1;
4823                 memcpy(&rec->parent_key, &tmpl->parent_key,
4824                                 sizeof(tmpl->parent_key));
4825                 if (tmpl->parent_generation)
4826                         rec->parent_generation = tmpl->parent_generation;
4827                 if (rec->max_size < tmpl->max_size)
4828                         rec->max_size = tmpl->max_size;
4829
4830                 /*
4831                  * A metadata extent can't cross stripe_len boundary, otherwise
4832                  * kernel scrub won't be able to handle it.
4833                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4834                  * it.
4835                  */
4836                 if (tmpl->metadata)
4837                         rec->crossing_stripes = check_crossing_stripes(
4838                                 rec->start, global_info->tree_root->nodesize);
4839                 check_extent_type(rec);
4840                 maybe_free_extent_rec(extent_cache, rec);
4841                 return ret;
4842         }
4843
4844         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4845
4846         return ret;
4847 }
4848
4849 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4850                             u64 parent, u64 root, int found_ref)
4851 {
4852         struct extent_record *rec;
4853         struct tree_backref *back;
4854         struct cache_extent *cache;
4855
4856         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4857         if (!cache) {
4858                 struct extent_record tmpl;
4859
4860                 memset(&tmpl, 0, sizeof(tmpl));
4861                 tmpl.start = bytenr;
4862                 tmpl.nr = 1;
4863                 tmpl.metadata = 1;
4864
4865                 add_extent_rec_nolookup(extent_cache, &tmpl);
4866
4867                 /* really a bug in cache_extent implement now */
4868                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4869                 if (!cache)
4870                         return -ENOENT;
4871         }
4872
4873         rec = container_of(cache, struct extent_record, cache);
4874         if (rec->start != bytenr) {
4875                 /*
4876                  * Several cause, from unaligned bytenr to over lapping extents
4877                  */
4878                 return -EEXIST;
4879         }
4880
4881         back = find_tree_backref(rec, parent, root);
4882         if (!back) {
4883                 back = alloc_tree_backref(rec, parent, root);
4884                 if (!back)
4885                         return -ENOMEM;
4886         }
4887
4888         if (found_ref) {
4889                 if (back->node.found_ref) {
4890                         fprintf(stderr, "Extent back ref already exists "
4891                                 "for %llu parent %llu root %llu \n",
4892                                 (unsigned long long)bytenr,
4893                                 (unsigned long long)parent,
4894                                 (unsigned long long)root);
4895                 }
4896                 back->node.found_ref = 1;
4897         } else {
4898                 if (back->node.found_extent_tree) {
4899                         fprintf(stderr, "Extent back ref already exists "
4900                                 "for %llu parent %llu root %llu \n",
4901                                 (unsigned long long)bytenr,
4902                                 (unsigned long long)parent,
4903                                 (unsigned long long)root);
4904                 }
4905                 back->node.found_extent_tree = 1;
4906         }
4907         check_extent_type(rec);
4908         maybe_free_extent_rec(extent_cache, rec);
4909         return 0;
4910 }
4911
4912 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4913                             u64 parent, u64 root, u64 owner, u64 offset,
4914                             u32 num_refs, int found_ref, u64 max_size)
4915 {
4916         struct extent_record *rec;
4917         struct data_backref *back;
4918         struct cache_extent *cache;
4919
4920         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4921         if (!cache) {
4922                 struct extent_record tmpl;
4923
4924                 memset(&tmpl, 0, sizeof(tmpl));
4925                 tmpl.start = bytenr;
4926                 tmpl.nr = 1;
4927                 tmpl.max_size = max_size;
4928
4929                 add_extent_rec_nolookup(extent_cache, &tmpl);
4930
4931                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4932                 if (!cache)
4933                         abort();
4934         }
4935
4936         rec = container_of(cache, struct extent_record, cache);
4937         if (rec->max_size < max_size)
4938                 rec->max_size = max_size;
4939
4940         /*
4941          * If found_ref is set then max_size is the real size and must match the
4942          * existing refs.  So if we have already found a ref then we need to
4943          * make sure that this ref matches the existing one, otherwise we need
4944          * to add a new backref so we can notice that the backrefs don't match
4945          * and we need to figure out who is telling the truth.  This is to
4946          * account for that awful fsync bug I introduced where we'd end up with
4947          * a btrfs_file_extent_item that would have its length include multiple
4948          * prealloc extents or point inside of a prealloc extent.
4949          */
4950         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4951                                  bytenr, max_size);
4952         if (!back) {
4953                 back = alloc_data_backref(rec, parent, root, owner, offset,
4954                                           max_size);
4955                 BUG_ON(!back);
4956         }
4957
4958         if (found_ref) {
4959                 BUG_ON(num_refs != 1);
4960                 if (back->node.found_ref)
4961                         BUG_ON(back->bytes != max_size);
4962                 back->node.found_ref = 1;
4963                 back->found_ref += 1;
4964                 back->bytes = max_size;
4965                 back->disk_bytenr = bytenr;
4966                 rec->refs += 1;
4967                 rec->content_checked = 1;
4968                 rec->owner_ref_checked = 1;
4969         } else {
4970                 if (back->node.found_extent_tree) {
4971                         fprintf(stderr, "Extent back ref already exists "
4972                                 "for %llu parent %llu root %llu "
4973                                 "owner %llu offset %llu num_refs %lu\n",
4974                                 (unsigned long long)bytenr,
4975                                 (unsigned long long)parent,
4976                                 (unsigned long long)root,
4977                                 (unsigned long long)owner,
4978                                 (unsigned long long)offset,
4979                                 (unsigned long)num_refs);
4980                 }
4981                 back->num_refs = num_refs;
4982                 back->node.found_extent_tree = 1;
4983         }
4984         maybe_free_extent_rec(extent_cache, rec);
4985         return 0;
4986 }
4987
4988 static int add_pending(struct cache_tree *pending,
4989                        struct cache_tree *seen, u64 bytenr, u32 size)
4990 {
4991         int ret;
4992         ret = add_cache_extent(seen, bytenr, size);
4993         if (ret)
4994                 return ret;
4995         add_cache_extent(pending, bytenr, size);
4996         return 0;
4997 }
4998
4999 static int pick_next_pending(struct cache_tree *pending,
5000                         struct cache_tree *reada,
5001                         struct cache_tree *nodes,
5002                         u64 last, struct block_info *bits, int bits_nr,
5003                         int *reada_bits)
5004 {
5005         unsigned long node_start = last;
5006         struct cache_extent *cache;
5007         int ret;
5008
5009         cache = search_cache_extent(reada, 0);
5010         if (cache) {
5011                 bits[0].start = cache->start;
5012                 bits[0].size = cache->size;
5013                 *reada_bits = 1;
5014                 return 1;
5015         }
5016         *reada_bits = 0;
5017         if (node_start > 32768)
5018                 node_start -= 32768;
5019
5020         cache = search_cache_extent(nodes, node_start);
5021         if (!cache)
5022                 cache = search_cache_extent(nodes, 0);
5023
5024         if (!cache) {
5025                  cache = search_cache_extent(pending, 0);
5026                  if (!cache)
5027                          return 0;
5028                  ret = 0;
5029                  do {
5030                          bits[ret].start = cache->start;
5031                          bits[ret].size = cache->size;
5032                          cache = next_cache_extent(cache);
5033                          ret++;
5034                  } while (cache && ret < bits_nr);
5035                  return ret;
5036         }
5037
5038         ret = 0;
5039         do {
5040                 bits[ret].start = cache->start;
5041                 bits[ret].size = cache->size;
5042                 cache = next_cache_extent(cache);
5043                 ret++;
5044         } while (cache && ret < bits_nr);
5045
5046         if (bits_nr - ret > 8) {
5047                 u64 lookup = bits[0].start + bits[0].size;
5048                 struct cache_extent *next;
5049                 next = search_cache_extent(pending, lookup);
5050                 while(next) {
5051                         if (next->start - lookup > 32768)
5052                                 break;
5053                         bits[ret].start = next->start;
5054                         bits[ret].size = next->size;
5055                         lookup = next->start + next->size;
5056                         ret++;
5057                         if (ret == bits_nr)
5058                                 break;
5059                         next = next_cache_extent(next);
5060                         if (!next)
5061                                 break;
5062                 }
5063         }
5064         return ret;
5065 }
5066
5067 static void free_chunk_record(struct cache_extent *cache)
5068 {
5069         struct chunk_record *rec;
5070
5071         rec = container_of(cache, struct chunk_record, cache);
5072         list_del_init(&rec->list);
5073         list_del_init(&rec->dextents);
5074         free(rec);
5075 }
5076
5077 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5078 {
5079         cache_tree_free_extents(chunk_cache, free_chunk_record);
5080 }
5081
5082 static void free_device_record(struct rb_node *node)
5083 {
5084         struct device_record *rec;
5085
5086         rec = container_of(node, struct device_record, node);
5087         free(rec);
5088 }
5089
5090 FREE_RB_BASED_TREE(device_cache, free_device_record);
5091
5092 int insert_block_group_record(struct block_group_tree *tree,
5093                               struct block_group_record *bg_rec)
5094 {
5095         int ret;
5096
5097         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5098         if (ret)
5099                 return ret;
5100
5101         list_add_tail(&bg_rec->list, &tree->block_groups);
5102         return 0;
5103 }
5104
5105 static void free_block_group_record(struct cache_extent *cache)
5106 {
5107         struct block_group_record *rec;
5108
5109         rec = container_of(cache, struct block_group_record, cache);
5110         list_del_init(&rec->list);
5111         free(rec);
5112 }
5113
5114 void free_block_group_tree(struct block_group_tree *tree)
5115 {
5116         cache_tree_free_extents(&tree->tree, free_block_group_record);
5117 }
5118
5119 int insert_device_extent_record(struct device_extent_tree *tree,
5120                                 struct device_extent_record *de_rec)
5121 {
5122         int ret;
5123
5124         /*
5125          * Device extent is a bit different from the other extents, because
5126          * the extents which belong to the different devices may have the
5127          * same start and size, so we need use the special extent cache
5128          * search/insert functions.
5129          */
5130         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5131         if (ret)
5132                 return ret;
5133
5134         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5135         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5136         return 0;
5137 }
5138
5139 static void free_device_extent_record(struct cache_extent *cache)
5140 {
5141         struct device_extent_record *rec;
5142
5143         rec = container_of(cache, struct device_extent_record, cache);
5144         if (!list_empty(&rec->chunk_list))
5145                 list_del_init(&rec->chunk_list);
5146         if (!list_empty(&rec->device_list))
5147                 list_del_init(&rec->device_list);
5148         free(rec);
5149 }
5150
5151 void free_device_extent_tree(struct device_extent_tree *tree)
5152 {
5153         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5154 }
5155
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is added as a tree backref,
 * anything else as a data backref carrying the item's ref count. In both
 * cases key.objectid is the bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
5176
5177 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5178                                             struct btrfs_key *key,
5179                                             int slot)
5180 {
5181         struct btrfs_chunk *ptr;
5182         struct chunk_record *rec;
5183         int num_stripes, i;
5184
5185         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5186         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5187
5188         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5189         if (!rec) {
5190                 fprintf(stderr, "memory allocation failed\n");
5191                 exit(-1);
5192         }
5193
5194         INIT_LIST_HEAD(&rec->list);
5195         INIT_LIST_HEAD(&rec->dextents);
5196         rec->bg_rec = NULL;
5197
5198         rec->cache.start = key->offset;
5199         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5200
5201         rec->generation = btrfs_header_generation(leaf);
5202
5203         rec->objectid = key->objectid;
5204         rec->type = key->type;
5205         rec->offset = key->offset;
5206
5207         rec->length = rec->cache.size;
5208         rec->owner = btrfs_chunk_owner(leaf, ptr);
5209         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5210         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5211         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5212         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5213         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5214         rec->num_stripes = num_stripes;
5215         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5216
5217         for (i = 0; i < rec->num_stripes; ++i) {
5218                 rec->stripes[i].devid =
5219                         btrfs_stripe_devid_nr(leaf, ptr, i);
5220                 rec->stripes[i].offset =
5221                         btrfs_stripe_offset_nr(leaf, ptr, i);
5222                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5223                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5224                                 BTRFS_UUID_SIZE);
5225         }
5226
5227         return rec;
5228 }
5229
5230 static int process_chunk_item(struct cache_tree *chunk_cache,
5231                               struct btrfs_key *key, struct extent_buffer *eb,
5232                               int slot)
5233 {
5234         struct chunk_record *rec;
5235         struct btrfs_chunk *chunk;
5236         int ret = 0;
5237
5238         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5239         /*
5240          * Do extra check for this chunk item,
5241          *
5242          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5243          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5244          * and owner<->key_type check.
5245          */
5246         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5247                                       key->offset);
5248         if (ret < 0) {
5249                 error("chunk(%llu, %llu) is not valid, ignore it",
5250                       key->offset, btrfs_chunk_length(eb, chunk));
5251                 return 0;
5252         }
5253         rec = btrfs_new_chunk_record(eb, key, slot);
5254         ret = insert_cache_extent(chunk_cache, &rec->cache);
5255         if (ret) {
5256                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5257                         rec->offset, rec->length);
5258                 free(rec);
5259         }
5260
5261         return ret;
5262 }
5263
5264 static int process_device_item(struct rb_root *dev_cache,
5265                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5266 {
5267         struct btrfs_dev_item *ptr;
5268         struct device_record *rec;
5269         int ret = 0;
5270
5271         ptr = btrfs_item_ptr(eb,
5272                 slot, struct btrfs_dev_item);
5273
5274         rec = malloc(sizeof(*rec));
5275         if (!rec) {
5276                 fprintf(stderr, "memory allocation failed\n");
5277                 return -ENOMEM;
5278         }
5279
5280         rec->devid = key->offset;
5281         rec->generation = btrfs_header_generation(eb);
5282
5283         rec->objectid = key->objectid;
5284         rec->type = key->type;
5285         rec->offset = key->offset;
5286
5287         rec->devid = btrfs_device_id(eb, ptr);
5288         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5289         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5290
5291         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5292         if (ret) {
5293                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5294                 free(rec);
5295         }
5296
5297         return ret;
5298 }
5299
5300 struct block_group_record *
5301 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5302                              int slot)
5303 {
5304         struct btrfs_block_group_item *ptr;
5305         struct block_group_record *rec;
5306
5307         rec = calloc(1, sizeof(*rec));
5308         if (!rec) {
5309                 fprintf(stderr, "memory allocation failed\n");
5310                 exit(-1);
5311         }
5312
5313         rec->cache.start = key->objectid;
5314         rec->cache.size = key->offset;
5315
5316         rec->generation = btrfs_header_generation(leaf);
5317
5318         rec->objectid = key->objectid;
5319         rec->type = key->type;
5320         rec->offset = key->offset;
5321
5322         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5323         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5324
5325         INIT_LIST_HEAD(&rec->list);
5326
5327         return rec;
5328 }
5329
5330 static int process_block_group_item(struct block_group_tree *block_group_cache,
5331                                     struct btrfs_key *key,
5332                                     struct extent_buffer *eb, int slot)
5333 {
5334         struct block_group_record *rec;
5335         int ret = 0;
5336
5337         rec = btrfs_new_block_group_record(eb, key, slot);
5338         ret = insert_block_group_record(block_group_cache, rec);
5339         if (ret) {
5340                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5341                         rec->objectid, rec->offset);
5342                 free(rec);
5343         }
5344
5345         return ret;
5346 }
5347
5348 struct device_extent_record *
5349 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5350                                struct btrfs_key *key, int slot)
5351 {
5352         struct device_extent_record *rec;
5353         struct btrfs_dev_extent *ptr;
5354
5355         rec = calloc(1, sizeof(*rec));
5356         if (!rec) {
5357                 fprintf(stderr, "memory allocation failed\n");
5358                 exit(-1);
5359         }
5360
5361         rec->cache.objectid = key->objectid;
5362         rec->cache.start = key->offset;
5363
5364         rec->generation = btrfs_header_generation(leaf);
5365
5366         rec->objectid = key->objectid;
5367         rec->type = key->type;
5368         rec->offset = key->offset;
5369
5370         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5371         rec->chunk_objecteid =
5372                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5373         rec->chunk_offset =
5374                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5375         rec->length = btrfs_dev_extent_length(leaf, ptr);
5376         rec->cache.size = rec->length;
5377
5378         INIT_LIST_HEAD(&rec->chunk_list);
5379         INIT_LIST_HEAD(&rec->device_list);
5380
5381         return rec;
5382 }
5383
5384 static int
5385 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5386                            struct btrfs_key *key, struct extent_buffer *eb,
5387                            int slot)
5388 {
5389         struct device_extent_record *rec;
5390         int ret;
5391
5392         rec = btrfs_new_device_extent_record(eb, key, slot);
5393         ret = insert_device_extent_record(dev_extent_cache, rec);
5394         if (ret) {
5395                 fprintf(stderr,
5396                         "Device extent[%llu, %llu, %llu] existed.\n",
5397                         rec->objectid, rec->offset, rec->length);
5398                 free(rec);
5399         }
5400
5401         return ret;
5402 }
5403
5404 static int process_extent_item(struct btrfs_root *root,
5405                                struct cache_tree *extent_cache,
5406                                struct extent_buffer *eb, int slot)
5407 {
5408         struct btrfs_extent_item *ei;
5409         struct btrfs_extent_inline_ref *iref;
5410         struct btrfs_extent_data_ref *dref;
5411         struct btrfs_shared_data_ref *sref;
5412         struct btrfs_key key;
5413         struct extent_record tmpl;
5414         unsigned long end;
5415         unsigned long ptr;
5416         int ret;
5417         int type;
5418         u32 item_size = btrfs_item_size_nr(eb, slot);
5419         u64 refs = 0;
5420         u64 offset;
5421         u64 num_bytes;
5422         int metadata = 0;
5423
5424         btrfs_item_key_to_cpu(eb, &key, slot);
5425
5426         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5427                 metadata = 1;
5428                 num_bytes = root->nodesize;
5429         } else {
5430                 num_bytes = key.offset;
5431         }
5432
5433         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5434                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5435                       key.objectid, root->sectorsize);
5436                 return -EIO;
5437         }
5438         if (item_size < sizeof(*ei)) {
5439 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5440                 struct btrfs_extent_item_v0 *ei0;
5441                 BUG_ON(item_size != sizeof(*ei0));
5442                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5443                 refs = btrfs_extent_refs_v0(eb, ei0);
5444 #else
5445                 BUG();
5446 #endif
5447                 memset(&tmpl, 0, sizeof(tmpl));
5448                 tmpl.start = key.objectid;
5449                 tmpl.nr = num_bytes;
5450                 tmpl.extent_item_refs = refs;
5451                 tmpl.metadata = metadata;
5452                 tmpl.found_rec = 1;
5453                 tmpl.max_size = num_bytes;
5454
5455                 return add_extent_rec(extent_cache, &tmpl);
5456         }
5457
5458         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5459         refs = btrfs_extent_refs(eb, ei);
5460         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5461                 metadata = 1;
5462         else
5463                 metadata = 0;
5464         if (metadata && num_bytes != root->nodesize) {
5465                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5466                       num_bytes, root->nodesize);
5467                 return -EIO;
5468         }
5469         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5470                 error("ignore invalid data extent, length %llu is not aligned to %u",
5471                       num_bytes, root->sectorsize);
5472                 return -EIO;
5473         }
5474
5475         memset(&tmpl, 0, sizeof(tmpl));
5476         tmpl.start = key.objectid;
5477         tmpl.nr = num_bytes;
5478         tmpl.extent_item_refs = refs;
5479         tmpl.metadata = metadata;
5480         tmpl.found_rec = 1;
5481         tmpl.max_size = num_bytes;
5482         add_extent_rec(extent_cache, &tmpl);
5483
5484         ptr = (unsigned long)(ei + 1);
5485         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5486             key.type == BTRFS_EXTENT_ITEM_KEY)
5487                 ptr += sizeof(struct btrfs_tree_block_info);
5488
5489         end = (unsigned long)ei + item_size;
5490         while (ptr < end) {
5491                 iref = (struct btrfs_extent_inline_ref *)ptr;
5492                 type = btrfs_extent_inline_ref_type(eb, iref);
5493                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5494                 switch (type) {
5495                 case BTRFS_TREE_BLOCK_REF_KEY:
5496                         ret = add_tree_backref(extent_cache, key.objectid,
5497                                         0, offset, 0);
5498                         if (ret < 0)
5499                                 error("add_tree_backref failed: %s",
5500                                       strerror(-ret));
5501                         break;
5502                 case BTRFS_SHARED_BLOCK_REF_KEY:
5503                         ret = add_tree_backref(extent_cache, key.objectid,
5504                                         offset, 0, 0);
5505                         if (ret < 0)
5506                                 error("add_tree_backref failed: %s",
5507                                       strerror(-ret));
5508                         break;
5509                 case BTRFS_EXTENT_DATA_REF_KEY:
5510                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5511                         add_data_backref(extent_cache, key.objectid, 0,
5512                                         btrfs_extent_data_ref_root(eb, dref),
5513                                         btrfs_extent_data_ref_objectid(eb,
5514                                                                        dref),
5515                                         btrfs_extent_data_ref_offset(eb, dref),
5516                                         btrfs_extent_data_ref_count(eb, dref),
5517                                         0, num_bytes);
5518                         break;
5519                 case BTRFS_SHARED_DATA_REF_KEY:
5520                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5521                         add_data_backref(extent_cache, key.objectid, offset,
5522                                         0, 0, 0,
5523                                         btrfs_shared_data_ref_count(eb, sref),
5524                                         0, num_bytes);
5525                         break;
5526                 default:
5527                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5528                                 key.objectid, key.type, num_bytes);
5529                         goto out;
5530                 }
5531                 ptr += btrfs_extent_inline_ref_size(type);
5532         }
5533         WARN_ON(ptr > end);
5534 out:
5535         return 0;
5536 }
5537
5538 static int check_cache_range(struct btrfs_root *root,
5539                              struct btrfs_block_group_cache *cache,
5540                              u64 offset, u64 bytes)
5541 {
5542         struct btrfs_free_space *entry;
5543         u64 *logical;
5544         u64 bytenr;
5545         int stripe_len;
5546         int i, nr, ret;
5547
5548         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5549                 bytenr = btrfs_sb_offset(i);
5550                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5551                                        cache->key.objectid, bytenr, 0,
5552                                        &logical, &nr, &stripe_len);
5553                 if (ret)
5554                         return ret;
5555
5556                 while (nr--) {
5557                         if (logical[nr] + stripe_len <= offset)
5558                                 continue;
5559                         if (offset + bytes <= logical[nr])
5560                                 continue;
5561                         if (logical[nr] == offset) {
5562                                 if (stripe_len >= bytes) {
5563                                         kfree(logical);
5564                                         return 0;
5565                                 }
5566                                 bytes -= stripe_len;
5567                                 offset += stripe_len;
5568                         } else if (logical[nr] < offset) {
5569                                 if (logical[nr] + stripe_len >=
5570                                     offset + bytes) {
5571                                         kfree(logical);
5572                                         return 0;
5573                                 }
5574                                 bytes = (offset + bytes) -
5575                                         (logical[nr] + stripe_len);
5576                                 offset = logical[nr] + stripe_len;
5577                         } else {
5578                                 /*
5579                                  * Could be tricky, the super may land in the
5580                                  * middle of the area we're checking.  First
5581                                  * check the easiest case, it's at the end.
5582                                  */
5583                                 if (logical[nr] + stripe_len >=
5584                                     bytes + offset) {
5585                                         bytes = logical[nr] - offset;
5586                                         continue;
5587                                 }
5588
5589                                 /* Check the left side */
5590                                 ret = check_cache_range(root, cache,
5591                                                         offset,
5592                                                         logical[nr] - offset);
5593                                 if (ret) {
5594                                         kfree(logical);
5595                                         return ret;
5596                                 }
5597
5598                                 /* Now we continue with the right side */
5599                                 bytes = (offset + bytes) -
5600                                         (logical[nr] + stripe_len);
5601                                 offset = logical[nr] + stripe_len;
5602                         }
5603                 }
5604
5605                 kfree(logical);
5606         }
5607
5608         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5609         if (!entry) {
5610                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5611                         offset, offset+bytes);
5612                 return -EINVAL;
5613         }
5614
5615         if (entry->offset != offset) {
5616                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5617                         entry->offset);
5618                 return -EINVAL;
5619         }
5620
5621         if (entry->bytes != bytes) {
5622                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5623                         bytes, entry->bytes, offset);
5624                 return -EINVAL;
5625         }
5626
5627         unlink_free_space(cache->free_space_ctl, entry);
5628         free(entry);
5629         return 0;
5630 }
5631
5632 static int verify_space_cache(struct btrfs_root *root,
5633                               struct btrfs_block_group_cache *cache)
5634 {
5635         struct btrfs_path *path;
5636         struct extent_buffer *leaf;
5637         struct btrfs_key key;
5638         u64 last;
5639         int ret = 0;
5640
5641         path = btrfs_alloc_path();
5642         if (!path)
5643                 return -ENOMEM;
5644
5645         root = root->fs_info->extent_root;
5646
5647         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5648
5649         key.objectid = last;
5650         key.offset = 0;
5651         key.type = BTRFS_EXTENT_ITEM_KEY;
5652
5653         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5654         if (ret < 0)
5655                 goto out;
5656         ret = 0;
5657         while (1) {
5658                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5659                         ret = btrfs_next_leaf(root, path);
5660                         if (ret < 0)
5661                                 goto out;
5662                         if (ret > 0) {
5663                                 ret = 0;
5664                                 break;
5665                         }
5666                 }
5667                 leaf = path->nodes[0];
5668                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5669                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5670                         break;
5671                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5672                     key.type != BTRFS_METADATA_ITEM_KEY) {
5673                         path->slots[0]++;
5674                         continue;
5675                 }
5676
5677                 if (last == key.objectid) {
5678                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5679                                 last = key.objectid + key.offset;
5680                         else
5681                                 last = key.objectid + root->nodesize;
5682                         path->slots[0]++;
5683                         continue;
5684                 }
5685
5686                 ret = check_cache_range(root, cache, last,
5687                                         key.objectid - last);
5688                 if (ret)
5689                         break;
5690                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5691                         last = key.objectid + key.offset;
5692                 else
5693                         last = key.objectid + root->nodesize;
5694                 path->slots[0]++;
5695         }
5696
5697         if (last < cache->key.objectid + cache->key.offset)
5698                 ret = check_cache_range(root, cache, last,
5699                                         cache->key.objectid +
5700                                         cache->key.offset - last);
5701
5702 out:
5703         btrfs_free_path(path);
5704
5705         if (!ret &&
5706             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5707                 fprintf(stderr, "There are still entries left in the space "
5708                         "cache\n");
5709                 ret = -EINVAL;
5710         }
5711
5712         return ret;
5713 }
5714
5715 static int check_space_cache(struct btrfs_root *root)
5716 {
5717         struct btrfs_block_group_cache *cache;
5718         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5719         int ret;
5720         int error = 0;
5721
5722         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5723             btrfs_super_generation(root->fs_info->super_copy) !=
5724             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5725                 printf("cache and super generation don't match, space cache "
5726                        "will be invalidated\n");
5727                 return 0;
5728         }
5729
5730         if (ctx.progress_enabled) {
5731                 ctx.tp = TASK_FREE_SPACE;
5732                 task_start(ctx.info);
5733         }
5734
5735         while (1) {
5736                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5737                 if (!cache)
5738                         break;
5739
5740                 start = cache->key.objectid + cache->key.offset;
5741                 if (!cache->free_space_ctl) {
5742                         if (btrfs_init_free_space_ctl(cache,
5743                                                       root->sectorsize)) {
5744                                 ret = -ENOMEM;
5745                                 break;
5746                         }
5747                 } else {
5748                         btrfs_remove_free_space_cache(cache);
5749                 }
5750
5751                 if (btrfs_fs_compat_ro(root->fs_info,
5752                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5753                         ret = exclude_super_stripes(root, cache);
5754                         if (ret) {
5755                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5756                                         strerror(-ret));
5757                                 error++;
5758                                 continue;
5759                         }
5760                         ret = load_free_space_tree(root->fs_info, cache);
5761                         free_excluded_extents(root, cache);
5762                         if (ret < 0) {
5763                                 fprintf(stderr, "could not load free space tree: %s\n",
5764                                         strerror(-ret));
5765                                 error++;
5766                                 continue;
5767                         }
5768                         error += ret;
5769                 } else {
5770                         ret = load_free_space_cache(root->fs_info, cache);
5771                         if (!ret)
5772                                 continue;
5773                 }
5774
5775                 ret = verify_space_cache(root, cache);
5776                 if (ret) {
5777                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5778                                 cache->key.objectid);
5779                         error++;
5780                 }
5781         }
5782
5783         task_stop(ctx.info);
5784
5785         return error ? -EINVAL : 0;
5786 }
5787
5788 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5789                         u64 num_bytes, unsigned long leaf_offset,
5790                         struct extent_buffer *eb) {
5791
5792         u64 offset = 0;
5793         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5794         char *data;
5795         unsigned long csum_offset;
5796         u32 csum;
5797         u32 csum_expected;
5798         u64 read_len;
5799         u64 data_checked = 0;
5800         u64 tmp;
5801         int ret = 0;
5802         int mirror;
5803         int num_copies;
5804
5805         if (num_bytes % root->sectorsize)
5806                 return -EINVAL;
5807
5808         data = malloc(num_bytes);
5809         if (!data)
5810                 return -ENOMEM;
5811
5812         while (offset < num_bytes) {
5813                 mirror = 0;
5814 again:
5815                 read_len = num_bytes - offset;
5816                 /* read as much space once a time */
5817                 ret = read_extent_data(root, data + offset,
5818                                 bytenr + offset, &read_len, mirror);
5819                 if (ret)
5820                         goto out;
5821                 data_checked = 0;
5822                 /* verify every 4k data's checksum */
5823                 while (data_checked < read_len) {
5824                         csum = ~(u32)0;
5825                         tmp = offset + data_checked;
5826
5827                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5828                                                csum, root->sectorsize);
5829                         btrfs_csum_final(csum, (char *)&csum);
5830
5831                         csum_offset = leaf_offset +
5832                                  tmp / root->sectorsize * csum_size;
5833                         read_extent_buffer(eb, (char *)&csum_expected,
5834                                            csum_offset, csum_size);
5835                         /* try another mirror */
5836                         if (csum != csum_expected) {
5837                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5838                                                 mirror, bytenr + tmp,
5839                                                 csum, csum_expected);
5840                                 num_copies = btrfs_num_copies(
5841                                                 &root->fs_info->mapping_tree,
5842                                                 bytenr, num_bytes);
5843                                 if (mirror < num_copies - 1) {
5844                                         mirror += 1;
5845                                         goto again;
5846                                 }
5847                         }
5848                         data_checked += root->sectorsize;
5849                 }
5850                 offset += read_len;
5851         }
5852 out:
5853         free(data);
5854         return ret;
5855 }
5856
5857 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5858                                u64 num_bytes)
5859 {
5860         struct btrfs_path *path;
5861         struct extent_buffer *leaf;
5862         struct btrfs_key key;
5863         int ret;
5864
5865         path = btrfs_alloc_path();
5866         if (!path) {
5867                 fprintf(stderr, "Error allocating path\n");
5868                 return -ENOMEM;
5869         }
5870
5871         key.objectid = bytenr;
5872         key.type = BTRFS_EXTENT_ITEM_KEY;
5873         key.offset = (u64)-1;
5874
5875 again:
5876         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5877                                 0, 0);
5878         if (ret < 0) {
5879                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5880                 btrfs_free_path(path);
5881                 return ret;
5882         } else if (ret) {
5883                 if (path->slots[0] > 0) {
5884                         path->slots[0]--;
5885                 } else {
5886                         ret = btrfs_prev_leaf(root, path);
5887                         if (ret < 0) {
5888                                 goto out;
5889                         } else if (ret > 0) {
5890                                 ret = 0;
5891                                 goto out;
5892                         }
5893                 }
5894         }
5895
5896         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5897
5898         /*
5899          * Block group items come before extent items if they have the same
5900          * bytenr, so walk back one more just in case.  Dear future traveller,
5901          * first congrats on mastering time travel.  Now if it's not too much
5902          * trouble could you go back to 2006 and tell Chris to make the
5903          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5904          * EXTENT_ITEM_KEY please?
5905          */
5906         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5907                 if (path->slots[0] > 0) {
5908                         path->slots[0]--;
5909                 } else {
5910                         ret = btrfs_prev_leaf(root, path);
5911                         if (ret < 0) {
5912                                 goto out;
5913                         } else if (ret > 0) {
5914                                 ret = 0;
5915                                 goto out;
5916                         }
5917                 }
5918                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5919         }
5920
5921         while (num_bytes) {
5922                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5923                         ret = btrfs_next_leaf(root, path);
5924                         if (ret < 0) {
5925                                 fprintf(stderr, "Error going to next leaf "
5926                                         "%d\n", ret);
5927                                 btrfs_free_path(path);
5928                                 return ret;
5929                         } else if (ret) {
5930                                 break;
5931                         }
5932                 }
5933                 leaf = path->nodes[0];
5934                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5935                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5936                         path->slots[0]++;
5937                         continue;
5938                 }
5939                 if (key.objectid + key.offset < bytenr) {
5940                         path->slots[0]++;
5941                         continue;
5942                 }
5943                 if (key.objectid > bytenr + num_bytes)
5944                         break;
5945
5946                 if (key.objectid == bytenr) {
5947                         if (key.offset >= num_bytes) {
5948                                 num_bytes = 0;
5949                                 break;
5950                         }
5951                         num_bytes -= key.offset;
5952                         bytenr += key.offset;
5953                 } else if (key.objectid < bytenr) {
5954                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5955                                 num_bytes = 0;
5956                                 break;
5957                         }
5958                         num_bytes = (bytenr + num_bytes) -
5959                                 (key.objectid + key.offset);
5960                         bytenr = key.objectid + key.offset;
5961                 } else {
5962                         if (key.objectid + key.offset < bytenr + num_bytes) {
5963                                 u64 new_start = key.objectid + key.offset;
5964                                 u64 new_bytes = bytenr + num_bytes - new_start;
5965
5966                                 /*
5967                                  * Weird case, the extent is in the middle of
5968                                  * our range, we'll have to search one side
5969                                  * and then the other.  Not sure if this happens
5970                                  * in real life, but no harm in coding it up
5971                                  * anyway just in case.
5972                                  */
5973                                 btrfs_release_path(path);
5974                                 ret = check_extent_exists(root, new_start,
5975                                                           new_bytes);
5976                                 if (ret) {
5977                                         fprintf(stderr, "Right section didn't "
5978                                                 "have a record\n");
5979                                         break;
5980                                 }
5981                                 num_bytes = key.objectid - bytenr;
5982                                 goto again;
5983                         }
5984                         num_bytes = key.objectid - bytenr;
5985                 }
5986                 path->slots[0]++;
5987         }
5988         ret = 0;
5989
5990 out:
5991         if (num_bytes && !ret) {
5992                 fprintf(stderr, "There are no extents for csum range "
5993                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5994                 ret = 1;
5995         }
5996
5997         btrfs_free_path(path);
5998         return ret;
5999 }
6000
6001 static int check_csums(struct btrfs_root *root)
6002 {
6003         struct btrfs_path *path;
6004         struct extent_buffer *leaf;
6005         struct btrfs_key key;
6006         u64 offset = 0, num_bytes = 0;
6007         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6008         int errors = 0;
6009         int ret;
6010         u64 data_len;
6011         unsigned long leaf_offset;
6012
6013         root = root->fs_info->csum_root;
6014         if (!extent_buffer_uptodate(root->node)) {
6015                 fprintf(stderr, "No valid csum tree found\n");
6016                 return -ENOENT;
6017         }
6018
6019         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6020         key.type = BTRFS_EXTENT_CSUM_KEY;
6021         key.offset = 0;
6022
6023         path = btrfs_alloc_path();
6024         if (!path)
6025                 return -ENOMEM;
6026
6027         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6028         if (ret < 0) {
6029                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6030                 btrfs_free_path(path);
6031                 return ret;
6032         }
6033
6034         if (ret > 0 && path->slots[0])
6035                 path->slots[0]--;
6036         ret = 0;
6037
6038         while (1) {
6039                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6040                         ret = btrfs_next_leaf(root, path);
6041                         if (ret < 0) {
6042                                 fprintf(stderr, "Error going to next leaf "
6043                                         "%d\n", ret);
6044                                 break;
6045                         }
6046                         if (ret)
6047                                 break;
6048                 }
6049                 leaf = path->nodes[0];
6050
6051                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6052                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6053                         path->slots[0]++;
6054                         continue;
6055                 }
6056
6057                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6058                               csum_size) * root->sectorsize;
6059                 if (!check_data_csum)
6060                         goto skip_csum_check;
6061                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6062                 ret = check_extent_csums(root, key.offset, data_len,
6063                                          leaf_offset, leaf);
6064                 if (ret)
6065                         break;
6066 skip_csum_check:
6067                 if (!num_bytes) {
6068                         offset = key.offset;
6069                 } else if (key.offset != offset + num_bytes) {
6070                         ret = check_extent_exists(root, offset, num_bytes);
6071                         if (ret) {
6072                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6073                                         "there is no extent record\n",
6074                                         offset, offset+num_bytes);
6075                                 errors++;
6076                         }
6077                         offset = key.offset;
6078                         num_bytes = 0;
6079                 }
6080                 num_bytes += data_len;
6081                 path->slots[0]++;
6082         }
6083
6084         btrfs_free_path(path);
6085         return errors;
6086 }
6087
6088 static int is_dropped_key(struct btrfs_key *key,
6089                           struct btrfs_key *drop_key) {
6090         if (key->objectid < drop_key->objectid)
6091                 return 1;
6092         else if (key->objectid == drop_key->objectid) {
6093                 if (key->type < drop_key->type)
6094                         return 1;
6095                 else if (key->type == drop_key->type) {
6096                         if (key->offset < drop_key->offset)
6097                                 return 1;
6098                 }
6099         }
6100         return 0;
6101 }
6102
6103 /*
6104  * Here are the rules for FULL_BACKREF.
6105  *
6106  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6107  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6108  *      FULL_BACKREF set.
6109  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6110  *    if it happened after the relocation occurred since we'll have dropped the
6111  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6112  *    have no real way to know for sure.
6113  *
6114  * We process the blocks one root at a time, and we start from the lowest root
6115  * objectid and go to the highest.  So we can just lookup the owner backref for
6116  * the record and if we don't find it then we know it doesn't exist and we have
6117  * a FULL BACKREF.
6118  *
6119  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6120  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6121  * be set or not and then we can check later once we've gathered all the refs.
6122  */
6123 static int calc_extent_flag(struct btrfs_root *root,
6124                            struct cache_tree *extent_cache,
6125                            struct extent_buffer *buf,
6126                            struct root_item_record *ri,
6127                            u64 *flags)
6128 {
6129         struct extent_record *rec;
6130         struct cache_extent *cache;
6131         struct tree_backref *tback;
6132         u64 owner = 0;
6133
6134         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6135         /* we have added this extent before */
6136         BUG_ON(!cache);
6137         rec = container_of(cache, struct extent_record, cache);
6138
6139         /*
6140          * Except file/reloc tree, we can not have
6141          * FULL BACKREF MODE
6142          */
6143         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6144                 goto normal;
6145         /*
6146          * root node
6147          */
6148         if (buf->start == ri->bytenr)
6149                 goto normal;
6150
6151         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6152                 goto full_backref;
6153
6154         owner = btrfs_header_owner(buf);
6155         if (owner == ri->objectid)
6156                 goto normal;
6157
6158         tback = find_tree_backref(rec, 0, owner);
6159         if (!tback)
6160                 goto full_backref;
6161 normal:
6162         *flags = 0;
6163         if (rec->flag_block_full_backref != FLAG_UNSET &&
6164             rec->flag_block_full_backref != 0)
6165                 rec->bad_full_backref = 1;
6166         return 0;
6167 full_backref:
6168         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6169         if (rec->flag_block_full_backref != FLAG_UNSET &&
6170             rec->flag_block_full_backref != 1)
6171                 rec->bad_full_backref = 1;
6172         return 0;
6173 }
6174
6175 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6176 {
6177         fprintf(stderr, "Invalid key type(");
6178         print_key_type(stderr, 0, key_type);
6179         fprintf(stderr, ") found in root(");
6180         print_objectid(stderr, rootid, 0);
6181         fprintf(stderr, ")\n");
6182 }
6183
6184 /*
6185  * Check if the key is valid with its extent buffer.
6186  *
6187  * This is a early check in case invalid key exists in a extent buffer
6188  * This is not comprehensive yet, but should prevent wrong key/item passed
6189  * further
6190  */
6191 static int check_type_with_root(u64 rootid, u8 key_type)
6192 {
6193         switch (key_type) {
6194         /* Only valid in chunk tree */
6195         case BTRFS_DEV_ITEM_KEY:
6196         case BTRFS_CHUNK_ITEM_KEY:
6197                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6198                         goto err;
6199                 break;
6200         /* valid in csum and log tree */
6201         case BTRFS_CSUM_TREE_OBJECTID:
6202                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6203                       is_fstree(rootid)))
6204                         goto err;
6205                 break;
6206         case BTRFS_EXTENT_ITEM_KEY:
6207         case BTRFS_METADATA_ITEM_KEY:
6208         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6209                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6210                         goto err;
6211                 break;
6212         case BTRFS_ROOT_ITEM_KEY:
6213                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6214                         goto err;
6215                 break;
6216         case BTRFS_DEV_EXTENT_KEY:
6217                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6218                         goto err;
6219                 break;
6220         }
6221         return 0;
6222 err:
6223         report_mismatch_key_root(key_type, rootid);
6224         return -EINVAL;
6225 }
6226
6227 static int run_next_block(struct btrfs_root *root,
6228                           struct block_info *bits,
6229                           int bits_nr,
6230                           u64 *last,
6231                           struct cache_tree *pending,
6232                           struct cache_tree *seen,
6233                           struct cache_tree *reada,
6234                           struct cache_tree *nodes,
6235                           struct cache_tree *extent_cache,
6236                           struct cache_tree *chunk_cache,
6237                           struct rb_root *dev_cache,
6238                           struct block_group_tree *block_group_cache,
6239                           struct device_extent_tree *dev_extent_cache,
6240                           struct root_item_record *ri)
6241 {
6242         struct extent_buffer *buf;
6243         struct extent_record *rec = NULL;
6244         u64 bytenr;
6245         u32 size;
6246         u64 parent;
6247         u64 owner;
6248         u64 flags;
6249         u64 ptr;
6250         u64 gen = 0;
6251         int ret = 0;
6252         int i;
6253         int nritems;
6254         struct btrfs_key key;
6255         struct cache_extent *cache;
6256         int reada_bits;
6257
6258         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6259                                     bits_nr, &reada_bits);
6260         if (nritems == 0)
6261                 return 1;
6262
6263         if (!reada_bits) {
6264                 for(i = 0; i < nritems; i++) {
6265                         ret = add_cache_extent(reada, bits[i].start,
6266                                                bits[i].size);
6267                         if (ret == -EEXIST)
6268                                 continue;
6269
6270                         /* fixme, get the parent transid */
6271                         readahead_tree_block(root, bits[i].start,
6272                                              bits[i].size, 0);
6273                 }
6274         }
6275         *last = bits[0].start;
6276         bytenr = bits[0].start;
6277         size = bits[0].size;
6278
6279         cache = lookup_cache_extent(pending, bytenr, size);
6280         if (cache) {
6281                 remove_cache_extent(pending, cache);
6282                 free(cache);
6283         }
6284         cache = lookup_cache_extent(reada, bytenr, size);
6285         if (cache) {
6286                 remove_cache_extent(reada, cache);
6287                 free(cache);
6288         }
6289         cache = lookup_cache_extent(nodes, bytenr, size);
6290         if (cache) {
6291                 remove_cache_extent(nodes, cache);
6292                 free(cache);
6293         }
6294         cache = lookup_cache_extent(extent_cache, bytenr, size);
6295         if (cache) {
6296                 rec = container_of(cache, struct extent_record, cache);
6297                 gen = rec->parent_generation;
6298         }
6299
6300         /* fixme, get the real parent transid */
6301         buf = read_tree_block(root, bytenr, size, gen);
6302         if (!extent_buffer_uptodate(buf)) {
6303                 record_bad_block_io(root->fs_info,
6304                                     extent_cache, bytenr, size);
6305                 goto out;
6306         }
6307
6308         nritems = btrfs_header_nritems(buf);
6309
6310         flags = 0;
6311         if (!init_extent_tree) {
6312                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6313                                        btrfs_header_level(buf), 1, NULL,
6314                                        &flags);
6315                 if (ret < 0) {
6316                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6317                         if (ret < 0) {
6318                                 fprintf(stderr, "Couldn't calc extent flags\n");
6319                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6320                         }
6321                 }
6322         } else {
6323                 flags = 0;
6324                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6325                 if (ret < 0) {
6326                         fprintf(stderr, "Couldn't calc extent flags\n");
6327                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6328                 }
6329         }
6330
6331         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6332                 if (ri != NULL &&
6333                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6334                     ri->objectid == btrfs_header_owner(buf)) {
6335                         /*
6336                          * Ok we got to this block from it's original owner and
6337                          * we have FULL_BACKREF set.  Relocation can leave
6338                          * converted blocks over so this is altogether possible,
6339                          * however it's not possible if the generation > the
6340                          * last snapshot, so check for this case.
6341                          */
6342                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6343                             btrfs_header_generation(buf) > ri->last_snapshot) {
6344                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6345                                 rec->bad_full_backref = 1;
6346                         }
6347                 }
6348         } else {
6349                 if (ri != NULL &&
6350                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6351                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6352                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6353                         rec->bad_full_backref = 1;
6354                 }
6355         }
6356
6357         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6358                 rec->flag_block_full_backref = 1;
6359                 parent = bytenr;
6360                 owner = 0;
6361         } else {
6362                 rec->flag_block_full_backref = 0;
6363                 parent = 0;
6364                 owner = btrfs_header_owner(buf);
6365         }
6366
6367         ret = check_block(root, extent_cache, buf, flags);
6368         if (ret)
6369                 goto out;
6370
6371         if (btrfs_is_leaf(buf)) {
6372                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6373                 for (i = 0; i < nritems; i++) {
6374                         struct btrfs_file_extent_item *fi;
6375                         btrfs_item_key_to_cpu(buf, &key, i);
6376                         /*
6377                          * Check key type against the leaf owner.
6378                          * Could filter quite a lot of early error if
6379                          * owner is correct
6380                          */
6381                         if (check_type_with_root(btrfs_header_owner(buf),
6382                                                  key.type)) {
6383                                 fprintf(stderr, "ignoring invalid key\n");
6384                                 continue;
6385                         }
6386                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6387                                 process_extent_item(root, extent_cache, buf,
6388                                                     i);
6389                                 continue;
6390                         }
6391                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6392                                 process_extent_item(root, extent_cache, buf,
6393                                                     i);
6394                                 continue;
6395                         }
6396                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6397                                 total_csum_bytes +=
6398                                         btrfs_item_size_nr(buf, i);
6399                                 continue;
6400                         }
6401                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6402                                 process_chunk_item(chunk_cache, &key, buf, i);
6403                                 continue;
6404                         }
6405                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6406                                 process_device_item(dev_cache, &key, buf, i);
6407                                 continue;
6408                         }
6409                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6410                                 process_block_group_item(block_group_cache,
6411                                         &key, buf, i);
6412                                 continue;
6413                         }
6414                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6415                                 process_device_extent_item(dev_extent_cache,
6416                                         &key, buf, i);
6417                                 continue;
6418
6419                         }
6420                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6421 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6422                                 process_extent_ref_v0(extent_cache, buf, i);
6423 #else
6424                                 BUG();
6425 #endif
6426                                 continue;
6427                         }
6428
6429                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6430                                 ret = add_tree_backref(extent_cache,
6431                                                 key.objectid, 0, key.offset, 0);
6432                                 if (ret < 0)
6433                                         error("add_tree_backref failed: %s",
6434                                               strerror(-ret));
6435                                 continue;
6436                         }
6437                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6438                                 ret = add_tree_backref(extent_cache,
6439                                                 key.objectid, key.offset, 0, 0);
6440                                 if (ret < 0)
6441                                         error("add_tree_backref failed: %s",
6442                                               strerror(-ret));
6443                                 continue;
6444                         }
6445                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6446                                 struct btrfs_extent_data_ref *ref;
6447                                 ref = btrfs_item_ptr(buf, i,
6448                                                 struct btrfs_extent_data_ref);
6449                                 add_data_backref(extent_cache,
6450                                         key.objectid, 0,
6451                                         btrfs_extent_data_ref_root(buf, ref),
6452                                         btrfs_extent_data_ref_objectid(buf,
6453                                                                        ref),
6454                                         btrfs_extent_data_ref_offset(buf, ref),
6455                                         btrfs_extent_data_ref_count(buf, ref),
6456                                         0, root->sectorsize);
6457                                 continue;
6458                         }
6459                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6460                                 struct btrfs_shared_data_ref *ref;
6461                                 ref = btrfs_item_ptr(buf, i,
6462                                                 struct btrfs_shared_data_ref);
6463                                 add_data_backref(extent_cache,
6464                                         key.objectid, key.offset, 0, 0, 0,
6465                                         btrfs_shared_data_ref_count(buf, ref),
6466                                         0, root->sectorsize);
6467                                 continue;
6468                         }
6469                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6470                                 struct bad_item *bad;
6471
6472                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6473                                         continue;
6474                                 if (!owner)
6475                                         continue;
6476                                 bad = malloc(sizeof(struct bad_item));
6477                                 if (!bad)
6478                                         continue;
6479                                 INIT_LIST_HEAD(&bad->list);
6480                                 memcpy(&bad->key, &key,
6481                                        sizeof(struct btrfs_key));
6482                                 bad->root_id = owner;
6483                                 list_add_tail(&bad->list, &delete_items);
6484                                 continue;
6485                         }
6486                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6487                                 continue;
6488                         fi = btrfs_item_ptr(buf, i,
6489                                             struct btrfs_file_extent_item);
6490                         if (btrfs_file_extent_type(buf, fi) ==
6491                             BTRFS_FILE_EXTENT_INLINE)
6492                                 continue;
6493                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6494                                 continue;
6495
6496                         data_bytes_allocated +=
6497                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6498                         if (data_bytes_allocated < root->sectorsize) {
6499                                 abort();
6500                         }
6501                         data_bytes_referenced +=
6502                                 btrfs_file_extent_num_bytes(buf, fi);
6503                         add_data_backref(extent_cache,
6504                                 btrfs_file_extent_disk_bytenr(buf, fi),
6505                                 parent, owner, key.objectid, key.offset -
6506                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6507                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6508                 }
6509         } else {
6510                 int level;
6511                 struct btrfs_key first_key;
6512
6513                 first_key.objectid = 0;
6514
6515                 if (nritems > 0)
6516                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6517                 level = btrfs_header_level(buf);
6518                 for (i = 0; i < nritems; i++) {
6519                         struct extent_record tmpl;
6520
6521                         ptr = btrfs_node_blockptr(buf, i);
6522                         size = root->nodesize;
6523                         btrfs_node_key_to_cpu(buf, &key, i);
6524                         if (ri != NULL) {
6525                                 if ((level == ri->drop_level)
6526                                     && is_dropped_key(&key, &ri->drop_key)) {
6527                                         continue;
6528                                 }
6529                         }
6530
6531                         memset(&tmpl, 0, sizeof(tmpl));
6532                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6533                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6534                         tmpl.start = ptr;
6535                         tmpl.nr = size;
6536                         tmpl.refs = 1;
6537                         tmpl.metadata = 1;
6538                         tmpl.max_size = size;
6539                         ret = add_extent_rec(extent_cache, &tmpl);
6540                         if (ret < 0)
6541                                 goto out;
6542
6543                         ret = add_tree_backref(extent_cache, ptr, parent,
6544                                         owner, 1);
6545                         if (ret < 0) {
6546                                 error("add_tree_backref failed: %s",
6547                                       strerror(-ret));
6548                                 continue;
6549                         }
6550
6551                         if (level > 1) {
6552                                 add_pending(nodes, seen, ptr, size);
6553                         } else {
6554                                 add_pending(pending, seen, ptr, size);
6555                         }
6556                 }
6557                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6558                                       nritems) * sizeof(struct btrfs_key_ptr);
6559         }
6560         total_btree_bytes += buf->len;
6561         if (fs_root_objectid(btrfs_header_owner(buf)))
6562                 total_fs_tree_bytes += buf->len;
6563         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6564                 total_extent_tree_bytes += buf->len;
6565         if (!found_old_backref &&
6566             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6567             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6568             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6569                 found_old_backref = 1;
6570 out:
6571         free_extent_buffer(buf);
6572         return ret;
6573 }
6574
6575 static int add_root_to_pending(struct extent_buffer *buf,
6576                                struct cache_tree *extent_cache,
6577                                struct cache_tree *pending,
6578                                struct cache_tree *seen,
6579                                struct cache_tree *nodes,
6580                                u64 objectid)
6581 {
6582         struct extent_record tmpl;
6583         int ret;
6584
6585         if (btrfs_header_level(buf) > 0)
6586                 add_pending(nodes, seen, buf->start, buf->len);
6587         else
6588                 add_pending(pending, seen, buf->start, buf->len);
6589
6590         memset(&tmpl, 0, sizeof(tmpl));
6591         tmpl.start = buf->start;
6592         tmpl.nr = buf->len;
6593         tmpl.is_root = 1;
6594         tmpl.refs = 1;
6595         tmpl.metadata = 1;
6596         tmpl.max_size = buf->len;
6597         add_extent_rec(extent_cache, &tmpl);
6598
6599         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6600             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6601                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6602                                 0, 1);
6603         else
6604                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6605                                 1);
6606         return ret;
6607 }
6608
6609 /* as we fix the tree, we might be deleting blocks that
6610  * we're tracking for repair.  This hook makes sure we
6611  * remove any backrefs for blocks as we are fixing them.
6612  */
6613 static int free_extent_hook(struct btrfs_trans_handle *trans,
6614                             struct btrfs_root *root,
6615                             u64 bytenr, u64 num_bytes, u64 parent,
6616                             u64 root_objectid, u64 owner, u64 offset,
6617                             int refs_to_drop)
6618 {
6619         struct extent_record *rec;
6620         struct cache_extent *cache;
6621         int is_data;
6622         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6623
6624         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6625         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6626         if (!cache)
6627                 return 0;
6628
6629         rec = container_of(cache, struct extent_record, cache);
6630         if (is_data) {
6631                 struct data_backref *back;
6632                 back = find_data_backref(rec, parent, root_objectid, owner,
6633                                          offset, 1, bytenr, num_bytes);
6634                 if (!back)
6635                         goto out;
6636                 if (back->node.found_ref) {
6637                         back->found_ref -= refs_to_drop;
6638                         if (rec->refs)
6639                                 rec->refs -= refs_to_drop;
6640                 }
6641                 if (back->node.found_extent_tree) {
6642                         back->num_refs -= refs_to_drop;
6643                         if (rec->extent_item_refs)
6644                                 rec->extent_item_refs -= refs_to_drop;
6645                 }
6646                 if (back->found_ref == 0)
6647                         back->node.found_ref = 0;
6648                 if (back->num_refs == 0)
6649                         back->node.found_extent_tree = 0;
6650
6651                 if (!back->node.found_extent_tree && back->node.found_ref) {
6652                         rb_erase(&back->node.node, &rec->backref_tree);
6653                         free(back);
6654                 }
6655         } else {
6656                 struct tree_backref *back;
6657                 back = find_tree_backref(rec, parent, root_objectid);
6658                 if (!back)
6659                         goto out;
6660                 if (back->node.found_ref) {
6661                         if (rec->refs)
6662                                 rec->refs--;
6663                         back->node.found_ref = 0;
6664                 }
6665                 if (back->node.found_extent_tree) {
6666                         if (rec->extent_item_refs)
6667                                 rec->extent_item_refs--;
6668                         back->node.found_extent_tree = 0;
6669                 }
6670                 if (!back->node.found_extent_tree && back->node.found_ref) {
6671                         rb_erase(&back->node.node, &rec->backref_tree);
6672                         free(back);
6673                 }
6674         }
6675         maybe_free_extent_rec(extent_cache, rec);
6676 out:
6677         return 0;
6678 }
6679
6680 static int delete_extent_records(struct btrfs_trans_handle *trans,
6681                                  struct btrfs_root *root,
6682                                  struct btrfs_path *path,
6683                                  u64 bytenr, u64 new_len)
6684 {
6685         struct btrfs_key key;
6686         struct btrfs_key found_key;
6687         struct extent_buffer *leaf;
6688         int ret;
6689         int slot;
6690
6691
6692         key.objectid = bytenr;
6693         key.type = (u8)-1;
6694         key.offset = (u64)-1;
6695
6696         while(1) {
6697                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6698                                         &key, path, 0, 1);
6699                 if (ret < 0)
6700                         break;
6701
6702                 if (ret > 0) {
6703                         ret = 0;
6704                         if (path->slots[0] == 0)
6705                                 break;
6706                         path->slots[0]--;
6707                 }
6708                 ret = 0;
6709
6710                 leaf = path->nodes[0];
6711                 slot = path->slots[0];
6712
6713                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6714                 if (found_key.objectid != bytenr)
6715                         break;
6716
6717                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6718                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6719                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6720                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6721                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6722                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6723                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6724                         btrfs_release_path(path);
6725                         if (found_key.type == 0) {
6726                                 if (found_key.offset == 0)
6727                                         break;
6728                                 key.offset = found_key.offset - 1;
6729                                 key.type = found_key.type;
6730                         }
6731                         key.type = found_key.type - 1;
6732                         key.offset = (u64)-1;
6733                         continue;
6734                 }
6735
6736                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6737                         found_key.objectid, found_key.type, found_key.offset);
6738
6739                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6740                 if (ret)
6741                         break;
6742                 btrfs_release_path(path);
6743
6744                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6745                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6746                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6747                                 found_key.offset : root->nodesize;
6748
6749                         ret = btrfs_update_block_group(trans, root, bytenr,
6750                                                        bytes, 0, 0);
6751                         if (ret)
6752                                 break;
6753                 }
6754         }
6755
6756         btrfs_release_path(path);
6757         return ret;
6758 }
6759
6760 /*
6761  * for a single backref, this will allocate a new extent
6762  * and add the backref to it.
6763  */
6764 static int record_extent(struct btrfs_trans_handle *trans,
6765                          struct btrfs_fs_info *info,
6766                          struct btrfs_path *path,
6767                          struct extent_record *rec,
6768                          struct extent_backref *back,
6769                          int allocated, u64 flags)
6770 {
6771         int ret;
6772         struct btrfs_root *extent_root = info->extent_root;
6773         struct extent_buffer *leaf;
6774         struct btrfs_key ins_key;
6775         struct btrfs_extent_item *ei;
6776         struct tree_backref *tback;
6777         struct data_backref *dback;
6778         struct btrfs_tree_block_info *bi;
6779
6780         if (!back->is_data)
6781                 rec->max_size = max_t(u64, rec->max_size,
6782                                     info->extent_root->nodesize);
6783
6784         if (!allocated) {
6785                 u32 item_size = sizeof(*ei);
6786
6787                 if (!back->is_data)
6788                         item_size += sizeof(*bi);
6789
6790                 ins_key.objectid = rec->start;
6791                 ins_key.offset = rec->max_size;
6792                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6793
6794                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6795                                         &ins_key, item_size);
6796                 if (ret)
6797                         goto fail;
6798
6799                 leaf = path->nodes[0];
6800                 ei = btrfs_item_ptr(leaf, path->slots[0],
6801                                     struct btrfs_extent_item);
6802
6803                 btrfs_set_extent_refs(leaf, ei, 0);
6804                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6805
6806                 if (back->is_data) {
6807                         btrfs_set_extent_flags(leaf, ei,
6808                                                BTRFS_EXTENT_FLAG_DATA);
6809                 } else {
6810                         struct btrfs_disk_key copy_key;;
6811
6812                         tback = to_tree_backref(back);
6813                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6814                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6815                                              sizeof(*bi));
6816
6817                         btrfs_set_disk_key_objectid(&copy_key,
6818                                                     rec->info_objectid);
6819                         btrfs_set_disk_key_type(&copy_key, 0);
6820                         btrfs_set_disk_key_offset(&copy_key, 0);
6821
6822                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6823                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6824
6825                         btrfs_set_extent_flags(leaf, ei,
6826                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6827                 }
6828
6829                 btrfs_mark_buffer_dirty(leaf);
6830                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6831                                                rec->max_size, 1, 0);
6832                 if (ret)
6833                         goto fail;
6834                 btrfs_release_path(path);
6835         }
6836
6837         if (back->is_data) {
6838                 u64 parent;
6839                 int i;
6840
6841                 dback = to_data_backref(back);
6842                 if (back->full_backref)
6843                         parent = dback->parent;
6844                 else
6845                         parent = 0;
6846
6847                 for (i = 0; i < dback->found_ref; i++) {
6848                         /* if parent != 0, we're doing a full backref
6849                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6850                          * just makes the backref allocator create a data
6851                          * backref
6852                          */
6853                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6854                                                    rec->start, rec->max_size,
6855                                                    parent,
6856                                                    dback->root,
6857                                                    parent ?
6858                                                    BTRFS_FIRST_FREE_OBJECTID :
6859                                                    dback->owner,
6860                                                    dback->offset);
6861                         if (ret)
6862                                 break;
6863                 }
6864                 fprintf(stderr, "adding new data backref"
6865                                 " on %llu %s %llu owner %llu"
6866                                 " offset %llu found %d\n",
6867                                 (unsigned long long)rec->start,
6868                                 back->full_backref ?
6869                                 "parent" : "root",
6870                                 back->full_backref ?
6871                                 (unsigned long long)parent :
6872                                 (unsigned long long)dback->root,
6873                                 (unsigned long long)dback->owner,
6874                                 (unsigned long long)dback->offset,
6875                                 dback->found_ref);
6876         } else {
6877                 u64 parent;
6878
6879                 tback = to_tree_backref(back);
6880                 if (back->full_backref)
6881                         parent = tback->parent;
6882                 else
6883                         parent = 0;
6884
6885                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6886                                            rec->start, rec->max_size,
6887                                            parent, tback->root, 0, 0);
6888                 fprintf(stderr, "adding new tree backref on "
6889                         "start %llu len %llu parent %llu root %llu\n",
6890                         rec->start, rec->max_size, parent, tback->root);
6891         }
6892 fail:
6893         btrfs_release_path(path);
6894         return ret;
6895 }
6896
6897 static struct extent_entry *find_entry(struct list_head *entries,
6898                                        u64 bytenr, u64 bytes)
6899 {
6900         struct extent_entry *entry = NULL;
6901
6902         list_for_each_entry(entry, entries, list) {
6903                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6904                         return entry;
6905         }
6906
6907         return NULL;
6908 }
6909
6910 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6911 {
6912         struct extent_entry *entry, *best = NULL, *prev = NULL;
6913
6914         list_for_each_entry(entry, entries, list) {
6915                 if (!prev) {
6916                         prev = entry;
6917                         continue;
6918                 }
6919
6920                 /*
6921                  * If there are as many broken entries as entries then we know
6922                  * not to trust this particular entry.
6923                  */
6924                 if (entry->broken == entry->count)
6925                         continue;
6926
6927                 /*
6928                  * If our current entry == best then we can't be sure our best
6929                  * is really the best, so we need to keep searching.
6930                  */
6931                 if (best && best->count == entry->count) {
6932                         prev = entry;
6933                         best = NULL;
6934                         continue;
6935                 }
6936
6937                 /* Prev == entry, not good enough, have to keep searching */
6938                 if (!prev->broken && prev->count == entry->count)
6939                         continue;
6940
6941                 if (!best)
6942                         best = (prev->count > entry->count) ? prev : entry;
6943                 else if (best->count < entry->count)
6944                         best = entry;
6945                 prev = entry;
6946         }
6947
6948         return best;
6949 }
6950
6951 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6952                       struct data_backref *dback, struct extent_entry *entry)
6953 {
6954         struct btrfs_trans_handle *trans;
6955         struct btrfs_root *root;
6956         struct btrfs_file_extent_item *fi;
6957         struct extent_buffer *leaf;
6958         struct btrfs_key key;
6959         u64 bytenr, bytes;
6960         int ret, err;
6961
6962         key.objectid = dback->root;
6963         key.type = BTRFS_ROOT_ITEM_KEY;
6964         key.offset = (u64)-1;
6965         root = btrfs_read_fs_root(info, &key);
6966         if (IS_ERR(root)) {
6967                 fprintf(stderr, "Couldn't find root for our ref\n");
6968                 return -EINVAL;
6969         }
6970
6971         /*
6972          * The backref points to the original offset of the extent if it was
6973          * split, so we need to search down to the offset we have and then walk
6974          * forward until we find the backref we're looking for.
6975          */
6976         key.objectid = dback->owner;
6977         key.type = BTRFS_EXTENT_DATA_KEY;
6978         key.offset = dback->offset;
6979         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6980         if (ret < 0) {
6981                 fprintf(stderr, "Error looking up ref %d\n", ret);
6982                 return ret;
6983         }
6984
6985         while (1) {
6986                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6987                         ret = btrfs_next_leaf(root, path);
6988                         if (ret) {
6989                                 fprintf(stderr, "Couldn't find our ref, next\n");
6990                                 return -EINVAL;
6991                         }
6992                 }
6993                 leaf = path->nodes[0];
6994                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6995                 if (key.objectid != dback->owner ||
6996                     key.type != BTRFS_EXTENT_DATA_KEY) {
6997                         fprintf(stderr, "Couldn't find our ref, search\n");
6998                         return -EINVAL;
6999                 }
7000                 fi = btrfs_item_ptr(leaf, path->slots[0],
7001                                     struct btrfs_file_extent_item);
7002                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7003                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7004
7005                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7006                         break;
7007                 path->slots[0]++;
7008         }
7009
7010         btrfs_release_path(path);
7011
7012         trans = btrfs_start_transaction(root, 1);
7013         if (IS_ERR(trans))
7014                 return PTR_ERR(trans);
7015
7016         /*
7017          * Ok we have the key of the file extent we want to fix, now we can cow
7018          * down to the thing and fix it.
7019          */
7020         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7021         if (ret < 0) {
7022                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7023                         key.objectid, key.type, key.offset, ret);
7024                 goto out;
7025         }
7026         if (ret > 0) {
7027                 fprintf(stderr, "Well that's odd, we just found this key "
7028                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7029                         key.offset);
7030                 ret = -EINVAL;
7031                 goto out;
7032         }
7033         leaf = path->nodes[0];
7034         fi = btrfs_item_ptr(leaf, path->slots[0],
7035                             struct btrfs_file_extent_item);
7036
7037         if (btrfs_file_extent_compression(leaf, fi) &&
7038             dback->disk_bytenr != entry->bytenr) {
7039                 fprintf(stderr, "Ref doesn't match the record start and is "
7040                         "compressed, please take a btrfs-image of this file "
7041                         "system and send it to a btrfs developer so they can "
7042                         "complete this functionality for bytenr %Lu\n",
7043                         dback->disk_bytenr);
7044                 ret = -EINVAL;
7045                 goto out;
7046         }
7047
7048         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7049                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7050         } else if (dback->disk_bytenr > entry->bytenr) {
7051                 u64 off_diff, offset;
7052
7053                 off_diff = dback->disk_bytenr - entry->bytenr;
7054                 offset = btrfs_file_extent_offset(leaf, fi);
7055                 if (dback->disk_bytenr + offset +
7056                     btrfs_file_extent_num_bytes(leaf, fi) >
7057                     entry->bytenr + entry->bytes) {
7058                         fprintf(stderr, "Ref is past the entry end, please "
7059                                 "take a btrfs-image of this file system and "
7060                                 "send it to a btrfs developer, ref %Lu\n",
7061                                 dback->disk_bytenr);
7062                         ret = -EINVAL;
7063                         goto out;
7064                 }
7065                 offset += off_diff;
7066                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7067                 btrfs_set_file_extent_offset(leaf, fi, offset);
7068         } else if (dback->disk_bytenr < entry->bytenr) {
7069                 u64 offset;
7070
7071                 offset = btrfs_file_extent_offset(leaf, fi);
7072                 if (dback->disk_bytenr + offset < entry->bytenr) {
7073                         fprintf(stderr, "Ref is before the entry start, please"
7074                                 " take a btrfs-image of this file system and "
7075                                 "send it to a btrfs developer, ref %Lu\n",
7076                                 dback->disk_bytenr);
7077                         ret = -EINVAL;
7078                         goto out;
7079                 }
7080
7081                 offset += dback->disk_bytenr;
7082                 offset -= entry->bytenr;
7083                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7084                 btrfs_set_file_extent_offset(leaf, fi, offset);
7085         }
7086
7087         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7088
7089         /*
7090          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7091          * only do this if we aren't using compression, otherwise it's a
7092          * trickier case.
7093          */
7094         if (!btrfs_file_extent_compression(leaf, fi))
7095                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7096         else
7097                 printf("ram bytes may be wrong?\n");
7098         btrfs_mark_buffer_dirty(leaf);
7099 out:
7100         err = btrfs_commit_transaction(trans, root);
7101         btrfs_release_path(path);
7102         return ret ? ret : err;
7103 }
7104
7105 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7106                            struct extent_record *rec)
7107 {
7108         struct extent_backref *back, *tmp;
7109         struct data_backref *dback;
7110         struct extent_entry *entry, *best = NULL;
7111         LIST_HEAD(entries);
7112         int nr_entries = 0;
7113         int broken_entries = 0;
7114         int ret = 0;
7115         short mismatch = 0;
7116
7117         /*
7118          * Metadata is easy and the backrefs should always agree on bytenr and
7119          * size, if not we've got bigger issues.
7120          */
7121         if (rec->metadata)
7122                 return 0;
7123
7124         rbtree_postorder_for_each_entry_safe(back, tmp,
7125                                              &rec->backref_tree, node) {
7126                 if (back->full_backref || !back->is_data)
7127                         continue;
7128
7129                 dback = to_data_backref(back);
7130
7131                 /*
7132                  * We only pay attention to backrefs that we found a real
7133                  * backref for.
7134                  */
7135                 if (dback->found_ref == 0)
7136                         continue;
7137
7138                 /*
7139                  * For now we only catch when the bytes don't match, not the
7140                  * bytenr.  We can easily do this at the same time, but I want
7141                  * to have a fs image to test on before we just add repair
7142                  * functionality willy-nilly so we know we won't screw up the
7143                  * repair.
7144                  */
7145
7146                 entry = find_entry(&entries, dback->disk_bytenr,
7147                                    dback->bytes);
7148                 if (!entry) {
7149                         entry = malloc(sizeof(struct extent_entry));
7150                         if (!entry) {
7151                                 ret = -ENOMEM;
7152                                 goto out;
7153                         }
7154                         memset(entry, 0, sizeof(*entry));
7155                         entry->bytenr = dback->disk_bytenr;
7156                         entry->bytes = dback->bytes;
7157                         list_add_tail(&entry->list, &entries);
7158                         nr_entries++;
7159                 }
7160
7161                 /*
7162                  * If we only have on entry we may think the entries agree when
7163                  * in reality they don't so we have to do some extra checking.
7164                  */
7165                 if (dback->disk_bytenr != rec->start ||
7166                     dback->bytes != rec->nr || back->broken)
7167                         mismatch = 1;
7168
7169                 if (back->broken) {
7170                         entry->broken++;
7171                         broken_entries++;
7172                 }
7173
7174                 entry->count++;
7175         }
7176
7177         /* Yay all the backrefs agree, carry on good sir */
7178         if (nr_entries <= 1 && !mismatch)
7179                 goto out;
7180
7181         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7182                 "%Lu\n", rec->start);
7183
7184         /*
7185          * First we want to see if the backrefs can agree amongst themselves who
7186          * is right, so figure out which one of the entries has the highest
7187          * count.
7188          */
7189         best = find_most_right_entry(&entries);
7190
7191         /*
7192          * Ok so we may have an even split between what the backrefs think, so
7193          * this is where we use the extent ref to see what it thinks.
7194          */
7195         if (!best) {
7196                 entry = find_entry(&entries, rec->start, rec->nr);
7197                 if (!entry && (!broken_entries || !rec->found_rec)) {
7198                         fprintf(stderr, "Backrefs don't agree with each other "
7199                                 "and extent record doesn't agree with anybody,"
7200                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7201                                 rec->start, rec->nr);
7202                         ret = -EINVAL;
7203                         goto out;
7204                 } else if (!entry) {
7205                         /*
7206                          * Ok our backrefs were broken, we'll assume this is the
7207                          * correct value and add an entry for this range.
7208                          */
7209                         entry = malloc(sizeof(struct extent_entry));
7210                         if (!entry) {
7211                                 ret = -ENOMEM;
7212                                 goto out;
7213                         }
7214                         memset(entry, 0, sizeof(*entry));
7215                         entry->bytenr = rec->start;
7216                         entry->bytes = rec->nr;
7217                         list_add_tail(&entry->list, &entries);
7218                         nr_entries++;
7219                 }
7220                 entry->count++;
7221                 best = find_most_right_entry(&entries);
7222                 if (!best) {
7223                         fprintf(stderr, "Backrefs and extent record evenly "
7224                                 "split on who is right, this is going to "
7225                                 "require user input to fix bytenr %Lu bytes "
7226                                 "%Lu\n", rec->start, rec->nr);
7227                         ret = -EINVAL;
7228                         goto out;
7229                 }
7230         }
7231
7232         /*
7233          * I don't think this can happen currently as we'll abort() if we catch
7234          * this case higher up, but in case somebody removes that we still can't
7235          * deal with it properly here yet, so just bail out of that's the case.
7236          */
7237         if (best->bytenr != rec->start) {
7238                 fprintf(stderr, "Extent start and backref starts don't match, "
7239                         "please use btrfs-image on this file system and send "
7240                         "it to a btrfs developer so they can make fsck fix "
7241                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7242                         rec->start, rec->nr);
7243                 ret = -EINVAL;
7244                 goto out;
7245         }
7246
7247         /*
7248          * Ok great we all agreed on an extent record, let's go find the real
7249          * references and fix up the ones that don't match.
7250          */
7251         rbtree_postorder_for_each_entry_safe(back, tmp,
7252                                              &rec->backref_tree, node) {
7253                 if (back->full_backref || !back->is_data)
7254                         continue;
7255
7256                 dback = to_data_backref(back);
7257
7258                 /*
7259                  * Still ignoring backrefs that don't have a real ref attached
7260                  * to them.
7261                  */
7262                 if (dback->found_ref == 0)
7263                         continue;
7264
7265                 if (dback->bytes == best->bytes &&
7266                     dback->disk_bytenr == best->bytenr)
7267                         continue;
7268
7269                 ret = repair_ref(info, path, dback, best);
7270                 if (ret)
7271                         goto out;
7272         }
7273
7274         /*
7275          * Ok we messed with the actual refs, which means we need to drop our
7276          * entire cache and go back and rescan.  I know this is a huge pain and
7277          * adds a lot of extra work, but it's the only way to be safe.  Once all
7278          * the backrefs agree we may not need to do anything to the extent
7279          * record itself.
7280          */
7281         ret = -EAGAIN;
7282 out:
7283         while (!list_empty(&entries)) {
7284                 entry = list_entry(entries.next, struct extent_entry, list);
7285                 list_del_init(&entry->list);
7286                 free(entry);
7287         }
7288         return ret;
7289 }
7290
7291 static int process_duplicates(struct btrfs_root *root,
7292                               struct cache_tree *extent_cache,
7293                               struct extent_record *rec)
7294 {
7295         struct extent_record *good, *tmp;
7296         struct cache_extent *cache;
7297         int ret;
7298
7299         /*
7300          * If we found a extent record for this extent then return, or if we
7301          * have more than one duplicate we are likely going to need to delete
7302          * something.
7303          */
7304         if (rec->found_rec || rec->num_duplicates > 1)
7305                 return 0;
7306
7307         /* Shouldn't happen but just in case */
7308         BUG_ON(!rec->num_duplicates);
7309
7310         /*
7311          * So this happens if we end up with a backref that doesn't match the
7312          * actual extent entry.  So either the backref is bad or the extent
7313          * entry is bad.  Either way we want to have the extent_record actually
7314          * reflect what we found in the extent_tree, so we need to take the
7315          * duplicate out and use that as the extent_record since the only way we
7316          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7317          */
7318         remove_cache_extent(extent_cache, &rec->cache);
7319
7320         good = to_extent_record(rec->dups.next);
7321         list_del_init(&good->list);
7322         INIT_LIST_HEAD(&good->backrefs);
7323         INIT_LIST_HEAD(&good->dups);
7324         good->cache.start = good->start;
7325         good->cache.size = good->nr;
7326         good->content_checked = 0;
7327         good->owner_ref_checked = 0;
7328         good->num_duplicates = 0;
7329         good->refs = rec->refs;
7330         list_splice_init(&rec->backrefs, &good->backrefs);
7331         while (1) {
7332                 cache = lookup_cache_extent(extent_cache, good->start,
7333                                             good->nr);
7334                 if (!cache)
7335                         break;
7336                 tmp = container_of(cache, struct extent_record, cache);
7337
7338                 /*
7339                  * If we find another overlapping extent and it's found_rec is
7340                  * set then it's a duplicate and we need to try and delete
7341                  * something.
7342                  */
7343                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7344                         if (list_empty(&good->list))
7345                                 list_add_tail(&good->list,
7346                                               &duplicate_extents);
7347                         good->num_duplicates += tmp->num_duplicates + 1;
7348                         list_splice_init(&tmp->dups, &good->dups);
7349                         list_del_init(&tmp->list);
7350                         list_add_tail(&tmp->list, &good->dups);
7351                         remove_cache_extent(extent_cache, &tmp->cache);
7352                         continue;
7353                 }
7354
7355                 /*
7356                  * Ok we have another non extent item backed extent rec, so lets
7357                  * just add it to this extent and carry on like we did above.
7358                  */
7359                 good->refs += tmp->refs;
7360                 list_splice_init(&tmp->backrefs, &good->backrefs);
7361                 remove_cache_extent(extent_cache, &tmp->cache);
7362                 free(tmp);
7363         }
7364         ret = insert_cache_extent(extent_cache, &good->cache);
7365         BUG_ON(ret);
7366         free(rec);
7367         return good->num_duplicates ? 0 : 1;
7368 }
7369
7370 static int delete_duplicate_records(struct btrfs_root *root,
7371                                     struct extent_record *rec)
7372 {
7373         struct btrfs_trans_handle *trans;
7374         LIST_HEAD(delete_list);
7375         struct btrfs_path *path;
7376         struct extent_record *tmp, *good, *n;
7377         int nr_del = 0;
7378         int ret = 0, err;
7379         struct btrfs_key key;
7380
7381         path = btrfs_alloc_path();
7382         if (!path) {
7383                 ret = -ENOMEM;
7384                 goto out;
7385         }
7386
7387         good = rec;
7388         /* Find the record that covers all of the duplicates. */
7389         list_for_each_entry(tmp, &rec->dups, list) {
7390                 if (good->start < tmp->start)
7391                         continue;
7392                 if (good->nr > tmp->nr)
7393                         continue;
7394
7395                 if (tmp->start + tmp->nr < good->start + good->nr) {
7396                         fprintf(stderr, "Ok we have overlapping extents that "
7397                                 "aren't completely covered by each other, this "
7398                                 "is going to require more careful thought.  "
7399                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7400                                 tmp->start, tmp->nr, good->start, good->nr);
7401                         abort();
7402                 }
7403                 good = tmp;
7404         }
7405
7406         if (good != rec)
7407                 list_add_tail(&rec->list, &delete_list);
7408
7409         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7410                 if (tmp == good)
7411                         continue;
7412                 list_move_tail(&tmp->list, &delete_list);
7413         }
7414
7415         root = root->fs_info->extent_root;
7416         trans = btrfs_start_transaction(root, 1);
7417         if (IS_ERR(trans)) {
7418                 ret = PTR_ERR(trans);
7419                 goto out;
7420         }
7421
7422         list_for_each_entry(tmp, &delete_list, list) {
7423                 if (tmp->found_rec == 0)
7424                         continue;
7425                 key.objectid = tmp->start;
7426                 key.type = BTRFS_EXTENT_ITEM_KEY;
7427                 key.offset = tmp->nr;
7428
7429                 /* Shouldn't happen but just in case */
7430                 if (tmp->metadata) {
7431                         fprintf(stderr, "Well this shouldn't happen, extent "
7432                                 "record overlaps but is metadata? "
7433                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7434                         abort();
7435                 }
7436
7437                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7438                 if (ret) {
7439                         if (ret > 0)
7440                                 ret = -EINVAL;
7441                         break;
7442                 }
7443                 ret = btrfs_del_item(trans, root, path);
7444                 if (ret)
7445                         break;
7446                 btrfs_release_path(path);
7447                 nr_del++;
7448         }
7449         err = btrfs_commit_transaction(trans, root);
7450         if (err && !ret)
7451                 ret = err;
7452 out:
7453         while (!list_empty(&delete_list)) {
7454                 tmp = to_extent_record(delete_list.next);
7455                 list_del_init(&tmp->list);
7456                 if (tmp == rec)
7457                         continue;
7458                 free(tmp);
7459         }
7460
7461         while (!list_empty(&rec->dups)) {
7462                 tmp = to_extent_record(rec->dups.next);
7463                 list_del_init(&tmp->list);
7464                 free(tmp);
7465         }
7466
7467         btrfs_free_path(path);
7468
7469         if (!ret && !nr_del)
7470                 rec->num_duplicates = 0;
7471
7472         return ret ? ret : nr_del;
7473 }
7474
7475 static int find_possible_backrefs(struct btrfs_fs_info *info,
7476                                   struct btrfs_path *path,
7477                                   struct cache_tree *extent_cache,
7478                                   struct extent_record *rec)
7479 {
7480         struct btrfs_root *root;
7481         struct extent_backref *back, *tmp;
7482         struct data_backref *dback;
7483         struct cache_extent *cache;
7484         struct btrfs_file_extent_item *fi;
7485         struct btrfs_key key;
7486         u64 bytenr, bytes;
7487         int ret;
7488
7489         rbtree_postorder_for_each_entry_safe(back, tmp,
7490                                              &rec->backref_tree, node) {
7491                 /* Don't care about full backrefs (poor unloved backrefs) */
7492                 if (back->full_backref || !back->is_data)
7493                         continue;
7494
7495                 dback = to_data_backref(back);
7496
7497                 /* We found this one, we don't need to do a lookup */
7498                 if (dback->found_ref)
7499                         continue;
7500
7501                 key.objectid = dback->root;
7502                 key.type = BTRFS_ROOT_ITEM_KEY;
7503                 key.offset = (u64)-1;
7504
7505                 root = btrfs_read_fs_root(info, &key);
7506
7507                 /* No root, definitely a bad ref, skip */
7508                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7509                         continue;
7510                 /* Other err, exit */
7511                 if (IS_ERR(root))
7512                         return PTR_ERR(root);
7513
7514                 key.objectid = dback->owner;
7515                 key.type = BTRFS_EXTENT_DATA_KEY;
7516                 key.offset = dback->offset;
7517                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7518                 if (ret) {
7519                         btrfs_release_path(path);
7520                         if (ret < 0)
7521                                 return ret;
7522                         /* Didn't find it, we can carry on */
7523                         ret = 0;
7524                         continue;
7525                 }
7526
7527                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7528                                     struct btrfs_file_extent_item);
7529                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7530                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7531                 btrfs_release_path(path);
7532                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7533                 if (cache) {
7534                         struct extent_record *tmp;
7535                         tmp = container_of(cache, struct extent_record, cache);
7536
7537                         /*
7538                          * If we found an extent record for the bytenr for this
7539                          * particular backref then we can't add it to our
7540                          * current extent record.  We only want to add backrefs
7541                          * that don't have a corresponding extent item in the
7542                          * extent tree since they likely belong to this record
7543                          * and we need to fix it if it doesn't match bytenrs.
7544                          */
7545                         if  (tmp->found_rec)
7546                                 continue;
7547                 }
7548
7549                 dback->found_ref += 1;
7550                 dback->disk_bytenr = bytenr;
7551                 dback->bytes = bytes;
7552
7553                 /*
7554                  * Set this so the verify backref code knows not to trust the
7555                  * values in this backref.
7556                  */
7557                 back->broken = 1;
7558         }
7559
7560         return 0;
7561 }
7562
7563 /*
7564  * Record orphan data ref into corresponding root.
7565  *
7566  * Return 0 if the extent item contains data ref and recorded.
7567  * Return 1 if the extent item contains no useful data ref
7568  *   On that case, it may contains only shared_dataref or metadata backref
7569  *   or the file extent exists(this should be handled by the extent bytenr
7570  *   recovery routine)
7571  * Return <0 if something goes wrong.
7572  */
7573 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7574                                       struct extent_record *rec)
7575 {
7576         struct btrfs_key key;
7577         struct btrfs_root *dest_root;
7578         struct extent_backref *back, *tmp;
7579         struct data_backref *dback;
7580         struct orphan_data_extent *orphan;
7581         struct btrfs_path *path;
7582         int recorded_data_ref = 0;
7583         int ret = 0;
7584
7585         if (rec->metadata)
7586                 return 1;
7587         path = btrfs_alloc_path();
7588         if (!path)
7589                 return -ENOMEM;
7590         rbtree_postorder_for_each_entry_safe(back, tmp,
7591                                              &rec->backref_tree, node) {
7592                 if (back->full_backref || !back->is_data ||
7593                     !back->found_extent_tree)
7594                         continue;
7595                 dback = to_data_backref(back);
7596                 if (dback->found_ref)
7597                         continue;
7598                 key.objectid = dback->root;
7599                 key.type = BTRFS_ROOT_ITEM_KEY;
7600                 key.offset = (u64)-1;
7601
7602                 dest_root = btrfs_read_fs_root(fs_info, &key);
7603
7604                 /* For non-exist root we just skip it */
7605                 if (IS_ERR(dest_root) || !dest_root)
7606                         continue;
7607
7608                 key.objectid = dback->owner;
7609                 key.type = BTRFS_EXTENT_DATA_KEY;
7610                 key.offset = dback->offset;
7611
7612                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7613                 /*
7614                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7615                  * we need to record it for inode/file extent rebuild.
7616                  * For ret > 0, we record it only for file extent rebuild.
7617                  * For ret == 0, the file extent exists but only bytenr
7618                  * mismatch, let the original bytenr fix routine to handle,
7619                  * don't record it.
7620                  */
7621                 if (ret == 0)
7622                         continue;
7623                 ret = 0;
7624                 orphan = malloc(sizeof(*orphan));
7625                 if (!orphan) {
7626                         ret = -ENOMEM;
7627                         goto out;
7628                 }
7629                 INIT_LIST_HEAD(&orphan->list);
7630                 orphan->root = dback->root;
7631                 orphan->objectid = dback->owner;
7632                 orphan->offset = dback->offset;
7633                 orphan->disk_bytenr = rec->cache.start;
7634                 orphan->disk_len = rec->cache.size;
7635                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7636                 recorded_data_ref = 1;
7637         }
7638 out:
7639         btrfs_free_path(path);
7640         if (!ret)
7641                 return !recorded_data_ref;
7642         else
7643                 return ret;
7644 }
7645
7646 /*
7647  * when an incorrect extent item is found, this will delete
7648  * all of the existing entries for it and recreate them
7649  * based on what the tree scan found.
7650  */
7651 static int fixup_extent_refs(struct btrfs_fs_info *info,
7652                              struct cache_tree *extent_cache,
7653                              struct extent_record *rec)
7654 {
7655         struct btrfs_trans_handle *trans = NULL;
7656         int ret;
7657         struct btrfs_path *path;
7658         struct cache_extent *cache;
7659         struct extent_backref *back, *tmp;
7660         int allocated = 0;
7661         u64 flags = 0;
7662
7663         if (rec->flag_block_full_backref)
7664                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7665
7666         path = btrfs_alloc_path();
7667         if (!path)
7668                 return -ENOMEM;
7669
7670         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7671                 /*
7672                  * Sometimes the backrefs themselves are so broken they don't
7673                  * get attached to any meaningful rec, so first go back and
7674                  * check any of our backrefs that we couldn't find and throw
7675                  * them into the list if we find the backref so that
7676                  * verify_backrefs can figure out what to do.
7677                  */
7678                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7679                 if (ret < 0)
7680                         goto out;
7681         }
7682
7683         /* step one, make sure all of the backrefs agree */
7684         ret = verify_backrefs(info, path, rec);
7685         if (ret < 0)
7686                 goto out;
7687
7688         trans = btrfs_start_transaction(info->extent_root, 1);
7689         if (IS_ERR(trans)) {
7690                 ret = PTR_ERR(trans);
7691                 goto out;
7692         }
7693
7694         /* step two, delete all the existing records */
7695         ret = delete_extent_records(trans, info->extent_root, path,
7696                                     rec->start, rec->max_size);
7697
7698         if (ret < 0)
7699                 goto out;
7700
7701         /* was this block corrupt?  If so, don't add references to it */
7702         cache = lookup_cache_extent(info->corrupt_blocks,
7703                                     rec->start, rec->max_size);
7704         if (cache) {
7705                 ret = 0;
7706                 goto out;
7707         }
7708
7709         /* step three, recreate all the refs we did find */
7710         rbtree_postorder_for_each_entry_safe(back, tmp,
7711                                              &rec->backref_tree, node) {
7712                 /*
7713                  * if we didn't find any references, don't create a
7714                  * new extent record
7715                  */
7716                 if (!back->found_ref)
7717                         continue;
7718
7719                 rec->bad_full_backref = 0;
7720                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7721                 allocated = 1;
7722
7723                 if (ret)
7724                         goto out;
7725         }
7726 out:
7727         if (trans) {
7728                 int err = btrfs_commit_transaction(trans, info->extent_root);
7729                 if (!ret)
7730                         ret = err;
7731         }
7732
7733         btrfs_free_path(path);
7734         return ret;
7735 }
7736
7737 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7738                               struct extent_record *rec)
7739 {
7740         struct btrfs_trans_handle *trans;
7741         struct btrfs_root *root = fs_info->extent_root;
7742         struct btrfs_path *path;
7743         struct btrfs_extent_item *ei;
7744         struct btrfs_key key;
7745         u64 flags;
7746         int ret = 0;
7747
7748         key.objectid = rec->start;
7749         if (rec->metadata) {
7750                 key.type = BTRFS_METADATA_ITEM_KEY;
7751                 key.offset = rec->info_level;
7752         } else {
7753                 key.type = BTRFS_EXTENT_ITEM_KEY;
7754                 key.offset = rec->max_size;
7755         }
7756
7757         path = btrfs_alloc_path();
7758         if (!path)
7759                 return -ENOMEM;
7760
7761         trans = btrfs_start_transaction(root, 0);
7762         if (IS_ERR(trans)) {
7763                 btrfs_free_path(path);
7764                 return PTR_ERR(trans);
7765         }
7766
7767         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7768         if (ret < 0) {
7769                 btrfs_free_path(path);
7770                 btrfs_commit_transaction(trans, root);
7771                 return ret;
7772         } else if (ret) {
7773                 fprintf(stderr, "Didn't find extent for %llu\n",
7774                         (unsigned long long)rec->start);
7775                 btrfs_free_path(path);
7776                 btrfs_commit_transaction(trans, root);
7777                 return -ENOENT;
7778         }
7779
7780         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7781                             struct btrfs_extent_item);
7782         flags = btrfs_extent_flags(path->nodes[0], ei);
7783         if (rec->flag_block_full_backref) {
7784                 fprintf(stderr, "setting full backref on %llu\n",
7785                         (unsigned long long)key.objectid);
7786                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7787         } else {
7788                 fprintf(stderr, "clearing full backref on %llu\n",
7789                         (unsigned long long)key.objectid);
7790                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7791         }
7792         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7793         btrfs_mark_buffer_dirty(path->nodes[0]);
7794         btrfs_free_path(path);
7795         return btrfs_commit_transaction(trans, root);
7796 }
7797
7798 /* right now we only prune from the extent allocation tree */
7799 static int prune_one_block(struct btrfs_trans_handle *trans,
7800                            struct btrfs_fs_info *info,
7801                            struct btrfs_corrupt_block *corrupt)
7802 {
7803         int ret;
7804         struct btrfs_path path;
7805         struct extent_buffer *eb;
7806         u64 found;
7807         int slot;
7808         int nritems;
7809         int level = corrupt->level + 1;
7810
7811         btrfs_init_path(&path);
7812 again:
7813         /* we want to stop at the parent to our busted block */
7814         path.lowest_level = level;
7815
7816         ret = btrfs_search_slot(trans, info->extent_root,
7817                                 &corrupt->key, &path, -1, 1);
7818
7819         if (ret < 0)
7820                 goto out;
7821
7822         eb = path.nodes[level];
7823         if (!eb) {
7824                 ret = -ENOENT;
7825                 goto out;
7826         }
7827
7828         /*
7829          * hopefully the search gave us the block we want to prune,
7830          * lets try that first
7831          */
7832         slot = path.slots[level];
7833         found =  btrfs_node_blockptr(eb, slot);
7834         if (found == corrupt->cache.start)
7835                 goto del_ptr;
7836
7837         nritems = btrfs_header_nritems(eb);
7838
7839         /* the search failed, lets scan this node and hope we find it */
7840         for (slot = 0; slot < nritems; slot++) {
7841                 found =  btrfs_node_blockptr(eb, slot);
7842                 if (found == corrupt->cache.start)
7843                         goto del_ptr;
7844         }
7845         /*
7846          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7847          * to this block
7848          */
7849         if (eb == info->extent_root->node) {
7850                 ret = -ENOENT;
7851                 goto out;
7852         } else {
7853                 level++;
7854                 btrfs_release_path(&path);
7855                 goto again;
7856         }
7857
7858 del_ptr:
7859         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7860         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7861
7862 out:
7863         btrfs_release_path(&path);
7864         return ret;
7865 }
7866
7867 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7868 {
7869         struct btrfs_trans_handle *trans = NULL;
7870         struct cache_extent *cache;
7871         struct btrfs_corrupt_block *corrupt;
7872
7873         while (1) {
7874                 cache = search_cache_extent(info->corrupt_blocks, 0);
7875                 if (!cache)
7876                         break;
7877                 if (!trans) {
7878                         trans = btrfs_start_transaction(info->extent_root, 1);
7879                         if (IS_ERR(trans))
7880                                 return PTR_ERR(trans);
7881                 }
7882                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7883                 prune_one_block(trans, info, corrupt);
7884                 remove_cache_extent(info->corrupt_blocks, cache);
7885         }
7886         if (trans)
7887                 return btrfs_commit_transaction(trans, info->extent_root);
7888         return 0;
7889 }
7890
7891 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7892 {
7893         struct btrfs_block_group_cache *cache;
7894         u64 start, end;
7895         int ret;
7896
7897         while (1) {
7898                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7899                                             &start, &end, EXTENT_DIRTY);
7900                 if (ret)
7901                         break;
7902                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7903                                    GFP_NOFS);
7904         }
7905
7906         start = 0;
7907         while (1) {
7908                 cache = btrfs_lookup_first_block_group(fs_info, start);
7909                 if (!cache)
7910                         break;
7911                 if (cache->cached)
7912                         cache->cached = 0;
7913                 start = cache->key.objectid + cache->key.offset;
7914         }
7915 }
7916
7917 static int check_extent_refs(struct btrfs_root *root,
7918                              struct cache_tree *extent_cache)
7919 {
7920         struct extent_record *rec;
7921         struct cache_extent *cache;
7922         int err = 0;
7923         int ret = 0;
7924         int fixed = 0;
7925         int had_dups = 0;
7926         int recorded = 0;
7927
7928         if (repair) {
7929                 /*
7930                  * if we're doing a repair, we have to make sure
7931                  * we don't allocate from the problem extents.
7932                  * In the worst case, this will be all the
7933                  * extents in the FS
7934                  */
7935                 cache = search_cache_extent(extent_cache, 0);
7936                 while(cache) {
7937                         rec = container_of(cache, struct extent_record, cache);
7938                         set_extent_dirty(root->fs_info->excluded_extents,
7939                                          rec->start,
7940                                          rec->start + rec->max_size - 1,
7941                                          GFP_NOFS);
7942                         cache = next_cache_extent(cache);
7943                 }
7944
7945                 /* pin down all the corrupted blocks too */
7946                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7947                 while(cache) {
7948                         set_extent_dirty(root->fs_info->excluded_extents,
7949                                          cache->start,
7950                                          cache->start + cache->size - 1,
7951                                          GFP_NOFS);
7952                         cache = next_cache_extent(cache);
7953                 }
7954                 prune_corrupt_blocks(root->fs_info);
7955                 reset_cached_block_groups(root->fs_info);
7956         }
7957
7958         reset_cached_block_groups(root->fs_info);
7959
7960         /*
7961          * We need to delete any duplicate entries we find first otherwise we
7962          * could mess up the extent tree when we have backrefs that actually
7963          * belong to a different extent item and not the weird duplicate one.
7964          */
7965         while (repair && !list_empty(&duplicate_extents)) {
7966                 rec = to_extent_record(duplicate_extents.next);
7967                 list_del_init(&rec->list);
7968
7969                 /* Sometimes we can find a backref before we find an actual
7970                  * extent, so we need to process it a little bit to see if there
7971                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7972                  * if this is a backref screwup.  If we need to delete stuff
7973                  * process_duplicates() will return 0, otherwise it will return
7974                  * 1 and we
7975                  */
7976                 if (process_duplicates(root, extent_cache, rec))
7977                         continue;
7978                 ret = delete_duplicate_records(root, rec);
7979                 if (ret < 0)
7980                         return ret;
7981                 /*
7982                  * delete_duplicate_records will return the number of entries
7983                  * deleted, so if it's greater than 0 then we know we actually
7984                  * did something and we need to remove.
7985                  */
7986                 if (ret)
7987                         had_dups = 1;
7988         }
7989
7990         if (had_dups)
7991                 return -EAGAIN;
7992
7993         while(1) {
7994                 int cur_err = 0;
7995
7996                 fixed = 0;
7997                 recorded = 0;
7998                 cache = search_cache_extent(extent_cache, 0);
7999                 if (!cache)
8000                         break;
8001                 rec = container_of(cache, struct extent_record, cache);
8002                 if (rec->num_duplicates) {
8003                         fprintf(stderr, "extent item %llu has multiple extent "
8004                                 "items\n", (unsigned long long)rec->start);
8005                         err = 1;
8006                         cur_err = 1;
8007                 }
8008
8009                 if (rec->refs != rec->extent_item_refs) {
8010                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8011                                 (unsigned long long)rec->start,
8012                                 (unsigned long long)rec->nr);
8013                         fprintf(stderr, "extent item %llu, found %llu\n",
8014                                 (unsigned long long)rec->extent_item_refs,
8015                                 (unsigned long long)rec->refs);
8016                         ret = record_orphan_data_extents(root->fs_info, rec);
8017                         if (ret < 0)
8018                                 goto repair_abort;
8019                         if (ret == 0) {
8020                                 recorded = 1;
8021                         } else {
8022                                 /*
8023                                  * we can't use the extent to repair file
8024                                  * extent, let the fallback method handle it.
8025                                  */
8026                                 if (!fixed && repair) {
8027                                         ret = fixup_extent_refs(
8028                                                         root->fs_info,
8029                                                         extent_cache, rec);
8030                                         if (ret)
8031                                                 goto repair_abort;
8032                                         fixed = 1;
8033                                 }
8034                         }
8035                         err = 1;
8036                         cur_err = 1;
8037                 }
8038                 if (all_backpointers_checked(rec, 1)) {
8039                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8040                                 (unsigned long long)rec->start,
8041                                 (unsigned long long)rec->nr);
8042
8043                         if (!fixed && !recorded && repair) {
8044                                 ret = fixup_extent_refs(root->fs_info,
8045                                                         extent_cache, rec);
8046                                 if (ret)
8047                                         goto repair_abort;
8048                                 fixed = 1;
8049                         }
8050                         cur_err = 1;
8051                         err = 1;
8052                 }
8053                 if (!rec->owner_ref_checked) {
8054                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8055                                 (unsigned long long)rec->start,
8056                                 (unsigned long long)rec->nr);
8057                         if (!fixed && !recorded && repair) {
8058                                 ret = fixup_extent_refs(root->fs_info,
8059                                                         extent_cache, rec);
8060                                 if (ret)
8061                                         goto repair_abort;
8062                                 fixed = 1;
8063                         }
8064                         err = 1;
8065                         cur_err = 1;
8066                 }
8067                 if (rec->bad_full_backref) {
8068                         fprintf(stderr, "bad full backref, on [%llu]\n",
8069                                 (unsigned long long)rec->start);
8070                         if (repair) {
8071                                 ret = fixup_extent_flags(root->fs_info, rec);
8072                                 if (ret)
8073                                         goto repair_abort;
8074                                 fixed = 1;
8075                         }
8076                         err = 1;
8077                         cur_err = 1;
8078                 }
8079                 /*
8080                  * Although it's not a extent ref's problem, we reuse this
8081                  * routine for error reporting.
8082                  * No repair function yet.
8083                  */
8084                 if (rec->crossing_stripes) {
8085                         fprintf(stderr,
8086                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8087                                 rec->start, rec->start + rec->max_size);
8088                         err = 1;
8089                         cur_err = 1;
8090                 }
8091
8092                 if (rec->wrong_chunk_type) {
8093                         fprintf(stderr,
8094                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8095                                 rec->start, rec->start + rec->max_size);
8096                         err = 1;
8097                         cur_err = 1;
8098                 }
8099
8100                 remove_cache_extent(extent_cache, cache);
8101                 free_all_extent_backrefs(rec);
8102                 if (!init_extent_tree && repair && (!cur_err || fixed))
8103                         clear_extent_dirty(root->fs_info->excluded_extents,
8104                                            rec->start,
8105                                            rec->start + rec->max_size - 1,
8106                                            GFP_NOFS);
8107                 free(rec);
8108         }
8109 repair_abort:
8110         if (repair) {
8111                 if (ret && ret != -EAGAIN) {
8112                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8113                         exit(1);
8114                 } else if (!ret) {
8115                         struct btrfs_trans_handle *trans;
8116
8117                         root = root->fs_info->extent_root;
8118                         trans = btrfs_start_transaction(root, 1);
8119                         if (IS_ERR(trans)) {
8120                                 ret = PTR_ERR(trans);
8121                                 goto repair_abort;
8122                         }
8123
8124                         btrfs_fix_block_accounting(trans, root);
8125                         ret = btrfs_commit_transaction(trans, root);
8126                         if (ret)
8127                                 goto repair_abort;
8128                 }
8129                 if (err)
8130                         fprintf(stderr, "repaired damaged extent references\n");
8131                 return ret;
8132         }
8133         return err;
8134 }
8135
8136 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8137 {
8138         u64 stripe_size;
8139
8140         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8141                 stripe_size = length;
8142                 stripe_size /= num_stripes;
8143         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8144                 stripe_size = length * 2;
8145                 stripe_size /= num_stripes;
8146         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8147                 stripe_size = length;
8148                 stripe_size /= (num_stripes - 1);
8149         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8150                 stripe_size = length;
8151                 stripe_size /= (num_stripes - 2);
8152         } else {
8153                 stripe_size = length;
8154         }
8155         return stripe_size;
8156 }
8157
8158 /*
8159  * Check the chunk with its block group/dev list ref:
8160  * Return 0 if all refs seems valid.
8161  * Return 1 if part of refs seems valid, need later check for rebuild ref
8162  * like missing block group and needs to search extent tree to rebuild them.
8163  * Return -1 if essential refs are missing and unable to rebuild.
8164  */
8165 static int check_chunk_refs(struct chunk_record *chunk_rec,
8166                             struct block_group_tree *block_group_cache,
8167                             struct device_extent_tree *dev_extent_cache,
8168                             int silent)
8169 {
8170         struct cache_extent *block_group_item;
8171         struct block_group_record *block_group_rec;
8172         struct cache_extent *dev_extent_item;
8173         struct device_extent_record *dev_extent_rec;
8174         u64 devid;
8175         u64 offset;
8176         u64 length;
8177         int metadump_v2 = 0;
8178         int i;
8179         int ret = 0;
8180
8181         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8182                                                chunk_rec->offset,
8183                                                chunk_rec->length);
8184         if (block_group_item) {
8185                 block_group_rec = container_of(block_group_item,
8186                                                struct block_group_record,
8187                                                cache);
8188                 if (chunk_rec->length != block_group_rec->offset ||
8189                     chunk_rec->offset != block_group_rec->objectid ||
8190                     (!metadump_v2 &&
8191                      chunk_rec->type_flags != block_group_rec->flags)) {
8192                         if (!silent)
8193                                 fprintf(stderr,
8194                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8195                                         chunk_rec->objectid,
8196                                         chunk_rec->type,
8197                                         chunk_rec->offset,
8198                                         chunk_rec->length,
8199                                         chunk_rec->offset,
8200                                         chunk_rec->type_flags,
8201                                         block_group_rec->objectid,
8202                                         block_group_rec->type,
8203                                         block_group_rec->offset,
8204                                         block_group_rec->offset,
8205                                         block_group_rec->objectid,
8206                                         block_group_rec->flags);
8207                         ret = -1;
8208                 } else {
8209                         list_del_init(&block_group_rec->list);
8210                         chunk_rec->bg_rec = block_group_rec;
8211                 }
8212         } else {
8213                 if (!silent)
8214                         fprintf(stderr,
8215                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8216                                 chunk_rec->objectid,
8217                                 chunk_rec->type,
8218                                 chunk_rec->offset,
8219                                 chunk_rec->length,
8220                                 chunk_rec->offset,
8221                                 chunk_rec->type_flags);
8222                 ret = 1;
8223         }
8224
8225         if (metadump_v2)
8226                 return ret;
8227
8228         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8229                                     chunk_rec->num_stripes);
8230         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8231                 devid = chunk_rec->stripes[i].devid;
8232                 offset = chunk_rec->stripes[i].offset;
8233                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8234                                                        devid, offset, length);
8235                 if (dev_extent_item) {
8236                         dev_extent_rec = container_of(dev_extent_item,
8237                                                 struct device_extent_record,
8238                                                 cache);
8239                         if (dev_extent_rec->objectid != devid ||
8240                             dev_extent_rec->offset != offset ||
8241                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8242                             dev_extent_rec->length != length) {
8243                                 if (!silent)
8244                                         fprintf(stderr,
8245                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8246                                                 chunk_rec->objectid,
8247                                                 chunk_rec->type,
8248                                                 chunk_rec->offset,
8249                                                 chunk_rec->stripes[i].devid,
8250                                                 chunk_rec->stripes[i].offset,
8251                                                 dev_extent_rec->objectid,
8252                                                 dev_extent_rec->offset,
8253                                                 dev_extent_rec->length);
8254                                 ret = -1;
8255                         } else {
8256                                 list_move(&dev_extent_rec->chunk_list,
8257                                           &chunk_rec->dextents);
8258                         }
8259                 } else {
8260                         if (!silent)
8261                                 fprintf(stderr,
8262                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8263                                         chunk_rec->objectid,
8264                                         chunk_rec->type,
8265                                         chunk_rec->offset,
8266                                         chunk_rec->stripes[i].devid,
8267                                         chunk_rec->stripes[i].offset);
8268                         ret = -1;
8269                 }
8270         }
8271         return ret;
8272 }
8273
8274 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8275 int check_chunks(struct cache_tree *chunk_cache,
8276                  struct block_group_tree *block_group_cache,
8277                  struct device_extent_tree *dev_extent_cache,
8278                  struct list_head *good, struct list_head *bad,
8279                  struct list_head *rebuild, int silent)
8280 {
8281         struct cache_extent *chunk_item;
8282         struct chunk_record *chunk_rec;
8283         struct block_group_record *bg_rec;
8284         struct device_extent_record *dext_rec;
8285         int err;
8286         int ret = 0;
8287
8288         chunk_item = first_cache_extent(chunk_cache);
8289         while (chunk_item) {
8290                 chunk_rec = container_of(chunk_item, struct chunk_record,
8291                                          cache);
8292                 err = check_chunk_refs(chunk_rec, block_group_cache,
8293                                        dev_extent_cache, silent);
8294                 if (err < 0)
8295                         ret = err;
8296                 if (err == 0 && good)
8297                         list_add_tail(&chunk_rec->list, good);
8298                 if (err > 0 && rebuild)
8299                         list_add_tail(&chunk_rec->list, rebuild);
8300                 if (err < 0 && bad)
8301                         list_add_tail(&chunk_rec->list, bad);
8302                 chunk_item = next_cache_extent(chunk_item);
8303         }
8304
8305         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8306                 if (!silent)
8307                         fprintf(stderr,
8308                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8309                                 bg_rec->objectid,
8310                                 bg_rec->offset,
8311                                 bg_rec->flags);
8312                 if (!ret)
8313                         ret = 1;
8314         }
8315
8316         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8317                             chunk_list) {
8318                 if (!silent)
8319                         fprintf(stderr,
8320                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8321                                 dext_rec->objectid,
8322                                 dext_rec->offset,
8323                                 dext_rec->length);
8324                 if (!ret)
8325                         ret = 1;
8326         }
8327         return ret;
8328 }
8329
8330
8331 static int check_device_used(struct device_record *dev_rec,
8332                              struct device_extent_tree *dext_cache)
8333 {
8334         struct cache_extent *cache;
8335         struct device_extent_record *dev_extent_rec;
8336         u64 total_byte = 0;
8337
8338         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8339         while (cache) {
8340                 dev_extent_rec = container_of(cache,
8341                                               struct device_extent_record,
8342                                               cache);
8343                 if (dev_extent_rec->objectid != dev_rec->devid)
8344                         break;
8345
8346                 list_del_init(&dev_extent_rec->device_list);
8347                 total_byte += dev_extent_rec->length;
8348                 cache = next_cache_extent(cache);
8349         }
8350
8351         if (total_byte != dev_rec->byte_used) {
8352                 fprintf(stderr,
8353                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8354                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8355                         dev_rec->type, dev_rec->offset);
8356                 return -1;
8357         } else {
8358                 return 0;
8359         }
8360 }
8361
8362 /* check btrfs_dev_item -> btrfs_dev_extent */
8363 static int check_devices(struct rb_root *dev_cache,
8364                          struct device_extent_tree *dev_extent_cache)
8365 {
8366         struct rb_node *dev_node;
8367         struct device_record *dev_rec;
8368         struct device_extent_record *dext_rec;
8369         int err;
8370         int ret = 0;
8371
8372         dev_node = rb_first(dev_cache);
8373         while (dev_node) {
8374                 dev_rec = container_of(dev_node, struct device_record, node);
8375                 err = check_device_used(dev_rec, dev_extent_cache);
8376                 if (err)
8377                         ret = err;
8378
8379                 dev_node = rb_next(dev_node);
8380         }
8381         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8382                             device_list) {
8383                 fprintf(stderr,
8384                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8385                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8386                 if (!ret)
8387                         ret = 1;
8388         }
8389         return ret;
8390 }
8391
8392 static int add_root_item_to_list(struct list_head *head,
8393                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8394                                   u8 level, u8 drop_level,
8395                                   int level_size, struct btrfs_key *drop_key)
8396 {
8397
8398         struct root_item_record *ri_rec;
8399         ri_rec = malloc(sizeof(*ri_rec));
8400         if (!ri_rec)
8401                 return -ENOMEM;
8402         ri_rec->bytenr = bytenr;
8403         ri_rec->objectid = objectid;
8404         ri_rec->level = level;
8405         ri_rec->level_size = level_size;
8406         ri_rec->drop_level = drop_level;
8407         ri_rec->last_snapshot = last_snapshot;
8408         if (drop_key)
8409                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8410         list_add_tail(&ri_rec->list, head);
8411
8412         return 0;
8413 }
8414
8415 static void free_root_item_list(struct list_head *list)
8416 {
8417         struct root_item_record *ri_rec;
8418
8419         while (!list_empty(list)) {
8420                 ri_rec = list_first_entry(list, struct root_item_record,
8421                                           list);
8422                 list_del_init(&ri_rec->list);
8423                 free(ri_rec);
8424         }
8425 }
8426
8427 static int deal_root_from_list(struct list_head *list,
8428                                struct btrfs_root *root,
8429                                struct block_info *bits,
8430                                int bits_nr,
8431                                struct cache_tree *pending,
8432                                struct cache_tree *seen,
8433                                struct cache_tree *reada,
8434                                struct cache_tree *nodes,
8435                                struct cache_tree *extent_cache,
8436                                struct cache_tree *chunk_cache,
8437                                struct rb_root *dev_cache,
8438                                struct block_group_tree *block_group_cache,
8439                                struct device_extent_tree *dev_extent_cache)
8440 {
8441         int ret = 0;
8442         u64 last;
8443
8444         while (!list_empty(list)) {
8445                 struct root_item_record *rec;
8446                 struct extent_buffer *buf;
8447                 rec = list_entry(list->next,
8448                                  struct root_item_record, list);
8449                 last = 0;
8450                 buf = read_tree_block(root->fs_info->tree_root,
8451                                       rec->bytenr, rec->level_size, 0);
8452                 if (!extent_buffer_uptodate(buf)) {
8453                         free_extent_buffer(buf);
8454                         ret = -EIO;
8455                         break;
8456                 }
8457                 ret = add_root_to_pending(buf, extent_cache, pending,
8458                                     seen, nodes, rec->objectid);
8459                 if (ret < 0)
8460                         break;
8461                 /*
8462                  * To rebuild extent tree, we need deal with snapshot
8463                  * one by one, otherwise we deal with node firstly which
8464                  * can maximize readahead.
8465                  */
8466                 while (1) {
8467                         ret = run_next_block(root, bits, bits_nr, &last,
8468                                              pending, seen, reada, nodes,
8469                                              extent_cache, chunk_cache,
8470                                              dev_cache, block_group_cache,
8471                                              dev_extent_cache, rec);
8472                         if (ret != 0)
8473                                 break;
8474                 }
8475                 free_extent_buffer(buf);
8476                 list_del(&rec->list);
8477                 free(rec);
8478                 if (ret < 0)
8479                         break;
8480         }
8481         while (ret >= 0) {
8482                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8483                                      reada, nodes, extent_cache, chunk_cache,
8484                                      dev_cache, block_group_cache,
8485                                      dev_extent_cache, NULL);
8486                 if (ret != 0) {
8487                         if (ret > 0)
8488                                 ret = 0;
8489                         break;
8490                 }
8491         }
8492         return ret;
8493 }
8494
8495 static int check_chunks_and_extents(struct btrfs_root *root)
8496 {
8497         struct rb_root dev_cache;
8498         struct cache_tree chunk_cache;
8499         struct block_group_tree block_group_cache;
8500         struct device_extent_tree dev_extent_cache;
8501         struct cache_tree extent_cache;
8502         struct cache_tree seen;
8503         struct cache_tree pending;
8504         struct cache_tree reada;
8505         struct cache_tree nodes;
8506         struct extent_io_tree excluded_extents;
8507         struct cache_tree corrupt_blocks;
8508         struct btrfs_path path;
8509         struct btrfs_key key;
8510         struct btrfs_key found_key;
8511         int ret, err = 0;
8512         struct block_info *bits;
8513         int bits_nr;
8514         struct extent_buffer *leaf;
8515         int slot;
8516         struct btrfs_root_item ri;
8517         struct list_head dropping_trees;
8518         struct list_head normal_trees;
8519         struct btrfs_root *root1;
8520         u64 objectid;
8521         u32 level_size;
8522         u8 level;
8523
8524         dev_cache = RB_ROOT;
8525         cache_tree_init(&chunk_cache);
8526         block_group_tree_init(&block_group_cache);
8527         device_extent_tree_init(&dev_extent_cache);
8528
8529         cache_tree_init(&extent_cache);
8530         cache_tree_init(&seen);
8531         cache_tree_init(&pending);
8532         cache_tree_init(&nodes);
8533         cache_tree_init(&reada);
8534         cache_tree_init(&corrupt_blocks);
8535         extent_io_tree_init(&excluded_extents);
8536         INIT_LIST_HEAD(&dropping_trees);
8537         INIT_LIST_HEAD(&normal_trees);
8538
8539         if (repair) {
8540                 root->fs_info->excluded_extents = &excluded_extents;
8541                 root->fs_info->fsck_extent_cache = &extent_cache;
8542                 root->fs_info->free_extent_hook = free_extent_hook;
8543                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8544         }
8545
8546         bits_nr = 1024;
8547         bits = malloc(bits_nr * sizeof(struct block_info));
8548         if (!bits) {
8549                 perror("malloc");
8550                 exit(1);
8551         }
8552
8553         if (ctx.progress_enabled) {
8554                 ctx.tp = TASK_EXTENTS;
8555                 task_start(ctx.info);
8556         }
8557
8558 again:
8559         root1 = root->fs_info->tree_root;
8560         level = btrfs_header_level(root1->node);
8561         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8562                                     root1->node->start, 0, level, 0,
8563                                     root1->nodesize, NULL);
8564         if (ret < 0)
8565                 goto out;
8566         root1 = root->fs_info->chunk_root;
8567         level = btrfs_header_level(root1->node);
8568         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8569                                     root1->node->start, 0, level, 0,
8570                                     root1->nodesize, NULL);
8571         if (ret < 0)
8572                 goto out;
8573         btrfs_init_path(&path);
8574         key.offset = 0;
8575         key.objectid = 0;
8576         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8577         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8578                                         &key, &path, 0, 0);
8579         if (ret < 0)
8580                 goto out;
8581         while(1) {
8582                 leaf = path.nodes[0];
8583                 slot = path.slots[0];
8584                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8585                         ret = btrfs_next_leaf(root, &path);
8586                         if (ret != 0)
8587                                 break;
8588                         leaf = path.nodes[0];
8589                         slot = path.slots[0];
8590                 }
8591                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8592                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8593                         unsigned long offset;
8594                         u64 last_snapshot;
8595
8596                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8597                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8598                         last_snapshot = btrfs_root_last_snapshot(&ri);
8599                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8600                                 level = btrfs_root_level(&ri);
8601                                 level_size = root->nodesize;
8602                                 ret = add_root_item_to_list(&normal_trees,
8603                                                 found_key.objectid,
8604                                                 btrfs_root_bytenr(&ri),
8605                                                 last_snapshot, level,
8606                                                 0, level_size, NULL);
8607                                 if (ret < 0)
8608                                         goto out;
8609                         } else {
8610                                 level = btrfs_root_level(&ri);
8611                                 level_size = root->nodesize;
8612                                 objectid = found_key.objectid;
8613                                 btrfs_disk_key_to_cpu(&found_key,
8614                                                       &ri.drop_progress);
8615                                 ret = add_root_item_to_list(&dropping_trees,
8616                                                 objectid,
8617                                                 btrfs_root_bytenr(&ri),
8618                                                 last_snapshot, level,
8619                                                 ri.drop_level,
8620                                                 level_size, &found_key);
8621                                 if (ret < 0)
8622                                         goto out;
8623                         }
8624                 }
8625                 path.slots[0]++;
8626         }
8627         btrfs_release_path(&path);
8628
8629         /*
8630          * check_block can return -EAGAIN if it fixes something, please keep
8631          * this in mind when dealing with return values from these functions, if
8632          * we get -EAGAIN we want to fall through and restart the loop.
8633          */
8634         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8635                                   &seen, &reada, &nodes, &extent_cache,
8636                                   &chunk_cache, &dev_cache, &block_group_cache,
8637                                   &dev_extent_cache);
8638         if (ret < 0) {
8639                 if (ret == -EAGAIN)
8640                         goto loop;
8641                 goto out;
8642         }
8643         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8644                                   &pending, &seen, &reada, &nodes,
8645                                   &extent_cache, &chunk_cache, &dev_cache,
8646                                   &block_group_cache, &dev_extent_cache);
8647         if (ret < 0) {
8648                 if (ret == -EAGAIN)
8649                         goto loop;
8650                 goto out;
8651         }
8652
8653         ret = check_chunks(&chunk_cache, &block_group_cache,
8654                            &dev_extent_cache, NULL, NULL, NULL, 0);
8655         if (ret) {
8656                 if (ret == -EAGAIN)
8657                         goto loop;
8658                 err = ret;
8659         }
8660
8661         ret = check_extent_refs(root, &extent_cache);
8662         if (ret < 0) {
8663                 if (ret == -EAGAIN)
8664                         goto loop;
8665                 goto out;
8666         }
8667
8668         ret = check_devices(&dev_cache, &dev_extent_cache);
8669         if (ret && err)
8670                 ret = err;
8671
8672 out:
8673         task_stop(ctx.info);
8674         if (repair) {
8675                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8676                 extent_io_tree_cleanup(&excluded_extents);
8677                 root->fs_info->fsck_extent_cache = NULL;
8678                 root->fs_info->free_extent_hook = NULL;
8679                 root->fs_info->corrupt_blocks = NULL;
8680                 root->fs_info->excluded_extents = NULL;
8681         }
8682         free(bits);
8683         free_chunk_cache_tree(&chunk_cache);
8684         free_device_cache_tree(&dev_cache);
8685         free_block_group_tree(&block_group_cache);
8686         free_device_extent_tree(&dev_extent_cache);
8687         free_extent_cache_tree(&seen);
8688         free_extent_cache_tree(&pending);
8689         free_extent_cache_tree(&reada);
8690         free_extent_cache_tree(&nodes);
8691         return ret;
8692 loop:
8693         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8694         free_extent_cache_tree(&seen);
8695         free_extent_cache_tree(&pending);
8696         free_extent_cache_tree(&reada);
8697         free_extent_cache_tree(&nodes);
8698         free_chunk_cache_tree(&chunk_cache);
8699         free_block_group_tree(&block_group_cache);
8700         free_device_cache_tree(&dev_cache);
8701         free_device_extent_tree(&dev_extent_cache);
8702         free_extent_record_cache(root->fs_info, &extent_cache);
8703         free_root_item_list(&normal_trees);
8704         free_root_item_list(&dropping_trees);
8705         extent_io_tree_cleanup(&excluded_extents);
8706         goto again;
8707 }
8708
8709 /*
8710  * Check backrefs of a tree block given by @bytenr or @eb.
8711  *
8712  * @root:       the root containing the @bytenr or @eb
8713  * @eb:         tree block extent buffer, can be NULL
8714  * @bytenr:     bytenr of the tree block to search
8715  * @level:      tree level of the tree block
8716  * @owner:      owner of the tree block
8717  *
8718  * Return >0 for any error found and output error message
8719  * Return 0 for no error found
8720  */
8721 static int check_tree_block_ref(struct btrfs_root *root,
8722                                 struct extent_buffer *eb, u64 bytenr,
8723                                 int level, u64 owner)
8724 {
8725         struct btrfs_key key;
8726         struct btrfs_root *extent_root = root->fs_info->extent_root;
8727         struct btrfs_path path;
8728         struct btrfs_extent_item *ei;
8729         struct btrfs_extent_inline_ref *iref;
8730         struct extent_buffer *leaf;
8731         unsigned long end;
8732         unsigned long ptr;
8733         int slot;
8734         int skinny_level;
8735         int type;
8736         u32 nodesize = root->nodesize;
8737         u32 item_size;
8738         u64 offset;
8739         int found_ref = 0;
8740         int err = 0;
8741         int ret;
8742
8743         btrfs_init_path(&path);
8744         key.objectid = bytenr;
8745         if (btrfs_fs_incompat(root->fs_info,
8746                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8747                 key.type = BTRFS_METADATA_ITEM_KEY;
8748         else
8749                 key.type = BTRFS_EXTENT_ITEM_KEY;
8750         key.offset = (u64)-1;
8751
8752         /* Search for the backref in extent tree */
8753         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8754         if (ret < 0) {
8755                 err |= BACKREF_MISSING;
8756                 goto out;
8757         }
8758         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8759         if (ret) {
8760                 err |= BACKREF_MISSING;
8761                 goto out;
8762         }
8763
8764         leaf = path.nodes[0];
8765         slot = path.slots[0];
8766         btrfs_item_key_to_cpu(leaf, &key, slot);
8767
8768         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8769
8770         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8771                 skinny_level = (int)key.offset;
8772                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8773         } else {
8774                 struct btrfs_tree_block_info *info;
8775
8776                 info = (struct btrfs_tree_block_info *)(ei + 1);
8777                 skinny_level = btrfs_tree_block_level(leaf, info);
8778                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8779         }
8780
8781         if (eb) {
8782                 u64 header_gen;
8783                 u64 extent_gen;
8784
8785                 if (!(btrfs_extent_flags(leaf, ei) &
8786                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8787                         error(
8788                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8789                                 key.objectid, nodesize,
8790                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8791                         err = BACKREF_MISMATCH;
8792                 }
8793                 header_gen = btrfs_header_generation(eb);
8794                 extent_gen = btrfs_extent_generation(leaf, ei);
8795                 if (header_gen != extent_gen) {
8796                         error(
8797         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8798                                 key.objectid, nodesize, header_gen,
8799                                 extent_gen);
8800                         err = BACKREF_MISMATCH;
8801                 }
8802                 if (level != skinny_level) {
8803                         error(
8804                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8805                                 key.objectid, nodesize, level, skinny_level);
8806                         err = BACKREF_MISMATCH;
8807                 }
8808                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8809                         error(
8810                         "extent[%llu %u] is referred by other roots than %llu",
8811                                 key.objectid, nodesize, root->objectid);
8812                         err = BACKREF_MISMATCH;
8813                 }
8814         }
8815
8816         /*
8817          * Iterate the extent/metadata item to find the exact backref
8818          */
8819         item_size = btrfs_item_size_nr(leaf, slot);
8820         ptr = (unsigned long)iref;
8821         end = (unsigned long)ei + item_size;
8822         while (ptr < end) {
8823                 iref = (struct btrfs_extent_inline_ref *)ptr;
8824                 type = btrfs_extent_inline_ref_type(leaf, iref);
8825                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8826
8827                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8828                         (offset == root->objectid || offset == owner)) {
8829                         found_ref = 1;
8830                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8831                         /* Check if the backref points to valid referencer */
8832                         found_ref = !check_tree_block_ref(root, NULL, offset,
8833                                                           level + 1, owner);
8834                 }
8835
8836                 if (found_ref)
8837                         break;
8838                 ptr += btrfs_extent_inline_ref_size(type);
8839         }
8840
8841         /*
8842          * Inlined extent item doesn't have what we need, check
8843          * TREE_BLOCK_REF_KEY
8844          */
8845         if (!found_ref) {
8846                 btrfs_release_path(&path);
8847                 key.objectid = bytenr;
8848                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8849                 key.offset = root->objectid;
8850
8851                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8852                 if (!ret)
8853                         found_ref = 1;
8854         }
8855         if (!found_ref)
8856                 err |= BACKREF_MISSING;
8857 out:
8858         btrfs_release_path(&path);
8859         if (eb && (err & BACKREF_MISSING))
8860                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8861                         bytenr, nodesize, owner, level);
8862         return err;
8863 }
8864
8865 /*
8866  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8867  *
8868  * Return >0 any error found and output error message
8869  * Return 0 for no error found
8870  */
8871 static int check_extent_data_item(struct btrfs_root *root,
8872                                   struct extent_buffer *eb, int slot)
8873 {
8874         struct btrfs_file_extent_item *fi;
8875         struct btrfs_path path;
8876         struct btrfs_root *extent_root = root->fs_info->extent_root;
8877         struct btrfs_key fi_key;
8878         struct btrfs_key dbref_key;
8879         struct extent_buffer *leaf;
8880         struct btrfs_extent_item *ei;
8881         struct btrfs_extent_inline_ref *iref;
8882         struct btrfs_extent_data_ref *dref;
8883         u64 owner;
8884         u64 file_extent_gen;
8885         u64 disk_bytenr;
8886         u64 disk_num_bytes;
8887         u64 extent_num_bytes;
8888         u64 extent_flags;
8889         u64 extent_gen;
8890         u32 item_size;
8891         unsigned long end;
8892         unsigned long ptr;
8893         int type;
8894         u64 ref_root;
8895         int found_dbackref = 0;
8896         int err = 0;
8897         int ret;
8898
8899         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8900         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8901         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8902
8903         /* Nothing to check for hole and inline data extents */
8904         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8905             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8906                 return 0;
8907
8908         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8909         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8910         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8911
8912         /* Check unaligned disk_num_bytes and num_bytes */
8913         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8914                 error(
8915 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8916                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8917                         root->sectorsize);
8918                 err |= BYTES_UNALIGNED;
8919         } else {
8920                 data_bytes_allocated += disk_num_bytes;
8921         }
8922         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8923                 error(
8924 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8925                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8926                         root->sectorsize);
8927                 err |= BYTES_UNALIGNED;
8928         } else {
8929                 data_bytes_referenced += extent_num_bytes;
8930         }
8931         owner = btrfs_header_owner(eb);
8932
8933         /* Check the extent item of the file extent in extent tree */
8934         btrfs_init_path(&path);
8935         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8936         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8937         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8938
8939         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8940         if (ret) {
8941                 err |= BACKREF_MISSING;
8942                 goto error;
8943         }
8944
8945         leaf = path.nodes[0];
8946         slot = path.slots[0];
8947         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8948
8949         extent_flags = btrfs_extent_flags(leaf, ei);
8950         extent_gen = btrfs_extent_generation(leaf, ei);
8951
8952         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8953                 error(
8954                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8955                     disk_bytenr, disk_num_bytes,
8956                     BTRFS_EXTENT_FLAG_DATA);
8957                 err |= BACKREF_MISMATCH;
8958         }
8959
8960         if (file_extent_gen < extent_gen) {
8961                 error(
8962 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8963                         disk_bytenr, disk_num_bytes, file_extent_gen,
8964                         extent_gen);
8965                 err |= BACKREF_MISMATCH;
8966         }
8967
8968         /* Check data backref inside that extent item */
8969         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8970         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8971         ptr = (unsigned long)iref;
8972         end = (unsigned long)ei + item_size;
8973         while (ptr < end) {
8974                 iref = (struct btrfs_extent_inline_ref *)ptr;
8975                 type = btrfs_extent_inline_ref_type(leaf, iref);
8976                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8977
8978                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8979                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8980                         if (ref_root == owner || ref_root == root->objectid)
8981                                 found_dbackref = 1;
8982                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8983                         found_dbackref = !check_tree_block_ref(root, NULL,
8984                                 btrfs_extent_inline_ref_offset(leaf, iref),
8985                                 0, owner);
8986                 }
8987
8988                 if (found_dbackref)
8989                         break;
8990                 ptr += btrfs_extent_inline_ref_size(type);
8991         }
8992
8993         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8994         if (!found_dbackref) {
8995                 btrfs_release_path(&path);
8996
8997                 btrfs_init_path(&path);
8998                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8999                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9000                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9001                                 fi_key.objectid, fi_key.offset);
9002
9003                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9004                                         &dbref_key, &path, 0, 0);
9005                 if (!ret)
9006                         found_dbackref = 1;
9007         }
9008
9009         if (!found_dbackref)
9010                 err |= BACKREF_MISSING;
9011 error:
9012         btrfs_release_path(&path);
9013         if (err & BACKREF_MISSING) {
9014                 error("data extent[%llu %llu] backref lost",
9015                       disk_bytenr, disk_num_bytes);
9016         }
9017         return err;
9018 }
9019
9020 /*
9021  * Get real tree block level for the case like shared block
9022  * Return >= 0 as tree level
9023  * Return <0 for error
9024  */
9025 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9026 {
9027         struct extent_buffer *eb;
9028         struct btrfs_path path;
9029         struct btrfs_key key;
9030         struct btrfs_extent_item *ei;
9031         u64 flags;
9032         u64 transid;
9033         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9034         u8 backref_level;
9035         u8 header_level;
9036         int ret;
9037
9038         /* Search extent tree for extent generation and level */
9039         key.objectid = bytenr;
9040         key.type = BTRFS_METADATA_ITEM_KEY;
9041         key.offset = (u64)-1;
9042
9043         btrfs_init_path(&path);
9044         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9045         if (ret < 0)
9046                 goto release_out;
9047         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9048         if (ret < 0)
9049                 goto release_out;
9050         if (ret > 0) {
9051                 ret = -ENOENT;
9052                 goto release_out;
9053         }
9054
9055         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9056         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9057                             struct btrfs_extent_item);
9058         flags = btrfs_extent_flags(path.nodes[0], ei);
9059         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9060                 ret = -ENOENT;
9061                 goto release_out;
9062         }
9063
9064         /* Get transid for later read_tree_block() check */
9065         transid = btrfs_extent_generation(path.nodes[0], ei);
9066
9067         /* Get backref level as one source */
9068         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9069                 backref_level = key.offset;
9070         } else {
9071                 struct btrfs_tree_block_info *info;
9072
9073                 info = (struct btrfs_tree_block_info *)(ei + 1);
9074                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9075         }
9076         btrfs_release_path(&path);
9077
9078         /* Get level from tree block as an alternative source */
9079         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9080         if (!extent_buffer_uptodate(eb)) {
9081                 free_extent_buffer(eb);
9082                 return -EIO;
9083         }
9084         header_level = btrfs_header_level(eb);
9085         free_extent_buffer(eb);
9086
9087         if (header_level != backref_level)
9088                 return -EIO;
9089         return header_level;
9090
9091 release_out:
9092         btrfs_release_path(&path);
9093         return ret;
9094 }
9095
9096 /*
9097  * Check if a tree block backref is valid (points to a valid tree block)
9098  * if level == -1, level will be resolved
9099  * Return >0 for any error found and print error message
9100  */
9101 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9102                                     u64 bytenr, int level)
9103 {
9104         struct btrfs_root *root;
9105         struct btrfs_key key;
9106         struct btrfs_path path;
9107         struct extent_buffer *eb;
9108         struct extent_buffer *node;
9109         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9110         int err = 0;
9111         int ret;
9112
9113         /* Query level for level == -1 special case */
9114         if (level == -1)
9115                 level = query_tree_block_level(fs_info, bytenr);
9116         if (level < 0) {
9117                 err |= REFERENCER_MISSING;
9118                 goto out;
9119         }
9120
9121         key.objectid = root_id;
9122         key.type = BTRFS_ROOT_ITEM_KEY;
9123         key.offset = (u64)-1;
9124
9125         root = btrfs_read_fs_root(fs_info, &key);
9126         if (IS_ERR(root)) {
9127                 err |= REFERENCER_MISSING;
9128                 goto out;
9129         }
9130
9131         /* Read out the tree block to get item/node key */
9132         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9133         if (!extent_buffer_uptodate(eb)) {
9134                 err |= REFERENCER_MISSING;
9135                 free_extent_buffer(eb);
9136                 goto out;
9137         }
9138
9139         /* Empty tree, no need to check key */
9140         if (!btrfs_header_nritems(eb) && !level) {
9141                 free_extent_buffer(eb);
9142                 goto out;
9143         }
9144
9145         if (level)
9146                 btrfs_node_key_to_cpu(eb, &key, 0);
9147         else
9148                 btrfs_item_key_to_cpu(eb, &key, 0);
9149
9150         free_extent_buffer(eb);
9151
9152         btrfs_init_path(&path);
9153         /* Search with the first key, to ensure we can reach it */
9154         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9155         if (ret) {
9156                 err |= REFERENCER_MISSING;
9157                 goto release_out;
9158         }
9159
9160         node = path.nodes[level];
9161         if (btrfs_header_bytenr(node) != bytenr) {
9162                 error(
9163         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9164                         bytenr, nodesize, bytenr,
9165                         btrfs_header_bytenr(node));
9166                 err |= REFERENCER_MISMATCH;
9167         }
9168         if (btrfs_header_level(node) != level) {
9169                 error(
9170         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9171                         bytenr, nodesize, level,
9172                         btrfs_header_level(node));
9173                 err |= REFERENCER_MISMATCH;
9174         }
9175
9176 release_out:
9177         btrfs_release_path(&path);
9178 out:
9179         if (err & REFERENCER_MISSING) {
9180                 if (level < 0)
9181                         error("extent [%llu %d] lost referencer (owner: %llu)",
9182                                 bytenr, nodesize, root_id);
9183                 else
9184                         error(
9185                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9186                                 bytenr, nodesize, root_id, level);
9187         }
9188
9189         return err;
9190 }
9191
9192 /*
9193  * Check referencer for shared block backref
9194  * If level == -1, this function will resolve the level.
9195  */
9196 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9197                                      u64 parent, u64 bytenr, int level)
9198 {
9199         struct extent_buffer *eb;
9200         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9201         u32 nr;
9202         int found_parent = 0;
9203         int i;
9204
9205         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9206         if (!extent_buffer_uptodate(eb))
9207                 goto out;
9208
9209         if (level == -1)
9210                 level = query_tree_block_level(fs_info, bytenr);
9211         if (level < 0)
9212                 goto out;
9213
9214         if (level + 1 != btrfs_header_level(eb))
9215                 goto out;
9216
9217         nr = btrfs_header_nritems(eb);
9218         for (i = 0; i < nr; i++) {
9219                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9220                         found_parent = 1;
9221                         break;
9222                 }
9223         }
9224 out:
9225         free_extent_buffer(eb);
9226         if (!found_parent) {
9227                 error(
9228         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9229                         bytenr, nodesize, parent, level);
9230                 return REFERENCER_MISSING;
9231         }
9232         return 0;
9233 }
9234
9235 /*
9236  * Check referencer for normal (inlined) data ref
9237  * If len == 0, it will be resolved by searching in extent tree
9238  */
9239 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9240                                      u64 root_id, u64 objectid, u64 offset,
9241                                      u64 bytenr, u64 len, u32 count)
9242 {
9243         struct btrfs_root *root;
9244         struct btrfs_root *extent_root = fs_info->extent_root;
9245         struct btrfs_key key;
9246         struct btrfs_path path;
9247         struct extent_buffer *leaf;
9248         struct btrfs_file_extent_item *fi;
9249         u32 found_count = 0;
9250         int slot;
9251         int ret = 0;
9252
9253         if (!len) {
9254                 key.objectid = bytenr;
9255                 key.type = BTRFS_EXTENT_ITEM_KEY;
9256                 key.offset = (u64)-1;
9257
9258                 btrfs_init_path(&path);
9259                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9260                 if (ret < 0)
9261                         goto out;
9262                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9263                 if (ret)
9264                         goto out;
9265                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9266                 if (key.objectid != bytenr ||
9267                     key.type != BTRFS_EXTENT_ITEM_KEY)
9268                         goto out;
9269                 len = key.offset;
9270                 btrfs_release_path(&path);
9271         }
9272         key.objectid = root_id;
9273         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9274         key.offset = (u64)-1;
9275         btrfs_init_path(&path);
9276
9277         root = btrfs_read_fs_root(fs_info, &key);
9278         if (IS_ERR(root))
9279                 goto out;
9280
9281         key.objectid = objectid;
9282         key.type = BTRFS_EXTENT_DATA_KEY;
9283         /*
9284          * It can be nasty as data backref offset is
9285          * file offset - file extent offset, which is smaller or
9286          * equal to original backref offset.  The only special case is
9287          * overflow.  So we need to special check and do further search.
9288          */
9289         key.offset = offset & (1ULL << 63) ? 0 : offset;
9290
9291         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9292         if (ret < 0)
9293                 goto out;
9294
9295         /*
9296          * Search afterwards to get correct one
9297          * NOTE: As we must do a comprehensive check on the data backref to
9298          * make sure the dref count also matches, we must iterate all file
9299          * extents for that inode.
9300          */
9301         while (1) {
9302                 leaf = path.nodes[0];
9303                 slot = path.slots[0];
9304
9305                 btrfs_item_key_to_cpu(leaf, &key, slot);
9306                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9307                         break;
9308                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9309                 /*
9310                  * Except normal disk bytenr and disk num bytes, we still
9311                  * need to do extra check on dbackref offset as
9312                  * dbackref offset = file_offset - file_extent_offset
9313                  */
9314                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9315                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9316                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9317                     offset)
9318                         found_count++;
9319
9320                 ret = btrfs_next_item(root, &path);
9321                 if (ret)
9322                         break;
9323         }
9324 out:
9325         btrfs_release_path(&path);
9326         if (found_count != count) {
9327                 error(
9328 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9329                         bytenr, len, root_id, objectid, offset, count, found_count);
9330                 return REFERENCER_MISSING;
9331         }
9332         return 0;
9333 }
9334
9335 /*
9336  * Check if the referencer of a shared data backref exists
9337  */
9338 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9339                                      u64 parent, u64 bytenr)
9340 {
9341         struct extent_buffer *eb;
9342         struct btrfs_key key;
9343         struct btrfs_file_extent_item *fi;
9344         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9345         u32 nr;
9346         int found_parent = 0;
9347         int i;
9348
9349         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9350         if (!extent_buffer_uptodate(eb))
9351                 goto out;
9352
9353         nr = btrfs_header_nritems(eb);
9354         for (i = 0; i < nr; i++) {
9355                 btrfs_item_key_to_cpu(eb, &key, i);
9356                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9357                         continue;
9358
9359                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9360                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9361                         continue;
9362
9363                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9364                         found_parent = 1;
9365                         break;
9366                 }
9367         }
9368
9369 out:
9370         free_extent_buffer(eb);
9371         if (!found_parent) {
9372                 error("shared extent %llu referencer lost (parent: %llu)",
9373                         bytenr, parent);
9374                 return REFERENCER_MISSING;
9375         }
9376         return 0;
9377 }
9378
9379 /*
9380  * This function will check a given extent item, including its backref and
9381  * itself (like crossing stripe boundary and type)
9382  *
9383  * Since we don't use extent_record anymore, introduce new error bit
9384  */
9385 static int check_extent_item(struct btrfs_fs_info *fs_info,
9386                              struct extent_buffer *eb, int slot)
9387 {
9388         struct btrfs_extent_item *ei;
9389         struct btrfs_extent_inline_ref *iref;
9390         struct btrfs_extent_data_ref *dref;
9391         unsigned long end;
9392         unsigned long ptr;
9393         int type;
9394         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9395         u32 item_size = btrfs_item_size_nr(eb, slot);
9396         u64 flags;
9397         u64 offset;
9398         int metadata = 0;
9399         int level;
9400         struct btrfs_key key;
9401         int ret;
9402         int err = 0;
9403
9404         btrfs_item_key_to_cpu(eb, &key, slot);
9405         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9406                 bytes_used += key.offset;
9407         else
9408                 bytes_used += nodesize;
9409
9410         if (item_size < sizeof(*ei)) {
9411                 /*
9412                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9413                  * old thing when on disk format is still un-determined.
9414                  * No need to care about it anymore
9415                  */
9416                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9417                 return -ENOTTY;
9418         }
9419
9420         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9421         flags = btrfs_extent_flags(eb, ei);
9422
9423         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9424                 metadata = 1;
9425         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9426                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9427                       key.objectid, key.objectid + nodesize);
9428                 err |= CROSSING_STRIPE_BOUNDARY;
9429         }
9430
9431         ptr = (unsigned long)(ei + 1);
9432
9433         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9434                 /* Old EXTENT_ITEM metadata */
9435                 struct btrfs_tree_block_info *info;
9436
9437                 info = (struct btrfs_tree_block_info *)ptr;
9438                 level = btrfs_tree_block_level(eb, info);
9439                 ptr += sizeof(struct btrfs_tree_block_info);
9440         } else {
9441                 /* New METADATA_ITEM */
9442                 level = key.offset;
9443         }
9444         end = (unsigned long)ei + item_size;
9445
9446         if (ptr >= end) {
9447                 err |= ITEM_SIZE_MISMATCH;
9448                 goto out;
9449         }
9450
9451         /* Now check every backref in this extent item */
9452 next:
9453         iref = (struct btrfs_extent_inline_ref *)ptr;
9454         type = btrfs_extent_inline_ref_type(eb, iref);
9455         offset = btrfs_extent_inline_ref_offset(eb, iref);
9456         switch (type) {
9457         case BTRFS_TREE_BLOCK_REF_KEY:
9458                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9459                                                level);
9460                 err |= ret;
9461                 break;
9462         case BTRFS_SHARED_BLOCK_REF_KEY:
9463                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9464                                                  level);
9465                 err |= ret;
9466                 break;
9467         case BTRFS_EXTENT_DATA_REF_KEY:
9468                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9469                 ret = check_extent_data_backref(fs_info,
9470                                 btrfs_extent_data_ref_root(eb, dref),
9471                                 btrfs_extent_data_ref_objectid(eb, dref),
9472                                 btrfs_extent_data_ref_offset(eb, dref),
9473                                 key.objectid, key.offset,
9474                                 btrfs_extent_data_ref_count(eb, dref));
9475                 err |= ret;
9476                 break;
9477         case BTRFS_SHARED_DATA_REF_KEY:
9478                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9479                 err |= ret;
9480                 break;
9481         default:
9482                 error("extent[%llu %d %llu] has unknown ref type: %d",
9483                         key.objectid, key.type, key.offset, type);
9484                 err |= UNKNOWN_TYPE;
9485                 goto out;
9486         }
9487
9488         ptr += btrfs_extent_inline_ref_size(type);
9489         if (ptr < end)
9490                 goto next;
9491
9492 out:
9493         return err;
9494 }
9495
9496 /*
9497  * Check if a dev extent item is referred correctly by its chunk
9498  */
9499 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9500                                  struct extent_buffer *eb, int slot)
9501 {
9502         struct btrfs_root *chunk_root = fs_info->chunk_root;
9503         struct btrfs_dev_extent *ptr;
9504         struct btrfs_path path;
9505         struct btrfs_key chunk_key;
9506         struct btrfs_key devext_key;
9507         struct btrfs_chunk *chunk;
9508         struct extent_buffer *l;
9509         int num_stripes;
9510         u64 length;
9511         int i;
9512         int found_chunk = 0;
9513         int ret;
9514
9515         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9516         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9517         length = btrfs_dev_extent_length(eb, ptr);
9518
9519         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9520         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9521         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9522
9523         btrfs_init_path(&path);
9524         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9525         if (ret)
9526                 goto out;
9527
9528         l = path.nodes[0];
9529         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9530         if (btrfs_chunk_length(l, chunk) != length)
9531                 goto out;
9532
9533         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9534         for (i = 0; i < num_stripes; i++) {
9535                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9536                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9537
9538                 if (devid == devext_key.objectid &&
9539                     offset == devext_key.offset) {
9540                         found_chunk = 1;
9541                         break;
9542                 }
9543         }
9544 out:
9545         btrfs_release_path(&path);
9546         if (!found_chunk) {
9547                 error(
9548                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9549                         devext_key.objectid, devext_key.offset, length);
9550                 return REFERENCER_MISSING;
9551         }
9552         return 0;
9553 }
9554
9555 /*
9556  * Check if the used space is correct with the dev item
9557  */
9558 static int check_dev_item(struct btrfs_fs_info *fs_info,
9559                           struct extent_buffer *eb, int slot)
9560 {
9561         struct btrfs_root *dev_root = fs_info->dev_root;
9562         struct btrfs_dev_item *dev_item;
9563         struct btrfs_path path;
9564         struct btrfs_key key;
9565         struct btrfs_dev_extent *ptr;
9566         u64 dev_id;
9567         u64 used;
9568         u64 total = 0;
9569         int ret;
9570
9571         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9572         dev_id = btrfs_device_id(eb, dev_item);
9573         used = btrfs_device_bytes_used(eb, dev_item);
9574
9575         key.objectid = dev_id;
9576         key.type = BTRFS_DEV_EXTENT_KEY;
9577         key.offset = 0;
9578
9579         btrfs_init_path(&path);
9580         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9581         if (ret < 0) {
9582                 btrfs_item_key_to_cpu(eb, &key, slot);
9583                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9584                         key.objectid, key.type, key.offset);
9585                 btrfs_release_path(&path);
9586                 return REFERENCER_MISSING;
9587         }
9588
9589         /* Iterate dev_extents to calculate the used space of a device */
9590         while (1) {
9591                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9592
9593                 if (key.objectid > dev_id)
9594                         break;
9595                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9596                         goto next;
9597
9598                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9599                                      struct btrfs_dev_extent);
9600                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9601 next:
9602                 ret = btrfs_next_item(dev_root, &path);
9603                 if (ret)
9604                         break;
9605         }
9606         btrfs_release_path(&path);
9607
9608         if (used != total) {
9609                 btrfs_item_key_to_cpu(eb, &key, slot);
9610                 error(
9611 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9612                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9613                         BTRFS_DEV_EXTENT_KEY, dev_id);
9614                 return ACCOUNTING_MISMATCH;
9615         }
9616         return 0;
9617 }
9618
9619 /*
9620  * Check a block group item with its referener (chunk) and its used space
9621  * with extent/metadata item
9622  */
9623 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9624                                   struct extent_buffer *eb, int slot)
9625 {
9626         struct btrfs_root *extent_root = fs_info->extent_root;
9627         struct btrfs_root *chunk_root = fs_info->chunk_root;
9628         struct btrfs_block_group_item *bi;
9629         struct btrfs_block_group_item bg_item;
9630         struct btrfs_path path;
9631         struct btrfs_key bg_key;
9632         struct btrfs_key chunk_key;
9633         struct btrfs_key extent_key;
9634         struct btrfs_chunk *chunk;
9635         struct extent_buffer *leaf;
9636         struct btrfs_extent_item *ei;
9637         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9638         u64 flags;
9639         u64 bg_flags;
9640         u64 used;
9641         u64 total = 0;
9642         int ret;
9643         int err = 0;
9644
9645         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9646         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9647         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9648         used = btrfs_block_group_used(&bg_item);
9649         bg_flags = btrfs_block_group_flags(&bg_item);
9650
9651         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9652         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9653         chunk_key.offset = bg_key.objectid;
9654
9655         btrfs_init_path(&path);
9656         /* Search for the referencer chunk */
9657         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9658         if (ret) {
9659                 error(
9660                 "block group[%llu %llu] did not find the related chunk item",
9661                         bg_key.objectid, bg_key.offset);
9662                 err |= REFERENCER_MISSING;
9663         } else {
9664                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9665                                         struct btrfs_chunk);
9666                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9667                                                 bg_key.offset) {
9668                         error(
9669         "block group[%llu %llu] related chunk item length does not match",
9670                                 bg_key.objectid, bg_key.offset);
9671                         err |= REFERENCER_MISMATCH;
9672                 }
9673         }
9674         btrfs_release_path(&path);
9675
9676         /* Search from the block group bytenr */
9677         extent_key.objectid = bg_key.objectid;
9678         extent_key.type = 0;
9679         extent_key.offset = 0;
9680
9681         btrfs_init_path(&path);
9682         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9683         if (ret < 0)
9684                 goto out;
9685
9686         /* Iterate extent tree to account used space */
9687         while (1) {
9688                 leaf = path.nodes[0];
9689                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9690                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9691                         break;
9692
9693                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9694                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9695                         goto next;
9696                 if (extent_key.objectid < bg_key.objectid)
9697                         goto next;
9698
9699                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9700                         total += nodesize;
9701                 else
9702                         total += extent_key.offset;
9703
9704                 ei = btrfs_item_ptr(leaf, path.slots[0],
9705                                     struct btrfs_extent_item);
9706                 flags = btrfs_extent_flags(leaf, ei);
9707                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9708                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9709                                 error(
9710                         "bad extent[%llu, %llu) type mismatch with chunk",
9711                                         extent_key.objectid,
9712                                         extent_key.objectid + extent_key.offset);
9713                                 err |= CHUNK_TYPE_MISMATCH;
9714                         }
9715                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9716                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9717                                     BTRFS_BLOCK_GROUP_METADATA))) {
9718                                 error(
9719                         "bad extent[%llu, %llu) type mismatch with chunk",
9720                                         extent_key.objectid,
9721                                         extent_key.objectid + nodesize);
9722                                 err |= CHUNK_TYPE_MISMATCH;
9723                         }
9724                 }
9725 next:
9726                 ret = btrfs_next_item(extent_root, &path);
9727                 if (ret)
9728                         break;
9729         }
9730
9731 out:
9732         btrfs_release_path(&path);
9733
9734         if (total != used) {
9735                 error(
9736                 "block group[%llu %llu] used %llu but extent items used %llu",
9737                         bg_key.objectid, bg_key.offset, used, total);
9738                 err |= ACCOUNTING_MISMATCH;
9739         }
9740         return err;
9741 }
9742
9743 /*
9744  * Check a chunk item.
9745  * Including checking all referred dev_extents and block group
9746  */
9747 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9748                             struct extent_buffer *eb, int slot)
9749 {
9750         struct btrfs_root *extent_root = fs_info->extent_root;
9751         struct btrfs_root *dev_root = fs_info->dev_root;
9752         struct btrfs_path path;
9753         struct btrfs_key chunk_key;
9754         struct btrfs_key bg_key;
9755         struct btrfs_key devext_key;
9756         struct btrfs_chunk *chunk;
9757         struct extent_buffer *leaf;
9758         struct btrfs_block_group_item *bi;
9759         struct btrfs_block_group_item bg_item;
9760         struct btrfs_dev_extent *ptr;
9761         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9762         u64 length;
9763         u64 chunk_end;
9764         u64 type;
9765         u64 profile;
9766         int num_stripes;
9767         u64 offset;
9768         u64 objectid;
9769         int i;
9770         int ret;
9771         int err = 0;
9772
9773         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9774         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9775         length = btrfs_chunk_length(eb, chunk);
9776         chunk_end = chunk_key.offset + length;
9777         if (!IS_ALIGNED(length, sectorsize)) {
9778                 error("chunk[%llu %llu) not aligned to %u",
9779                         chunk_key.offset, chunk_end, sectorsize);
9780                 err |= BYTES_UNALIGNED;
9781                 goto out;
9782         }
9783
9784         type = btrfs_chunk_type(eb, chunk);
9785         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9786         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9787                 error("chunk[%llu %llu) has no chunk type",
9788                         chunk_key.offset, chunk_end);
9789                 err |= UNKNOWN_TYPE;
9790         }
9791         if (profile && (profile & (profile - 1))) {
9792                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9793                         chunk_key.offset, chunk_end, profile);
9794                 err |= UNKNOWN_TYPE;
9795         }
9796
9797         bg_key.objectid = chunk_key.offset;
9798         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9799         bg_key.offset = length;
9800
9801         btrfs_init_path(&path);
9802         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9803         if (ret) {
9804                 error(
9805                 "chunk[%llu %llu) did not find the related block group item",
9806                         chunk_key.offset, chunk_end);
9807                 err |= REFERENCER_MISSING;
9808         } else{
9809                 leaf = path.nodes[0];
9810                 bi = btrfs_item_ptr(leaf, path.slots[0],
9811                                     struct btrfs_block_group_item);
9812                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9813                                    sizeof(bg_item));
9814                 if (btrfs_block_group_flags(&bg_item) != type) {
9815                         error(
9816 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9817                                 chunk_key.offset, chunk_end, type,
9818                                 btrfs_block_group_flags(&bg_item));
9819                         err |= REFERENCER_MISSING;
9820                 }
9821         }
9822
9823         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9824         for (i = 0; i < num_stripes; i++) {
9825                 btrfs_release_path(&path);
9826                 btrfs_init_path(&path);
9827                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9828                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9829                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9830
9831                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9832                                         0, 0);
9833                 if (ret)
9834                         goto not_match_dev;
9835
9836                 leaf = path.nodes[0];
9837                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9838                                      struct btrfs_dev_extent);
9839                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9840                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9841                 if (objectid != chunk_key.objectid ||
9842                     offset != chunk_key.offset ||
9843                     btrfs_dev_extent_length(leaf, ptr) != length)
9844                         goto not_match_dev;
9845                 continue;
9846 not_match_dev:
9847                 err |= BACKREF_MISSING;
9848                 error(
9849                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9850                         chunk_key.objectid, chunk_end, i);
9851                 continue;
9852         }
9853         btrfs_release_path(&path);
9854 out:
9855         return err;
9856 }
9857
9858 /*
9859  * Main entry function to check known items and update related accounting info
9860  */
9861 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9862 {
9863         struct btrfs_fs_info *fs_info = root->fs_info;
9864         struct btrfs_key key;
9865         int slot = 0;
9866         int type;
9867         struct btrfs_extent_data_ref *dref;
9868         int ret;
9869         int err = 0;
9870
9871 next:
9872         btrfs_item_key_to_cpu(eb, &key, slot);
9873         type = btrfs_key_type(&key);
9874
9875         switch (type) {
9876         case BTRFS_EXTENT_DATA_KEY:
9877                 ret = check_extent_data_item(root, eb, slot);
9878                 err |= ret;
9879                 break;
9880         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9881                 ret = check_block_group_item(fs_info, eb, slot);
9882                 err |= ret;
9883                 break;
9884         case BTRFS_DEV_ITEM_KEY:
9885                 ret = check_dev_item(fs_info, eb, slot);
9886                 err |= ret;
9887                 break;
9888         case BTRFS_CHUNK_ITEM_KEY:
9889                 ret = check_chunk_item(fs_info, eb, slot);
9890                 err |= ret;
9891                 break;
9892         case BTRFS_DEV_EXTENT_KEY:
9893                 ret = check_dev_extent_item(fs_info, eb, slot);
9894                 err |= ret;
9895                 break;
9896         case BTRFS_EXTENT_ITEM_KEY:
9897         case BTRFS_METADATA_ITEM_KEY:
9898                 ret = check_extent_item(fs_info, eb, slot);
9899                 err |= ret;
9900                 break;
9901         case BTRFS_EXTENT_CSUM_KEY:
9902                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9903                 break;
9904         case BTRFS_TREE_BLOCK_REF_KEY:
9905                 ret = check_tree_block_backref(fs_info, key.offset,
9906                                                key.objectid, -1);
9907                 err |= ret;
9908                 break;
9909         case BTRFS_EXTENT_DATA_REF_KEY:
9910                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9911                 ret = check_extent_data_backref(fs_info,
9912                                 btrfs_extent_data_ref_root(eb, dref),
9913                                 btrfs_extent_data_ref_objectid(eb, dref),
9914                                 btrfs_extent_data_ref_offset(eb, dref),
9915                                 key.objectid, 0,
9916                                 btrfs_extent_data_ref_count(eb, dref));
9917                 err |= ret;
9918                 break;
9919         case BTRFS_SHARED_BLOCK_REF_KEY:
9920                 ret = check_shared_block_backref(fs_info, key.offset,
9921                                                  key.objectid, -1);
9922                 err |= ret;
9923                 break;
9924         case BTRFS_SHARED_DATA_REF_KEY:
9925                 ret = check_shared_data_backref(fs_info, key.offset,
9926                                                 key.objectid);
9927                 err |= ret;
9928                 break;
9929         default:
9930                 break;
9931         }
9932
9933         if (++slot < btrfs_header_nritems(eb))
9934                 goto next;
9935
9936         return err;
9937 }
9938
9939 /*
9940  * Helper function for later fs/subvol tree check.  To determine if a tree
9941  * block should be checked.
9942  * This function will ensure only the direct referencer with lowest rootid to
9943  * check a fs/subvolume tree block.
9944  *
9945  * Backref check at extent tree would detect errors like missing subvolume
9946  * tree, so we can do aggressive check to reduce duplicated checks.
9947  */
9948 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9949 {
9950         struct btrfs_root *extent_root = root->fs_info->extent_root;
9951         struct btrfs_key key;
9952         struct btrfs_path path;
9953         struct extent_buffer *leaf;
9954         int slot;
9955         struct btrfs_extent_item *ei;
9956         unsigned long ptr;
9957         unsigned long end;
9958         int type;
9959         u32 item_size;
9960         u64 offset;
9961         struct btrfs_extent_inline_ref *iref;
9962         int ret;
9963
9964         btrfs_init_path(&path);
9965         key.objectid = btrfs_header_bytenr(eb);
9966         key.type = BTRFS_METADATA_ITEM_KEY;
9967         key.offset = (u64)-1;
9968
9969         /*
9970          * Any failure in backref resolving means we can't determine
9971          * whom the tree block belongs to.
9972          * So in that case, we need to check that tree block
9973          */
9974         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9975         if (ret < 0)
9976                 goto need_check;
9977
9978         ret = btrfs_previous_extent_item(extent_root, &path,
9979                                          btrfs_header_bytenr(eb));
9980         if (ret)
9981                 goto need_check;
9982
9983         leaf = path.nodes[0];
9984         slot = path.slots[0];
9985         btrfs_item_key_to_cpu(leaf, &key, slot);
9986         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9987
9988         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9989                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9990         } else {
9991                 struct btrfs_tree_block_info *info;
9992
9993                 info = (struct btrfs_tree_block_info *)(ei + 1);
9994                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9995         }
9996
9997         item_size = btrfs_item_size_nr(leaf, slot);
9998         ptr = (unsigned long)iref;
9999         end = (unsigned long)ei + item_size;
10000         while (ptr < end) {
10001                 iref = (struct btrfs_extent_inline_ref *)ptr;
10002                 type = btrfs_extent_inline_ref_type(leaf, iref);
10003                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10004
10005                 /*
10006                  * We only check the tree block if current root is
10007                  * the lowest referencer of it.
10008                  */
10009                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10010                     offset < root->objectid) {
10011                         btrfs_release_path(&path);
10012                         return 0;
10013                 }
10014
10015                 ptr += btrfs_extent_inline_ref_size(type);
10016         }
10017         /*
10018          * Normally we should also check keyed tree block ref, but that may be
10019          * very time consuming.  Inlined ref should already make us skip a lot
10020          * of refs now.  So skip search keyed tree block ref.
10021          */
10022
10023 need_check:
10024         btrfs_release_path(&path);
10025         return 1;
10026 }
10027
10028 /*
10029  * Traversal function for tree block. We will do:
10030  * 1) Skip shared fs/subvolume tree blocks
10031  * 2) Update related bytes accounting
10032  * 3) Pre-order traversal
10033  */
10034 static int traverse_tree_block(struct btrfs_root *root,
10035                                 struct extent_buffer *node)
10036 {
10037         struct extent_buffer *eb;
10038         int level;
10039         u64 nr;
10040         int i;
10041         int err = 0;
10042         int ret;
10043
10044         /*
10045          * Skip shared fs/subvolume tree block, in that case they will
10046          * be checked by referencer with lowest rootid
10047          */
10048         if (is_fstree(root->objectid) && !should_check(root, node))
10049                 return 0;
10050
10051         /* Update bytes accounting */
10052         total_btree_bytes += node->len;
10053         if (fs_root_objectid(btrfs_header_owner(node)))
10054                 total_fs_tree_bytes += node->len;
10055         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10056                 total_extent_tree_bytes += node->len;
10057         if (!found_old_backref &&
10058             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10059             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10060             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10061                 found_old_backref = 1;
10062
10063         /* pre-order tranversal, check itself first */
10064         level = btrfs_header_level(node);
10065         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10066                                    btrfs_header_level(node),
10067                                    btrfs_header_owner(node));
10068         err |= ret;
10069         if (err)
10070                 error(
10071         "check %s failed root %llu bytenr %llu level %d, force continue check",
10072                         level ? "node":"leaf", root->objectid,
10073                         btrfs_header_bytenr(node), btrfs_header_level(node));
10074
10075         if (!level) {
10076                 btree_space_waste += btrfs_leaf_free_space(root, node);
10077                 ret = check_leaf_items(root, node);
10078                 err |= ret;
10079                 return err;
10080         }
10081
10082         nr = btrfs_header_nritems(node);
10083         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10084                 sizeof(struct btrfs_key_ptr);
10085
10086         /* Then check all its children */
10087         for (i = 0; i < nr; i++) {
10088                 u64 blocknr = btrfs_node_blockptr(node, i);
10089
10090                 /*
10091                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10092                  * to call the function itself.
10093                  */
10094                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10095                 if (extent_buffer_uptodate(eb)) {
10096                         ret = traverse_tree_block(root, eb);
10097                         err |= ret;
10098                 }
10099                 free_extent_buffer(eb);
10100         }
10101
10102         return err;
10103 }
10104
10105 /*
10106  * Low memory usage version check_chunks_and_extents.
10107  */
10108 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10109 {
10110         struct btrfs_path path;
10111         struct btrfs_key key;
10112         struct btrfs_root *root1;
10113         struct btrfs_root *cur_root;
10114         int err = 0;
10115         int ret;
10116
10117         root1 = root->fs_info->chunk_root;
10118         ret = traverse_tree_block(root1, root1->node);
10119         err |= ret;
10120
10121         root1 = root->fs_info->tree_root;
10122         ret = traverse_tree_block(root1, root1->node);
10123         err |= ret;
10124
10125         btrfs_init_path(&path);
10126         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10127         key.offset = 0;
10128         key.type = BTRFS_ROOT_ITEM_KEY;
10129
10130         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10131         if (ret) {
10132                 error("cannot find extent treet in tree_root");
10133                 goto out;
10134         }
10135
10136         while (1) {
10137                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10138                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10139                         goto next;
10140                 key.offset = (u64)-1;
10141
10142                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10143                 if (IS_ERR(cur_root) || !cur_root) {
10144                         error("failed to read tree: %lld", key.objectid);
10145                         goto next;
10146                 }
10147
10148                 ret = traverse_tree_block(cur_root, cur_root->node);
10149                 err |= ret;
10150
10151 next:
10152                 ret = btrfs_next_item(root1, &path);
10153                 if (ret)
10154                         goto out;
10155         }
10156
10157 out:
10158         btrfs_release_path(&path);
10159         return err;
10160 }
10161
10162 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10163                            struct btrfs_root *root, int overwrite)
10164 {
10165         struct extent_buffer *c;
10166         struct extent_buffer *old = root->node;
10167         int level;
10168         int ret;
10169         struct btrfs_disk_key disk_key = {0,0,0};
10170
10171         level = 0;
10172
10173         if (overwrite) {
10174                 c = old;
10175                 extent_buffer_get(c);
10176                 goto init;
10177         }
10178         c = btrfs_alloc_free_block(trans, root,
10179                                    root->nodesize,
10180                                    root->root_key.objectid,
10181                                    &disk_key, level, 0, 0);
10182         if (IS_ERR(c)) {
10183                 c = old;
10184                 extent_buffer_get(c);
10185                 overwrite = 1;
10186         }
10187 init:
10188         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10189         btrfs_set_header_level(c, level);
10190         btrfs_set_header_bytenr(c, c->start);
10191         btrfs_set_header_generation(c, trans->transid);
10192         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10193         btrfs_set_header_owner(c, root->root_key.objectid);
10194
10195         write_extent_buffer(c, root->fs_info->fsid,
10196                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10197
10198         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10199                             btrfs_header_chunk_tree_uuid(c),
10200                             BTRFS_UUID_SIZE);
10201
10202         btrfs_mark_buffer_dirty(c);
10203         /*
10204          * this case can happen in the following case:
10205          *
10206          * 1.overwrite previous root.
10207          *
10208          * 2.reinit reloc data root, this is because we skip pin
10209          * down reloc data tree before which means we can allocate
10210          * same block bytenr here.
10211          */
10212         if (old->start == c->start) {
10213                 btrfs_set_root_generation(&root->root_item,
10214                                           trans->transid);
10215                 root->root_item.level = btrfs_header_level(root->node);
10216                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10217                                         &root->root_key, &root->root_item);
10218                 if (ret) {
10219                         free_extent_buffer(c);
10220                         return ret;
10221                 }
10222         }
10223         free_extent_buffer(old);
10224         root->node = c;
10225         add_root_to_dirty_list(root);
10226         return 0;
10227 }
10228
10229 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10230                                 struct extent_buffer *eb, int tree_root)
10231 {
10232         struct extent_buffer *tmp;
10233         struct btrfs_root_item *ri;
10234         struct btrfs_key key;
10235         u64 bytenr;
10236         u32 nodesize;
10237         int level = btrfs_header_level(eb);
10238         int nritems;
10239         int ret;
10240         int i;
10241
10242         /*
10243          * If we have pinned this block before, don't pin it again.
10244          * This can not only avoid forever loop with broken filesystem
10245          * but also give us some speedups.
10246          */
10247         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10248                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10249                 return 0;
10250
10251         btrfs_pin_extent(fs_info, eb->start, eb->len);
10252
10253         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10254         nritems = btrfs_header_nritems(eb);
10255         for (i = 0; i < nritems; i++) {
10256                 if (level == 0) {
10257                         btrfs_item_key_to_cpu(eb, &key, i);
10258                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10259                                 continue;
10260                         /* Skip the extent root and reloc roots */
10261                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10262                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10263                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10264                                 continue;
10265                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10266                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10267
10268                         /*
10269                          * If at any point we start needing the real root we
10270                          * will have to build a stump root for the root we are
10271                          * in, but for now this doesn't actually use the root so
10272                          * just pass in extent_root.
10273                          */
10274                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10275                                               nodesize, 0);
10276                         if (!extent_buffer_uptodate(tmp)) {
10277                                 fprintf(stderr, "Error reading root block\n");
10278                                 return -EIO;
10279                         }
10280                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10281                         free_extent_buffer(tmp);
10282                         if (ret)
10283                                 return ret;
10284                 } else {
10285                         bytenr = btrfs_node_blockptr(eb, i);
10286
10287                         /* If we aren't the tree root don't read the block */
10288                         if (level == 1 && !tree_root) {
10289                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10290                                 continue;
10291                         }
10292
10293                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10294                                               nodesize, 0);
10295                         if (!extent_buffer_uptodate(tmp)) {
10296                                 fprintf(stderr, "Error reading tree block\n");
10297                                 return -EIO;
10298                         }
10299                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10300                         free_extent_buffer(tmp);
10301                         if (ret)
10302                                 return ret;
10303                 }
10304         }
10305
10306         return 0;
10307 }
10308
10309 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10310 {
10311         int ret;
10312
10313         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10314         if (ret)
10315                 return ret;
10316
10317         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10318 }
10319
10320 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10321 {
10322         struct btrfs_block_group_cache *cache;
10323         struct btrfs_path *path;
10324         struct extent_buffer *leaf;
10325         struct btrfs_chunk *chunk;
10326         struct btrfs_key key;
10327         int ret;
10328         u64 start;
10329
10330         path = btrfs_alloc_path();
10331         if (!path)
10332                 return -ENOMEM;
10333
10334         key.objectid = 0;
10335         key.type = BTRFS_CHUNK_ITEM_KEY;
10336         key.offset = 0;
10337
10338         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10339         if (ret < 0) {
10340                 btrfs_free_path(path);
10341                 return ret;
10342         }
10343
10344         /*
10345          * We do this in case the block groups were screwed up and had alloc
10346          * bits that aren't actually set on the chunks.  This happens with
10347          * restored images every time and could happen in real life I guess.
10348          */
10349         fs_info->avail_data_alloc_bits = 0;
10350         fs_info->avail_metadata_alloc_bits = 0;
10351         fs_info->avail_system_alloc_bits = 0;
10352
10353         /* First we need to create the in-memory block groups */
10354         while (1) {
10355                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10356                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10357                         if (ret < 0) {
10358                                 btrfs_free_path(path);
10359                                 return ret;
10360                         }
10361                         if (ret) {
10362                                 ret = 0;
10363                                 break;
10364                         }
10365                 }
10366                 leaf = path->nodes[0];
10367                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10368                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10369                         path->slots[0]++;
10370                         continue;
10371                 }
10372
10373                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10374                                        struct btrfs_chunk);
10375                 btrfs_add_block_group(fs_info, 0,
10376                                       btrfs_chunk_type(leaf, chunk),
10377                                       key.objectid, key.offset,
10378                                       btrfs_chunk_length(leaf, chunk));
10379                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10380                                  key.offset + btrfs_chunk_length(leaf, chunk),
10381                                  GFP_NOFS);
10382                 path->slots[0]++;
10383         }
10384         start = 0;
10385         while (1) {
10386                 cache = btrfs_lookup_first_block_group(fs_info, start);
10387                 if (!cache)
10388                         break;
10389                 cache->cached = 1;
10390                 start = cache->key.objectid + cache->key.offset;
10391         }
10392
10393         btrfs_free_path(path);
10394         return 0;
10395 }
10396
10397 static int reset_balance(struct btrfs_trans_handle *trans,
10398                          struct btrfs_fs_info *fs_info)
10399 {
10400         struct btrfs_root *root = fs_info->tree_root;
10401         struct btrfs_path *path;
10402         struct extent_buffer *leaf;
10403         struct btrfs_key key;
10404         int del_slot, del_nr = 0;
10405         int ret;
10406         int found = 0;
10407
10408         path = btrfs_alloc_path();
10409         if (!path)
10410                 return -ENOMEM;
10411
10412         key.objectid = BTRFS_BALANCE_OBJECTID;
10413         key.type = BTRFS_BALANCE_ITEM_KEY;
10414         key.offset = 0;
10415
10416         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10417         if (ret) {
10418                 if (ret > 0)
10419                         ret = 0;
10420                 if (!ret)
10421                         goto reinit_data_reloc;
10422                 else
10423                         goto out;
10424         }
10425
10426         ret = btrfs_del_item(trans, root, path);
10427         if (ret)
10428                 goto out;
10429         btrfs_release_path(path);
10430
10431         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10432         key.type = BTRFS_ROOT_ITEM_KEY;
10433         key.offset = 0;
10434
10435         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10436         if (ret < 0)
10437                 goto out;
10438         while (1) {
10439                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10440                         if (!found)
10441                                 break;
10442
10443                         if (del_nr) {
10444                                 ret = btrfs_del_items(trans, root, path,
10445                                                       del_slot, del_nr);
10446                                 del_nr = 0;
10447                                 if (ret)
10448                                         goto out;
10449                         }
10450                         key.offset++;
10451                         btrfs_release_path(path);
10452
10453                         found = 0;
10454                         ret = btrfs_search_slot(trans, root, &key, path,
10455                                                 -1, 1);
10456                         if (ret < 0)
10457                                 goto out;
10458                         continue;
10459                 }
10460                 found = 1;
10461                 leaf = path->nodes[0];
10462                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10463                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10464                         break;
10465                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10466                         path->slots[0]++;
10467                         continue;
10468                 }
10469                 if (!del_nr) {
10470                         del_slot = path->slots[0];
10471                         del_nr = 1;
10472                 } else {
10473                         del_nr++;
10474                 }
10475                 path->slots[0]++;
10476         }
10477
10478         if (del_nr) {
10479                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10480                 if (ret)
10481                         goto out;
10482         }
10483         btrfs_release_path(path);
10484
10485 reinit_data_reloc:
10486         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10487         key.type = BTRFS_ROOT_ITEM_KEY;
10488         key.offset = (u64)-1;
10489         root = btrfs_read_fs_root(fs_info, &key);
10490         if (IS_ERR(root)) {
10491                 fprintf(stderr, "Error reading data reloc tree\n");
10492                 ret = PTR_ERR(root);
10493                 goto out;
10494         }
10495         record_root_in_trans(trans, root);
10496         ret = btrfs_fsck_reinit_root(trans, root, 0);
10497         if (ret)
10498                 goto out;
10499         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10500 out:
10501         btrfs_free_path(path);
10502         return ret;
10503 }
10504
10505 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10506                               struct btrfs_fs_info *fs_info)
10507 {
10508         u64 start = 0;
10509         int ret;
10510
10511         /*
10512          * The only reason we don't do this is because right now we're just
10513          * walking the trees we find and pinning down their bytes, we don't look
10514          * at any of the leaves.  In order to do mixed groups we'd have to check
10515          * the leaves of any fs roots and pin down the bytes for any file
10516          * extents we find.  Not hard but why do it if we don't have to?
10517          */
10518         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10519                 fprintf(stderr, "We don't support re-initing the extent tree "
10520                         "for mixed block groups yet, please notify a btrfs "
10521                         "developer you want to do this so they can add this "
10522                         "functionality.\n");
10523                 return -EINVAL;
10524         }
10525
10526         /*
10527          * first we need to walk all of the trees except the extent tree and pin
10528          * down the bytes that are in use so we don't overwrite any existing
10529          * metadata.
10530          */
10531         ret = pin_metadata_blocks(fs_info);
10532         if (ret) {
10533                 fprintf(stderr, "error pinning down used bytes\n");
10534                 return ret;
10535         }
10536
10537         /*
10538          * Need to drop all the block groups since we're going to recreate all
10539          * of them again.
10540          */
10541         btrfs_free_block_groups(fs_info);
10542         ret = reset_block_groups(fs_info);
10543         if (ret) {
10544                 fprintf(stderr, "error resetting the block groups\n");
10545                 return ret;
10546         }
10547
10548         /* Ok we can allocate now, reinit the extent root */
10549         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10550         if (ret) {
10551                 fprintf(stderr, "extent root initialization failed\n");
10552                 /*
10553                  * When the transaction code is updated we should end the
10554                  * transaction, but for now progs only knows about commit so
10555                  * just return an error.
10556                  */
10557                 return ret;
10558         }
10559
10560         /*
10561          * Now we have all the in-memory block groups setup so we can make
10562          * allocations properly, and the metadata we care about is safe since we
10563          * pinned all of it above.
10564          */
10565         while (1) {
10566                 struct btrfs_block_group_cache *cache;
10567
10568                 cache = btrfs_lookup_first_block_group(fs_info, start);
10569                 if (!cache)
10570                         break;
10571                 start = cache->key.objectid + cache->key.offset;
10572                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10573                                         &cache->key, &cache->item,
10574                                         sizeof(cache->item));
10575                 if (ret) {
10576                         fprintf(stderr, "Error adding block group\n");
10577                         return ret;
10578                 }
10579                 btrfs_extent_post_op(trans, fs_info->extent_root);
10580         }
10581
10582         ret = reset_balance(trans, fs_info);
10583         if (ret)
10584                 fprintf(stderr, "error resetting the pending balance\n");
10585
10586         return ret;
10587 }
10588
10589 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10590 {
10591         struct btrfs_path *path;
10592         struct btrfs_trans_handle *trans;
10593         struct btrfs_key key;
10594         int ret;
10595
10596         printf("Recowing metadata block %llu\n", eb->start);
10597         key.objectid = btrfs_header_owner(eb);
10598         key.type = BTRFS_ROOT_ITEM_KEY;
10599         key.offset = (u64)-1;
10600
10601         root = btrfs_read_fs_root(root->fs_info, &key);
10602         if (IS_ERR(root)) {
10603                 fprintf(stderr, "Couldn't find owner root %llu\n",
10604                         key.objectid);
10605                 return PTR_ERR(root);
10606         }
10607
10608         path = btrfs_alloc_path();
10609         if (!path)
10610                 return -ENOMEM;
10611
10612         trans = btrfs_start_transaction(root, 1);
10613         if (IS_ERR(trans)) {
10614                 btrfs_free_path(path);
10615                 return PTR_ERR(trans);
10616         }
10617
10618         path->lowest_level = btrfs_header_level(eb);
10619         if (path->lowest_level)
10620                 btrfs_node_key_to_cpu(eb, &key, 0);
10621         else
10622                 btrfs_item_key_to_cpu(eb, &key, 0);
10623
10624         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10625         btrfs_commit_transaction(trans, root);
10626         btrfs_free_path(path);
10627         return ret;
10628 }
10629
10630 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10631 {
10632         struct btrfs_path *path;
10633         struct btrfs_trans_handle *trans;
10634         struct btrfs_key key;
10635         int ret;
10636
10637         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10638                bad->key.type, bad->key.offset);
10639         key.objectid = bad->root_id;
10640         key.type = BTRFS_ROOT_ITEM_KEY;
10641         key.offset = (u64)-1;
10642
10643         root = btrfs_read_fs_root(root->fs_info, &key);
10644         if (IS_ERR(root)) {
10645                 fprintf(stderr, "Couldn't find owner root %llu\n",
10646                         key.objectid);
10647                 return PTR_ERR(root);
10648         }
10649
10650         path = btrfs_alloc_path();
10651         if (!path)
10652                 return -ENOMEM;
10653
10654         trans = btrfs_start_transaction(root, 1);
10655         if (IS_ERR(trans)) {
10656                 btrfs_free_path(path);
10657                 return PTR_ERR(trans);
10658         }
10659
10660         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10661         if (ret) {
10662                 if (ret > 0)
10663                         ret = 0;
10664                 goto out;
10665         }
10666         ret = btrfs_del_item(trans, root, path);
10667 out:
10668         btrfs_commit_transaction(trans, root);
10669         btrfs_free_path(path);
10670         return ret;
10671 }
10672
10673 static int zero_log_tree(struct btrfs_root *root)
10674 {
10675         struct btrfs_trans_handle *trans;
10676         int ret;
10677
10678         trans = btrfs_start_transaction(root, 1);
10679         if (IS_ERR(trans)) {
10680                 ret = PTR_ERR(trans);
10681                 return ret;
10682         }
10683         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10684         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10685         ret = btrfs_commit_transaction(trans, root);
10686         return ret;
10687 }
10688
10689 static int populate_csum(struct btrfs_trans_handle *trans,
10690                          struct btrfs_root *csum_root, char *buf, u64 start,
10691                          u64 len)
10692 {
10693         u64 offset = 0;
10694         u64 sectorsize;
10695         int ret = 0;
10696
10697         while (offset < len) {
10698                 sectorsize = csum_root->sectorsize;
10699                 ret = read_extent_data(csum_root, buf, start + offset,
10700                                        &sectorsize, 0);
10701                 if (ret)
10702                         break;
10703                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10704                                             start + offset, buf, sectorsize);
10705                 if (ret)
10706                         break;
10707                 offset += sectorsize;
10708         }
10709         return ret;
10710 }
10711
10712 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10713                                       struct btrfs_root *csum_root,
10714                                       struct btrfs_root *cur_root)
10715 {
10716         struct btrfs_path *path;
10717         struct btrfs_key key;
10718         struct extent_buffer *node;
10719         struct btrfs_file_extent_item *fi;
10720         char *buf = NULL;
10721         u64 start = 0;
10722         u64 len = 0;
10723         int slot = 0;
10724         int ret = 0;
10725
10726         path = btrfs_alloc_path();
10727         if (!path)
10728                 return -ENOMEM;
10729         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10730         if (!buf) {
10731                 ret = -ENOMEM;
10732                 goto out;
10733         }
10734
10735         key.objectid = 0;
10736         key.offset = 0;
10737         key.type = 0;
10738
10739         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10740         if (ret < 0)
10741                 goto out;
10742         /* Iterate all regular file extents and fill its csum */
10743         while (1) {
10744                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10745
10746                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10747                         goto next;
10748                 node = path->nodes[0];
10749                 slot = path->slots[0];
10750                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10751                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10752                         goto next;
10753                 start = btrfs_file_extent_disk_bytenr(node, fi);
10754                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10755
10756                 ret = populate_csum(trans, csum_root, buf, start, len);
10757                 if (ret == -EEXIST)
10758                         ret = 0;
10759                 if (ret < 0)
10760                         goto out;
10761 next:
10762                 /*
10763                  * TODO: if next leaf is corrupted, jump to nearest next valid
10764                  * leaf.
10765                  */
10766                 ret = btrfs_next_item(cur_root, path);
10767                 if (ret < 0)
10768                         goto out;
10769                 if (ret > 0) {
10770                         ret = 0;
10771                         goto out;
10772                 }
10773         }
10774
10775 out:
10776         btrfs_free_path(path);
10777         free(buf);
10778         return ret;
10779 }
10780
10781 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10782                                   struct btrfs_root *csum_root)
10783 {
10784         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10785         struct btrfs_path *path;
10786         struct btrfs_root *tree_root = fs_info->tree_root;
10787         struct btrfs_root *cur_root;
10788         struct extent_buffer *node;
10789         struct btrfs_key key;
10790         int slot = 0;
10791         int ret = 0;
10792
10793         path = btrfs_alloc_path();
10794         if (!path)
10795                 return -ENOMEM;
10796
10797         key.objectid = BTRFS_FS_TREE_OBJECTID;
10798         key.offset = 0;
10799         key.type = BTRFS_ROOT_ITEM_KEY;
10800
10801         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10802         if (ret < 0)
10803                 goto out;
10804         if (ret > 0) {
10805                 ret = -ENOENT;
10806                 goto out;
10807         }
10808
10809         while (1) {
10810                 node = path->nodes[0];
10811                 slot = path->slots[0];
10812                 btrfs_item_key_to_cpu(node, &key, slot);
10813                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10814                         goto out;
10815                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10816                         goto next;
10817                 if (!is_fstree(key.objectid))
10818                         goto next;
10819                 key.offset = (u64)-1;
10820
10821                 cur_root = btrfs_read_fs_root(fs_info, &key);
10822                 if (IS_ERR(cur_root) || !cur_root) {
10823                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10824                                 key.objectid);
10825                         goto out;
10826                 }
10827                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10828                                 cur_root);
10829                 if (ret < 0)
10830                         goto out;
10831 next:
10832                 ret = btrfs_next_item(tree_root, path);
10833                 if (ret > 0) {
10834                         ret = 0;
10835                         goto out;
10836                 }
10837                 if (ret < 0)
10838                         goto out;
10839         }
10840
10841 out:
10842         btrfs_free_path(path);
10843         return ret;
10844 }
10845
10846 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10847                                       struct btrfs_root *csum_root)
10848 {
10849         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10850         struct btrfs_path *path;
10851         struct btrfs_extent_item *ei;
10852         struct extent_buffer *leaf;
10853         char *buf;
10854         struct btrfs_key key;
10855         int ret;
10856
10857         path = btrfs_alloc_path();
10858         if (!path)
10859                 return -ENOMEM;
10860
10861         key.objectid = 0;
10862         key.type = BTRFS_EXTENT_ITEM_KEY;
10863         key.offset = 0;
10864
10865         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10866         if (ret < 0) {
10867                 btrfs_free_path(path);
10868                 return ret;
10869         }
10870
10871         buf = malloc(csum_root->sectorsize);
10872         if (!buf) {
10873                 btrfs_free_path(path);
10874                 return -ENOMEM;
10875         }
10876
10877         while (1) {
10878                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10879                         ret = btrfs_next_leaf(extent_root, path);
10880                         if (ret < 0)
10881                                 break;
10882                         if (ret) {
10883                                 ret = 0;
10884                                 break;
10885                         }
10886                 }
10887                 leaf = path->nodes[0];
10888
10889                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10890                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10891                         path->slots[0]++;
10892                         continue;
10893                 }
10894
10895                 ei = btrfs_item_ptr(leaf, path->slots[0],
10896                                     struct btrfs_extent_item);
10897                 if (!(btrfs_extent_flags(leaf, ei) &
10898                       BTRFS_EXTENT_FLAG_DATA)) {
10899                         path->slots[0]++;
10900                         continue;
10901                 }
10902
10903                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10904                                     key.offset);
10905                 if (ret)
10906                         break;
10907                 path->slots[0]++;
10908         }
10909
10910         btrfs_free_path(path);
10911         free(buf);
10912         return ret;
10913 }
10914
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * When the extent tree has been re-initialized all extent info is gone, so
 * it cannot serve as the source of data extents.  In that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
10931
10932 static void free_roots_info_cache(void)
10933 {
10934         if (!roots_info_cache)
10935                 return;
10936
10937         while (!cache_tree_empty(roots_info_cache)) {
10938                 struct cache_extent *entry;
10939                 struct root_item_info *rii;
10940
10941                 entry = first_cache_extent(roots_info_cache);
10942                 if (!entry)
10943                         break;
10944                 remove_cache_extent(roots_info_cache, entry);
10945                 rii = container_of(entry, struct root_item_info, cache_extent);
10946                 free(rii);
10947         }
10948
10949         free(roots_info_cache);
10950         roots_info_cache = NULL;
10951 }
10952
10953 static int build_roots_info_cache(struct btrfs_fs_info *info)
10954 {
10955         int ret = 0;
10956         struct btrfs_key key;
10957         struct extent_buffer *leaf;
10958         struct btrfs_path *path;
10959
10960         if (!roots_info_cache) {
10961                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10962                 if (!roots_info_cache)
10963                         return -ENOMEM;
10964                 cache_tree_init(roots_info_cache);
10965         }
10966
10967         path = btrfs_alloc_path();
10968         if (!path)
10969                 return -ENOMEM;
10970
10971         key.objectid = 0;
10972         key.type = BTRFS_EXTENT_ITEM_KEY;
10973         key.offset = 0;
10974
10975         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10976         if (ret < 0)
10977                 goto out;
10978         leaf = path->nodes[0];
10979
10980         while (1) {
10981                 struct btrfs_key found_key;
10982                 struct btrfs_extent_item *ei;
10983                 struct btrfs_extent_inline_ref *iref;
10984                 int slot = path->slots[0];
10985                 int type;
10986                 u64 flags;
10987                 u64 root_id;
10988                 u8 level;
10989                 struct cache_extent *entry;
10990                 struct root_item_info *rii;
10991
10992                 if (slot >= btrfs_header_nritems(leaf)) {
10993                         ret = btrfs_next_leaf(info->extent_root, path);
10994                         if (ret < 0) {
10995                                 break;
10996                         } else if (ret) {
10997                                 ret = 0;
10998                                 break;
10999                         }
11000                         leaf = path->nodes[0];
11001                         slot = path->slots[0];
11002                 }
11003
11004                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11005
11006                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11007                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11008                         goto next;
11009
11010                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11011                 flags = btrfs_extent_flags(leaf, ei);
11012
11013                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11014                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11015                         goto next;
11016
11017                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11018                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11019                         level = found_key.offset;
11020                 } else {
11021                         struct btrfs_tree_block_info *binfo;
11022
11023                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11024                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11025                         level = btrfs_tree_block_level(leaf, binfo);
11026                 }
11027
11028                 /*
11029                  * For a root extent, it must be of the following type and the
11030                  * first (and only one) iref in the item.
11031                  */
11032                 type = btrfs_extent_inline_ref_type(leaf, iref);
11033                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11034                         goto next;
11035
11036                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11037                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11038                 if (!entry) {
11039                         rii = malloc(sizeof(struct root_item_info));
11040                         if (!rii) {
11041                                 ret = -ENOMEM;
11042                                 goto out;
11043                         }
11044                         rii->cache_extent.start = root_id;
11045                         rii->cache_extent.size = 1;
11046                         rii->level = (u8)-1;
11047                         entry = &rii->cache_extent;
11048                         ret = insert_cache_extent(roots_info_cache, entry);
11049                         ASSERT(ret == 0);
11050                 } else {
11051                         rii = container_of(entry, struct root_item_info,
11052                                            cache_extent);
11053                 }
11054
11055                 ASSERT(rii->cache_extent.start == root_id);
11056                 ASSERT(rii->cache_extent.size == 1);
11057
11058                 if (level > rii->level || rii->level == (u8)-1) {
11059                         rii->level = level;
11060                         rii->bytenr = found_key.objectid;
11061                         rii->gen = btrfs_extent_generation(leaf, ei);
11062                         rii->node_count = 1;
11063                 } else if (level == rii->level) {
11064                         rii->node_count++;
11065                 }
11066 next:
11067                 path->slots[0]++;
11068         }
11069
11070 out:
11071         btrfs_free_path(path);
11072
11073         return ret;
11074 }
11075
11076 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11077                                   struct btrfs_path *path,
11078                                   const struct btrfs_key *root_key,
11079                                   const int read_only_mode)
11080 {
11081         const u64 root_id = root_key->objectid;
11082         struct cache_extent *entry;
11083         struct root_item_info *rii;
11084         struct btrfs_root_item ri;
11085         unsigned long offset;
11086
11087         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11088         if (!entry) {
11089                 fprintf(stderr,
11090                         "Error: could not find extent items for root %llu\n",
11091                         root_key->objectid);
11092                 return -ENOENT;
11093         }
11094
11095         rii = container_of(entry, struct root_item_info, cache_extent);
11096         ASSERT(rii->cache_extent.start == root_id);
11097         ASSERT(rii->cache_extent.size == 1);
11098
11099         if (rii->node_count != 1) {
11100                 fprintf(stderr,
11101                         "Error: could not find btree root extent for root %llu\n",
11102                         root_id);
11103                 return -ENOENT;
11104         }
11105
11106         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11107         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11108
11109         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11110             btrfs_root_level(&ri) != rii->level ||
11111             btrfs_root_generation(&ri) != rii->gen) {
11112
11113                 /*
11114                  * If we're in repair mode but our caller told us to not update
11115                  * the root item, i.e. just check if it needs to be updated, don't
11116                  * print this message, since the caller will call us again shortly
11117                  * for the same root item without read only mode (the caller will
11118                  * open a transaction first).
11119                  */
11120                 if (!(read_only_mode && repair))
11121                         fprintf(stderr,
11122                                 "%sroot item for root %llu,"
11123                                 " current bytenr %llu, current gen %llu, current level %u,"
11124                                 " new bytenr %llu, new gen %llu, new level %u\n",
11125                                 (read_only_mode ? "" : "fixing "),
11126                                 root_id,
11127                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11128                                 btrfs_root_level(&ri),
11129                                 rii->bytenr, rii->gen, rii->level);
11130
11131                 if (btrfs_root_generation(&ri) > rii->gen) {
11132                         fprintf(stderr,
11133                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11134                                 root_id, btrfs_root_generation(&ri), rii->gen);
11135                         return -EINVAL;
11136                 }
11137
11138                 if (!read_only_mode) {
11139                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11140                         btrfs_set_root_level(&ri, rii->level);
11141                         btrfs_set_root_generation(&ri, rii->gen);
11142                         write_extent_buffer(path->nodes[0], &ri,
11143                                             offset, sizeof(ri));
11144                 }
11145
11146                 return 1;
11147         }
11148
11149         return 0;
11150 }
11151
11152 /*
11153  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11154  * caused read-only snapshots to be corrupted if they were created at a moment
11155  * when the source subvolume/snapshot had orphan items. The issue was that the
11156  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11157  * node instead of the post orphan cleanup root node.
11158  * So this function, and its callees, just detects and fixes those cases. Even
11159  * though the regression was for read-only snapshots, this function applies to
11160  * any snapshot/subvolume root.
11161  * This must be run before any other repair code - not doing it so, makes other
11162  * repair code delete or modify backrefs in the extent tree for example, which
11163  * will result in an inconsistent fs after repairing the root items.
11164  */
11165 static int repair_root_items(struct btrfs_fs_info *info)
11166 {
11167         struct btrfs_path *path = NULL;
11168         struct btrfs_key key;
11169         struct extent_buffer *leaf;
11170         struct btrfs_trans_handle *trans = NULL;
11171         int ret = 0;
11172         int bad_roots = 0;
11173         int need_trans = 0;
11174
11175         ret = build_roots_info_cache(info);
11176         if (ret)
11177                 goto out;
11178
11179         path = btrfs_alloc_path();
11180         if (!path) {
11181                 ret = -ENOMEM;
11182                 goto out;
11183         }
11184
11185         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11186         key.type = BTRFS_ROOT_ITEM_KEY;
11187         key.offset = 0;
11188
11189 again:
11190         /*
11191          * Avoid opening and committing transactions if a leaf doesn't have
11192          * any root items that need to be fixed, so that we avoid rotating
11193          * backup roots unnecessarily.
11194          */
11195         if (need_trans) {
11196                 trans = btrfs_start_transaction(info->tree_root, 1);
11197                 if (IS_ERR(trans)) {
11198                         ret = PTR_ERR(trans);
11199                         goto out;
11200                 }
11201         }
11202
11203         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11204                                 0, trans ? 1 : 0);
11205         if (ret < 0)
11206                 goto out;
11207         leaf = path->nodes[0];
11208
11209         while (1) {
11210                 struct btrfs_key found_key;
11211
11212                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11213                         int no_more_keys = find_next_key(path, &key);
11214
11215                         btrfs_release_path(path);
11216                         if (trans) {
11217                                 ret = btrfs_commit_transaction(trans,
11218                                                                info->tree_root);
11219                                 trans = NULL;
11220                                 if (ret < 0)
11221                                         goto out;
11222                         }
11223                         need_trans = 0;
11224                         if (no_more_keys)
11225                                 break;
11226                         goto again;
11227                 }
11228
11229                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11230
11231                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11232                         goto next;
11233                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11234                         goto next;
11235
11236                 ret = maybe_repair_root_item(info, path, &found_key,
11237                                              trans ? 0 : 1);
11238                 if (ret < 0)
11239                         goto out;
11240                 if (ret) {
11241                         if (!trans && repair) {
11242                                 need_trans = 1;
11243                                 key = found_key;
11244                                 btrfs_release_path(path);
11245                                 goto again;
11246                         }
11247                         bad_roots++;
11248                 }
11249 next:
11250                 path->slots[0]++;
11251         }
11252         ret = 0;
11253 out:
11254         free_roots_info_cache();
11255         btrfs_free_path(path);
11256         if (trans)
11257                 btrfs_commit_transaction(trans, info->tree_root);
11258         if (ret < 0)
11259                 return ret;
11260
11261         return bad_roots;
11262 }
11263
/*
 * Help text of "btrfs check": a NULL-terminated list of lines.  The first
 * entries are the synopsis and descriptions, followed by an empty line and
 * the option descriptions.
 */
const char * const cmd_check_usage[] = {
	/* synopsis, short and long description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* superblock / root selection and operation mode */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               select mode, allows to make some memory/IO",
	"                            trade-offs, where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* extra checks and reports */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",

	/* terminator */
	NULL
};
11293
11294 int cmd_check(int argc, char **argv)
11295 {
11296         struct cache_tree root_cache;
11297         struct btrfs_root *root;
11298         struct btrfs_fs_info *info;
11299         u64 bytenr = 0;
11300         u64 subvolid = 0;
11301         u64 tree_root_bytenr = 0;
11302         u64 chunk_root_bytenr = 0;
11303         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11304         int ret;
11305         u64 num;
11306         int init_csum_tree = 0;
11307         int readonly = 0;
11308         int qgroup_report = 0;
11309         int qgroups_repaired = 0;
11310         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11311
11312         while(1) {
11313                 int c;
11314                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11315                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11316                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11317                         GETOPT_VAL_MODE };
11318                 static const struct option long_options[] = {
11319                         { "super", required_argument, NULL, 's' },
11320                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11321                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11322                         { "init-csum-tree", no_argument, NULL,
11323                                 GETOPT_VAL_INIT_CSUM },
11324                         { "init-extent-tree", no_argument, NULL,
11325                                 GETOPT_VAL_INIT_EXTENT },
11326                         { "check-data-csum", no_argument, NULL,
11327                                 GETOPT_VAL_CHECK_CSUM },
11328                         { "backup", no_argument, NULL, 'b' },
11329                         { "subvol-extents", required_argument, NULL, 'E' },
11330                         { "qgroup-report", no_argument, NULL, 'Q' },
11331                         { "tree-root", required_argument, NULL, 'r' },
11332                         { "chunk-root", required_argument, NULL,
11333                                 GETOPT_VAL_CHUNK_TREE },
11334                         { "progress", no_argument, NULL, 'p' },
11335                         { "mode", required_argument, NULL,
11336                                 GETOPT_VAL_MODE },
11337                         { NULL, 0, NULL, 0}
11338                 };
11339
11340                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11341                 if (c < 0)
11342                         break;
11343                 switch(c) {
11344                         case 'a': /* ignored */ break;
11345                         case 'b':
11346                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11347                                 break;
11348                         case 's':
11349                                 num = arg_strtou64(optarg);
11350                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11351                                         fprintf(stderr,
11352                                                 "ERROR: super mirror should be less than: %d\n",
11353                                                 BTRFS_SUPER_MIRROR_MAX);
11354                                         exit(1);
11355                                 }
11356                                 bytenr = btrfs_sb_offset(((int)num));
11357                                 printf("using SB copy %llu, bytenr %llu\n", num,
11358                                        (unsigned long long)bytenr);
11359                                 break;
11360                         case 'Q':
11361                                 qgroup_report = 1;
11362                                 break;
11363                         case 'E':
11364                                 subvolid = arg_strtou64(optarg);
11365                                 break;
11366                         case 'r':
11367                                 tree_root_bytenr = arg_strtou64(optarg);
11368                                 break;
11369                         case GETOPT_VAL_CHUNK_TREE:
11370                                 chunk_root_bytenr = arg_strtou64(optarg);
11371                                 break;
11372                         case 'p':
11373                                 ctx.progress_enabled = true;
11374                                 break;
11375                         case '?':
11376                         case 'h':
11377                                 usage(cmd_check_usage);
11378                         case GETOPT_VAL_REPAIR:
11379                                 printf("enabling repair mode\n");
11380                                 repair = 1;
11381                                 ctree_flags |= OPEN_CTREE_WRITES;
11382                                 break;
11383                         case GETOPT_VAL_READONLY:
11384                                 readonly = 1;
11385                                 break;
11386                         case GETOPT_VAL_INIT_CSUM:
11387                                 printf("Creating a new CRC tree\n");
11388                                 init_csum_tree = 1;
11389                                 repair = 1;
11390                                 ctree_flags |= OPEN_CTREE_WRITES;
11391                                 break;
11392                         case GETOPT_VAL_INIT_EXTENT:
11393                                 init_extent_tree = 1;
11394                                 ctree_flags |= (OPEN_CTREE_WRITES |
11395                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11396                                 repair = 1;
11397                                 break;
11398                         case GETOPT_VAL_CHECK_CSUM:
11399                                 check_data_csum = 1;
11400                                 break;
11401                         case GETOPT_VAL_MODE:
11402                                 check_mode = parse_check_mode(optarg);
11403                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11404                                         error("unknown mode: %s", optarg);
11405                                         exit(1);
11406                                 }
11407                                 break;
11408                 }
11409         }
11410
11411         if (check_argc_exact(argc - optind, 1))
11412                 usage(cmd_check_usage);
11413
11414         if (ctx.progress_enabled) {
11415                 ctx.tp = TASK_NOTHING;
11416                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11417         }
11418
11419         /* This check is the only reason for --readonly to exist */
11420         if (readonly && repair) {
11421                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11422                 exit(1);
11423         }
11424
11425         /*
11426          * Not supported yet
11427          */
11428         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11429                 error("Low memory mode doesn't support repair yet");
11430                 exit(1);
11431         }
11432
11433         radix_tree_init();
11434         cache_tree_init(&root_cache);
11435
11436         if((ret = check_mounted(argv[optind])) < 0) {
11437                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11438                 goto err_out;
11439         } else if(ret) {
11440                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11441                 ret = -EBUSY;
11442                 goto err_out;
11443         }
11444
11445         /* only allow partial opening under repair mode */
11446         if (repair)
11447                 ctree_flags |= OPEN_CTREE_PARTIAL;
11448
11449         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11450                                   chunk_root_bytenr, ctree_flags);
11451         if (!info) {
11452                 fprintf(stderr, "Couldn't open file system\n");
11453                 ret = -EIO;
11454                 goto err_out;
11455         }
11456
11457         global_info = info;
11458         root = info->fs_root;
11459
11460         /*
11461          * repair mode will force us to commit transaction which
11462          * will make us fail to load log tree when mounting.
11463          */
11464         if (repair && btrfs_super_log_root(info->super_copy)) {
11465                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11466                 if (!ret) {
11467                         ret = 1;
11468                         goto close_out;
11469                 }
11470                 ret = zero_log_tree(root);
11471                 if (ret) {
11472                         fprintf(stderr, "fail to zero log tree\n");
11473                         goto close_out;
11474                 }
11475         }
11476
11477         uuid_unparse(info->super_copy->fsid, uuidbuf);
11478         if (qgroup_report) {
11479                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11480                        uuidbuf);
11481                 ret = qgroup_verify_all(info);
11482                 if (ret == 0)
11483                         report_qgroups(1);
11484                 goto close_out;
11485         }
11486         if (subvolid) {
11487                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11488                        subvolid, argv[optind], uuidbuf);
11489                 ret = print_extent_state(info, subvolid);
11490                 goto close_out;
11491         }
11492         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11493
11494         if (!extent_buffer_uptodate(info->tree_root->node) ||
11495             !extent_buffer_uptodate(info->dev_root->node) ||
11496             !extent_buffer_uptodate(info->chunk_root->node)) {
11497                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11498                 ret = -EIO;
11499                 goto close_out;
11500         }
11501
11502         if (init_extent_tree || init_csum_tree) {
11503                 struct btrfs_trans_handle *trans;
11504
11505                 trans = btrfs_start_transaction(info->extent_root, 0);
11506                 if (IS_ERR(trans)) {
11507                         fprintf(stderr, "Error starting transaction\n");
11508                         ret = PTR_ERR(trans);
11509                         goto close_out;
11510                 }
11511
11512                 if (init_extent_tree) {
11513                         printf("Creating a new extent tree\n");
11514                         ret = reinit_extent_tree(trans, info);
11515                         if (ret)
11516                                 goto close_out;
11517                 }
11518
11519                 if (init_csum_tree) {
11520                         fprintf(stderr, "Reinit crc root\n");
11521                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11522                         if (ret) {
11523                                 fprintf(stderr, "crc root initialization failed\n");
11524                                 ret = -EIO;
11525                                 goto close_out;
11526                         }
11527
11528                         ret = fill_csum_tree(trans, info->csum_root,
11529                                              init_extent_tree);
11530                         if (ret) {
11531                                 fprintf(stderr, "crc refilling failed\n");
11532                                 return -EIO;
11533                         }
11534                 }
11535                 /*
11536                  * Ok now we commit and run the normal fsck, which will add
11537                  * extent entries for all of the items it finds.
11538                  */
11539                 ret = btrfs_commit_transaction(trans, info->extent_root);
11540                 if (ret)
11541                         goto close_out;
11542         }
11543         if (!extent_buffer_uptodate(info->extent_root->node)) {
11544                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11545                 ret = -EIO;
11546                 goto close_out;
11547         }
11548         if (!extent_buffer_uptodate(info->csum_root->node)) {
11549                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11550                 ret = -EIO;
11551                 goto close_out;
11552         }
11553
11554         if (!ctx.progress_enabled)
11555                 fprintf(stderr, "checking extents\n");
11556         if (check_mode == CHECK_MODE_LOWMEM)
11557                 ret = check_chunks_and_extents_v2(root);
11558         else
11559                 ret = check_chunks_and_extents(root);
11560         if (ret)
11561                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11562
11563         ret = repair_root_items(info);
11564         if (ret < 0)
11565                 goto close_out;
11566         if (repair) {
11567                 fprintf(stderr, "Fixed %d roots.\n", ret);
11568                 ret = 0;
11569         } else if (ret > 0) {
11570                 fprintf(stderr,
11571                        "Found %d roots with an outdated root item.\n",
11572                        ret);
11573                 fprintf(stderr,
11574                         "Please run a filesystem check with the option --repair to fix them.\n");
11575                 ret = 1;
11576                 goto close_out;
11577         }
11578
11579         if (!ctx.progress_enabled) {
11580                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11581                         fprintf(stderr, "checking free space tree\n");
11582                 else
11583                         fprintf(stderr, "checking free space cache\n");
11584         }
11585         ret = check_space_cache(root);
11586         if (ret)
11587                 goto out;
11588
11589         /*
11590          * We used to have to have these hole extents in between our real
11591          * extents so if we don't have this flag set we need to make sure there
11592          * are no gaps in the file extents for inodes, otherwise we can just
11593          * ignore it when this happens.
11594          */
11595         no_holes = btrfs_fs_incompat(root->fs_info,
11596                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11597         if (!ctx.progress_enabled)
11598                 fprintf(stderr, "checking fs roots\n");
11599         ret = check_fs_roots(root, &root_cache);
11600         if (ret)
11601                 goto out;
11602
11603         fprintf(stderr, "checking csums\n");
11604         ret = check_csums(root);
11605         if (ret)
11606                 goto out;
11607
11608         fprintf(stderr, "checking root refs\n");
11609         ret = check_root_refs(root, &root_cache);
11610         if (ret)
11611                 goto out;
11612
11613         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11614                 struct extent_buffer *eb;
11615
11616                 eb = list_first_entry(&root->fs_info->recow_ebs,
11617                                       struct extent_buffer, recow);
11618                 list_del_init(&eb->recow);
11619                 ret = recow_extent_buffer(root, eb);
11620                 if (ret)
11621                         break;
11622         }
11623
11624         while (!list_empty(&delete_items)) {
11625                 struct bad_item *bad;
11626
11627                 bad = list_first_entry(&delete_items, struct bad_item, list);
11628                 list_del_init(&bad->list);
11629                 if (repair)
11630                         ret = delete_bad_item(root, bad);
11631                 free(bad);
11632         }
11633
11634         if (info->quota_enabled) {
11635                 int err;
11636                 fprintf(stderr, "checking quota groups\n");
11637                 err = qgroup_verify_all(info);
11638                 if (err)
11639                         goto out;
11640                 report_qgroups(0);
11641                 err = repair_qgroups(info, &qgroups_repaired);
11642                 if (err)
11643                         goto out;
11644         }
11645
11646         if (!list_empty(&root->fs_info->recow_ebs)) {
11647                 fprintf(stderr, "Transid errors in file system\n");
11648                 ret = 1;
11649         }
11650 out:
11651         /* Don't override original ret */
11652         if (!ret && qgroups_repaired)
11653                 ret = qgroups_repaired;
11654
11655         if (found_old_backref) { /*
11656                  * there was a disk format change when mixed
11657                  * backref was in testing tree. The old format
11658                  * existed about one week.
11659                  */
11660                 printf("\n * Found old mixed backref format. "
11661                        "The old format is not supported! *"
11662                        "\n * Please mount the FS in readonly mode, "
11663                        "backup data and re-format the FS. *\n\n");
11664                 ret = 1;
11665         }
11666         printf("found %llu bytes used err is %d\n",
11667                (unsigned long long)bytes_used, ret);
11668         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11669         printf("total tree bytes: %llu\n",
11670                (unsigned long long)total_btree_bytes);
11671         printf("total fs tree bytes: %llu\n",
11672                (unsigned long long)total_fs_tree_bytes);
11673         printf("total extent tree bytes: %llu\n",
11674                (unsigned long long)total_extent_tree_bytes);
11675         printf("btree space waste bytes: %llu\n",
11676                (unsigned long long)btree_space_waste);
11677         printf("file data blocks allocated: %llu\n referenced %llu\n",
11678                 (unsigned long long)data_bytes_allocated,
11679                 (unsigned long long)data_bytes_referenced);
11680
11681         free_qgroup_counts();
11682         free_root_recs_tree(&root_cache);
11683 close_out:
11684         close_ctree(root);
11685 err_out:
11686         if (ctx.progress_enabled)
11687                 task_deinit(ctx.info);
11688
11689         return ret;
11690 }