btrfs-progs: check: adjust command line options for the low-memory mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases that the progress indicator can report.  The values index the
 * label table inside print_status_check(), so the order matters.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel, not a real phase: has to be the last element. */
	TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct rb_node node;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
96 {
97         return rb_entry(node, struct extent_backref, node);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
121 {
122         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124         struct data_backref *back1 = to_data_backref(ext1);
125         struct data_backref *back2 = to_data_backref(ext2);
126
127         WARN_ON(!ext1->is_data);
128         WARN_ON(!ext2->is_data);
129
130         /* parent and root are a union, so this covers both */
131         if (back1->parent > back2->parent)
132                 return 1;
133         if (back1->parent < back2->parent)
134                 return -1;
135
136         /* This is a full backref and the parents match. */
137         if (back1->node.full_backref)
138                 return 0;
139
140         if (back1->owner > back2->owner)
141                 return 1;
142         if (back1->owner < back2->owner)
143                 return -1;
144
145         if (back1->offset > back2->offset)
146                 return 1;
147         if (back1->offset < back2->offset)
148                 return -1;
149
150         if (back1->bytes > back2->bytes)
151                 return 1;
152         if (back1->bytes < back2->bytes)
153                 return -1;
154
155         if (back1->found_ref && back2->found_ref) {
156                 if (back1->disk_bytenr > back2->disk_bytenr)
157                         return 1;
158                 if (back1->disk_bytenr < back2->disk_bytenr)
159                         return -1;
160
161                 if (back1->found_ref > back2->found_ref)
162                         return 1;
163                 if (back1->found_ref < back2->found_ref)
164                         return -1;
165         }
166
167         return 0;
168 }
169
170 /*
171  * Much like data_backref, just removed the undetermined members
172  * and change it to use list_head.
173  * During extent scan, it is stored in root->orphan_data_extent.
174  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
175  */
176 struct orphan_data_extent {
177         struct list_head list;
178         u64 root;
179         u64 objectid;
180         u64 offset;
181         u64 disk_bytenr;
182         u64 disk_len;
183 };
184
185 struct tree_backref {
186         struct extent_backref node;
187         union {
188                 u64 parent;
189                 u64 root;
190         };
191 };
192
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
194 {
195         return container_of(back, struct tree_backref, node);
196 }
197
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
199 {
200         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202         struct tree_backref *back1 = to_tree_backref(ext1);
203         struct tree_backref *back2 = to_tree_backref(ext2);
204
205         WARN_ON(ext1->is_data);
206         WARN_ON(ext2->is_data);
207
208         /* parent and root are a union, so this covers both */
209         if (back1->parent > back2->parent)
210                 return 1;
211         if (back1->parent < back2->parent)
212                 return -1;
213
214         return 0;
215 }
216
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
218 {
219         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
221
222         if (ext1->is_data > ext2->is_data)
223                 return 1;
224
225         if (ext1->is_data < ext2->is_data)
226                 return -1;
227
228         if (ext1->full_backref > ext2->full_backref)
229                 return 1;
230         if (ext1->full_backref < ext2->full_backref)
231                 return -1;
232
233         if (ext1->is_data)
234                 return compare_data_backref(node1, node2);
235         else
236                 return compare_tree_backref(node1, node2);
237 }
238
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is a two-bit field so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
241
242 struct extent_record {
243         struct list_head backrefs;
244         struct list_head dups;
245         struct rb_root backref_tree;
246         struct list_head list;
247         struct cache_extent cache;
248         struct btrfs_disk_key parent_key;
249         u64 start;
250         u64 max_size;
251         u64 nr;
252         u64 refs;
253         u64 extent_item_refs;
254         u64 generation;
255         u64 parent_generation;
256         u64 info_objectid;
257         u32 num_duplicates;
258         u8 info_level;
259         unsigned int flag_block_full_backref:2;
260         unsigned int found_rec:1;
261         unsigned int content_checked:1;
262         unsigned int owner_ref_checked:1;
263         unsigned int is_root:1;
264         unsigned int metadata:1;
265         unsigned int bad_full_backref:1;
266         unsigned int crossing_stripes:1;
267         unsigned int wrong_chunk_type:1;
268 };
269
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
271 {
272         return container_of(entry, struct extent_record, list);
273 }
274
275 struct inode_backref {
276         struct list_head list;
277         unsigned int found_dir_item:1;
278         unsigned int found_dir_index:1;
279         unsigned int found_inode_ref:1;
280         unsigned int filetype:8;
281         int errors;
282         unsigned int ref_type;
283         u64 dir;
284         u64 index;
285         u16 namelen;
286         char name[0];
287 };
288
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
290 {
291         return list_entry(entry, struct inode_backref, list);
292 }
293
294 struct root_item_record {
295         struct list_head list;
296         u64 objectid;
297         u64 bytenr;
298         u64 last_snapshot;
299         u8 level;
300         u8 drop_level;
301         int level_size;
302         struct btrfs_key drop_key;
303 };
304
/* Reference error bits; print_ref_error() decodes them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
318
319 struct file_extent_hole {
320         struct rb_node node;
321         u64 start;
322         u64 len;
323 };
324
325 struct inode_record {
326         struct list_head backrefs;
327         unsigned int checked:1;
328         unsigned int merging:1;
329         unsigned int found_inode_item:1;
330         unsigned int found_dir_item:1;
331         unsigned int found_file_extent:1;
332         unsigned int found_csum_item:1;
333         unsigned int some_csum_missing:1;
334         unsigned int nodatasum:1;
335         int errors;
336
337         u64 ino;
338         u32 nlink;
339         u32 imode;
340         u64 isize;
341         u64 nbytes;
342
343         u32 found_link;
344         u64 found_size;
345         u64 extent_start;
346         u64 extent_end;
347         struct rb_root holes;
348         struct list_head orphan_extents;
349
350         u32 refs;
351 };
352
/* Inode error bits for inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
368
369 struct root_backref {
370         struct list_head list;
371         unsigned int found_dir_item:1;
372         unsigned int found_dir_index:1;
373         unsigned int found_back_ref:1;
374         unsigned int found_forward_ref:1;
375         unsigned int reachable:1;
376         int errors;
377         u64 ref_root;
378         u64 dir;
379         u64 index;
380         u16 namelen;
381         char name[0];
382 };
383
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
385 {
386         return list_entry(entry, struct root_backref, list);
387 }
388
389 struct root_record {
390         struct list_head backrefs;
391         struct cache_extent cache;
392         unsigned int found_root_item:1;
393         u64 objectid;
394         u32 found_ref;
395 };
396
397 struct ptr_node {
398         struct cache_extent cache;
399         void *data;
400 };
401
402 struct shared_node {
403         struct cache_extent cache;
404         struct cache_tree root_cache;
405         struct cache_tree inode_cache;
406         struct inode_record *current;
407         u32 refs;
408 };
409
410 struct block_info {
411         u64 start;
412         u32 size;
413 };
414
415 struct walk_control {
416         struct cache_tree shared;
417         struct shared_node *nodes[BTRFS_MAX_LEVEL];
418         int active_node;
419         int root_level;
420 };
421
422 struct bad_item {
423         struct btrfs_key key;
424         u64 root_id;
425         struct list_head list;
426 };
427
428 struct extent_entry {
429         u64 bytenr;
430         u64 bytes;
431         int count;
432         int broken;
433         struct list_head list;
434 };
435
436 struct root_item_info {
437         /* level of the root */
438         u8 level;
439         /* number of nodes at this level, must be 1 for a root */
440         int node_count;
441         u64 bytenr;
442         u64 gen;
443         struct cache_extent cache_extent;
444 };
445
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only used
 * internally for error classification.  Every class must own a distinct bit
 * so that an OR-ed result can still be decoded.  CROSSING_STRIPE_BOUNDARY
 * used to share bit 4 with REFERENCER_MISMATCH and now has its own bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
462
463 static void *print_status_check(void *p)
464 {
465         struct task_ctx *priv = p;
466         const char work_indicator[] = { '.', 'o', 'O', 'o' };
467         uint32_t count = 0;
468         static char *task_position_string[] = {
469                 "checking extents",
470                 "checking free space cache",
471                 "checking fs roots",
472         };
473
474         task_period_start(priv->info, 1000 /* 1s */);
475
476         if (priv->tp == TASK_NOTHING)
477                 return NULL;
478
479         while (1) {
480                 printf("%s [%c]\r", task_position_string[priv->tp],
481                                 work_indicator[count % 4]);
482                 count++;
483                 fflush(stdout);
484                 task_period_wait(priv->info);
485         }
486         return NULL;
487 }
488
/*
 * Progress-indicator completion hook: emit a newline and flush stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
496
497 static enum btrfs_check_mode parse_check_mode(const char *str)
498 {
499         if (strcmp(str, "lowmem") == 0)
500                 return CHECK_MODE_LOWMEM;
501         if (strcmp(str, "orig") == 0)
502                 return CHECK_MODE_ORIGINAL;
503         if (strcmp(str, "original") == 0)
504                 return CHECK_MODE_ORIGINAL;
505
506         return CHECK_MODE_UNKNOWN;
507 }
508
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
511 {
512         struct file_extent_hole *hole;
513
514         if (RB_EMPTY_ROOT(holes))
515                 return (u64)-1;
516
517         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
518         return hole->start;
519 }
520
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
522 {
523         struct file_extent_hole *hole1;
524         struct file_extent_hole *hole2;
525
526         hole1 = rb_entry(node1, struct file_extent_hole, node);
527         hole2 = rb_entry(node2, struct file_extent_hole, node);
528
529         if (hole1->start > hole2->start)
530                 return -1;
531         if (hole1->start < hole2->start)
532                 return 1;
533         /* Now hole1->start == hole2->start */
534         if (hole1->len >= hole2->len)
535                 /*
536                  * Hole 1 will be merge center
537                  * Same hole will be merged later
538                  */
539                 return -1;
540         /* Hole 2 will be merge center */
541         return 1;
542 }
543
544 /*
545  * Add a hole to the record
546  *
547  * This will do hole merge for copy_file_extent_holes(),
548  * which will ensure there won't be continuous holes.
549  */
550 static int add_file_extent_hole(struct rb_root *holes,
551                                 u64 start, u64 len)
552 {
553         struct file_extent_hole *hole;
554         struct file_extent_hole *prev = NULL;
555         struct file_extent_hole *next = NULL;
556
557         hole = malloc(sizeof(*hole));
558         if (!hole)
559                 return -ENOMEM;
560         hole->start = start;
561         hole->len = len;
562         /* Since compare will not return 0, no -EEXIST will happen */
563         rb_insert(holes, &hole->node, compare_hole);
564
565         /* simple merge with previous hole */
566         if (rb_prev(&hole->node))
567                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
568                                 node);
569         if (prev && prev->start + prev->len >= hole->start) {
570                 hole->len = hole->start + hole->len - prev->start;
571                 hole->start = prev->start;
572                 rb_erase(&prev->node, holes);
573                 free(prev);
574                 prev = NULL;
575         }
576
577         /* iterate merge with next holes */
578         while (1) {
579                 if (!rb_next(&hole->node))
580                         break;
581                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
582                                         node);
583                 if (hole->start + hole->len >= next->start) {
584                         if (hole->start + hole->len <= next->start + next->len)
585                                 hole->len = next->start + next->len -
586                                             hole->start;
587                         rb_erase(&next->node, holes);
588                         free(next);
589                         next = NULL;
590                 } else
591                         break;
592         }
593         return 0;
594 }
595
596 static int compare_hole_range(struct rb_node *node, void *data)
597 {
598         struct file_extent_hole *hole;
599         u64 start;
600
601         hole = (struct file_extent_hole *)data;
602         start = hole->start;
603
604         hole = rb_entry(node, struct file_extent_hole, node);
605         if (start < hole->start)
606                 return -1;
607         if (start >= hole->start && start < hole->start + hole->len)
608                 return 0;
609         return 1;
610 }
611
612 /*
613  * Delete a hole in the record
614  *
615  * This will do the hole split and is much restrict than add.
616  */
617 static int del_file_extent_hole(struct rb_root *holes,
618                                 u64 start, u64 len)
619 {
620         struct file_extent_hole *hole;
621         struct file_extent_hole tmp;
622         u64 prev_start = 0;
623         u64 prev_len = 0;
624         u64 next_start = 0;
625         u64 next_len = 0;
626         struct rb_node *node;
627         int have_prev = 0;
628         int have_next = 0;
629         int ret = 0;
630
631         tmp.start = start;
632         tmp.len = len;
633         node = rb_search(holes, &tmp, compare_hole_range, NULL);
634         if (!node)
635                 return -EEXIST;
636         hole = rb_entry(node, struct file_extent_hole, node);
637         if (start + len > hole->start + hole->len)
638                 return -EEXIST;
639
640         /*
641          * Now there will be no overlap, delete the hole and re-add the
642          * split(s) if they exists.
643          */
644         if (start > hole->start) {
645                 prev_start = hole->start;
646                 prev_len = start - hole->start;
647                 have_prev = 1;
648         }
649         if (hole->start + hole->len > start + len) {
650                 next_start = start + len;
651                 next_len = hole->start + hole->len - start - len;
652                 have_next = 1;
653         }
654         rb_erase(node, holes);
655         free(hole);
656         if (have_prev) {
657                 ret = add_file_extent_hole(holes, prev_start, prev_len);
658                 if (ret < 0)
659                         return ret;
660         }
661         if (have_next) {
662                 ret = add_file_extent_hole(holes, next_start, next_len);
663                 if (ret < 0)
664                         return ret;
665         }
666         return 0;
667 }
668
669 static int copy_file_extent_holes(struct rb_root *dst,
670                                   struct rb_root *src)
671 {
672         struct file_extent_hole *hole;
673         struct rb_node *node;
674         int ret = 0;
675
676         node = rb_first(src);
677         while (node) {
678                 hole = rb_entry(node, struct file_extent_hole, node);
679                 ret = add_file_extent_hole(dst, hole->start, hole->len);
680                 if (ret)
681                         break;
682                 node = rb_next(node);
683         }
684         return ret;
685 }
686
687 static void free_file_extent_holes(struct rb_root *holes)
688 {
689         struct rb_node *node;
690         struct file_extent_hole *hole;
691
692         node = rb_first(holes);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 rb_erase(node, holes);
696                 free(hole);
697                 node = rb_first(holes);
698         }
699 }
700
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
702
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704                                  struct btrfs_root *root)
705 {
706         if (root->last_trans != trans->transid) {
707                 root->track_dirty = 1;
708                 root->last_trans = trans->transid;
709                 root->commit_root = root->node;
710                 extent_buffer_get(root->node);
711         }
712 }
713
714 static u8 imode_to_type(u32 imode)
715 {
716 #define S_SHIFT 12
717         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
719                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
720                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
721                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
722                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
723                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
724                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
725         };
726
727         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
728 #undef S_SHIFT
729 }
730
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
732 {
733         struct device_record *rec1;
734         struct device_record *rec2;
735
736         rec1 = rb_entry(node1, struct device_record, node);
737         rec2 = rb_entry(node2, struct device_record, node);
738         if (rec1->devid > rec2->devid)
739                 return -1;
740         else if (rec1->devid < rec2->devid)
741                 return 1;
742         else
743                 return 0;
744 }
745
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
747 {
748         struct inode_record *rec;
749         struct inode_backref *backref;
750         struct inode_backref *orig;
751         struct inode_backref *tmp;
752         struct orphan_data_extent *src_orphan;
753         struct orphan_data_extent *dst_orphan;
754         size_t size;
755         int ret;
756
757         rec = malloc(sizeof(*rec));
758         if (!rec)
759                 return ERR_PTR(-ENOMEM);
760         memcpy(rec, orig_rec, sizeof(*rec));
761         rec->refs = 1;
762         INIT_LIST_HEAD(&rec->backrefs);
763         INIT_LIST_HEAD(&rec->orphan_extents);
764         rec->holes = RB_ROOT;
765
766         list_for_each_entry(orig, &orig_rec->backrefs, list) {
767                 size = sizeof(*orig) + orig->namelen + 1;
768                 backref = malloc(size);
769                 if (!backref) {
770                         ret = -ENOMEM;
771                         goto cleanup;
772                 }
773                 memcpy(backref, orig, size);
774                 list_add_tail(&backref->list, &rec->backrefs);
775         }
776         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777                 dst_orphan = malloc(sizeof(*dst_orphan));
778                 if (!dst_orphan) {
779                         ret = -ENOMEM;
780                         goto cleanup;
781                 }
782                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
784         }
785         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
786         BUG_ON(ret < 0);
787
788         return rec;
789
790 cleanup:
791         if (!list_empty(&rec->backrefs))
792                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793                         list_del(&orig->list);
794                         free(orig);
795                 }
796
797         if (!list_empty(&rec->orphan_extents))
798                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799                         list_del(&orig->list);
800                         free(orig);
801                 }
802
803         free(rec);
804
805         return ERR_PTR(ret);
806 }
807
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
809                                       u64 objectid)
810 {
811         struct orphan_data_extent *orphan;
812
813         if (list_empty(orphan_extents))
814                 return;
815         printf("The following data extent is lost in tree %llu:\n",
816                objectid);
817         list_for_each_entry(orphan, orphan_extents, list) {
818                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
820                        orphan->disk_len);
821         }
822 }
823
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
825 {
826         u64 root_objectid = root->root_key.objectid;
827         int errors = rec->errors;
828
829         if (!errors)
830                 return;
831         /* reloc root errors, we print its corresponding fs root objectid*/
832         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833                 root_objectid = root->root_key.offset;
834                 fprintf(stderr, "reloc");
835         }
836         fprintf(stderr, "root %llu inode %llu errors %x",
837                 (unsigned long long) root_objectid,
838                 (unsigned long long) rec->ino, rec->errors);
839
840         if (errors & I_ERR_NO_INODE_ITEM)
841                 fprintf(stderr, ", no inode item");
842         if (errors & I_ERR_NO_ORPHAN_ITEM)
843                 fprintf(stderr, ", no orphan item");
844         if (errors & I_ERR_DUP_INODE_ITEM)
845                 fprintf(stderr, ", dup inode item");
846         if (errors & I_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & I_ERR_ODD_DIR_ITEM)
849                 fprintf(stderr, ", odd dir item");
850         if (errors & I_ERR_ODD_FILE_EXTENT)
851                 fprintf(stderr, ", odd file extent");
852         if (errors & I_ERR_BAD_FILE_EXTENT)
853                 fprintf(stderr, ", bad file extent");
854         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855                 fprintf(stderr, ", file extent overlap");
856         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857                 fprintf(stderr, ", file extent discount");
858         if (errors & I_ERR_DIR_ISIZE_WRONG)
859                 fprintf(stderr, ", dir isize wrong");
860         if (errors & I_ERR_FILE_NBYTES_WRONG)
861                 fprintf(stderr, ", nbytes wrong");
862         if (errors & I_ERR_ODD_CSUM_ITEM)
863                 fprintf(stderr, ", odd csum item");
864         if (errors & I_ERR_SOME_CSUM_MISSING)
865                 fprintf(stderr, ", some csum missing");
866         if (errors & I_ERR_LINK_COUNT_WRONG)
867                 fprintf(stderr, ", link count wrong");
868         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869                 fprintf(stderr, ", orphan file extent");
870         fprintf(stderr, "\n");
871         /* Print the orphan extents if needed */
872         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
874
875         /* Print the holes if needed */
876         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877                 struct file_extent_hole *hole;
878                 struct rb_node *node;
879                 int found = 0;
880
881                 node = rb_first(&rec->holes);
882                 fprintf(stderr, "Found file extent holes:\n");
883                 while (node) {
884                         found = 1;
885                         hole = rb_entry(node, struct file_extent_hole, node);
886                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
887                                 hole->start, hole->len);
888                         node = rb_next(node);
889                 }
890                 if (!found)
891                         fprintf(stderr, "\tstart: 0, len: %llu\n",
892                                 round_up(rec->isize, root->sectorsize));
893         }
894 }
895
896 static void print_ref_error(int errors)
897 {
898         if (errors & REF_ERR_NO_DIR_ITEM)
899                 fprintf(stderr, ", no dir item");
900         if (errors & REF_ERR_NO_DIR_INDEX)
901                 fprintf(stderr, ", no dir index");
902         if (errors & REF_ERR_NO_INODE_REF)
903                 fprintf(stderr, ", no inode ref");
904         if (errors & REF_ERR_DUP_DIR_ITEM)
905                 fprintf(stderr, ", dup dir item");
906         if (errors & REF_ERR_DUP_DIR_INDEX)
907                 fprintf(stderr, ", dup dir index");
908         if (errors & REF_ERR_DUP_INODE_REF)
909                 fprintf(stderr, ", dup inode ref");
910         if (errors & REF_ERR_INDEX_UNMATCH)
911                 fprintf(stderr, ", index mismatch");
912         if (errors & REF_ERR_FILETYPE_UNMATCH)
913                 fprintf(stderr, ", filetype mismatch");
914         if (errors & REF_ERR_NAME_TOO_LONG)
915                 fprintf(stderr, ", name too long");
916         if (errors & REF_ERR_NO_ROOT_REF)
917                 fprintf(stderr, ", no root ref");
918         if (errors & REF_ERR_NO_ROOT_BACKREF)
919                 fprintf(stderr, ", no root backref");
920         if (errors & REF_ERR_DUP_ROOT_REF)
921                 fprintf(stderr, ", dup root ref");
922         if (errors & REF_ERR_DUP_ROOT_BACKREF)
923                 fprintf(stderr, ", dup root backref");
924         fprintf(stderr, "\n");
925 }
926
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
928                                           u64 ino, int mod)
929 {
930         struct ptr_node *node;
931         struct cache_extent *cache;
932         struct inode_record *rec = NULL;
933         int ret;
934
935         cache = lookup_cache_extent(inode_cache, ino, 1);
936         if (cache) {
937                 node = container_of(cache, struct ptr_node, cache);
938                 rec = node->data;
939                 if (mod && rec->refs > 1) {
940                         node->data = clone_inode_rec(rec);
941                         if (IS_ERR(node->data))
942                                 return node->data;
943                         rec->refs--;
944                         rec = node->data;
945                 }
946         } else if (mod) {
947                 rec = calloc(1, sizeof(*rec));
948                 if (!rec)
949                         return ERR_PTR(-ENOMEM);
950                 rec->ino = ino;
951                 rec->extent_start = (u64)-1;
952                 rec->refs = 1;
953                 INIT_LIST_HEAD(&rec->backrefs);
954                 INIT_LIST_HEAD(&rec->orphan_extents);
955                 rec->holes = RB_ROOT;
956
957                 node = malloc(sizeof(*node));
958                 if (!node) {
959                         free(rec);
960                         return ERR_PTR(-ENOMEM);
961                 }
962                 node->cache.start = ino;
963                 node->cache.size = 1;
964                 node->data = rec;
965
966                 if (ino == BTRFS_FREE_INO_OBJECTID)
967                         rec->found_link = 1;
968
969                 ret = insert_cache_extent(inode_cache, &node->cache);
970                 if (ret)
971                         return ERR_PTR(-EEXIST);
972         }
973         return rec;
974 }
975
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
977 {
978         struct orphan_data_extent *orphan;
979
980         while (!list_empty(orphan_extents)) {
981                 orphan = list_entry(orphan_extents->next,
982                                     struct orphan_data_extent, list);
983                 list_del(&orphan->list);
984                 free(orphan);
985         }
986 }
987
988 static void free_inode_rec(struct inode_record *rec)
989 {
990         struct inode_backref *backref;
991
992         if (--rec->refs > 0)
993                 return;
994
995         while (!list_empty(&rec->backrefs)) {
996                 backref = to_inode_backref(rec->backrefs.next);
997                 list_del(&backref->list);
998                 free(backref);
999         }
1000         free_orphan_data_extents(&rec->orphan_extents);
1001         free_file_extent_holes(&rec->holes);
1002         free(rec);
1003 }
1004
1005 static int can_free_inode_rec(struct inode_record *rec)
1006 {
1007         if (!rec->errors && rec->checked && rec->found_inode_item &&
1008             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1009                 return 1;
1010         return 0;
1011 }
1012
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014                                  struct inode_record *rec)
1015 {
1016         struct cache_extent *cache;
1017         struct inode_backref *tmp, *backref;
1018         struct ptr_node *node;
1019         unsigned char filetype;
1020
1021         if (!rec->found_inode_item)
1022                 return;
1023
1024         filetype = imode_to_type(rec->imode);
1025         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026                 if (backref->found_dir_item && backref->found_dir_index) {
1027                         if (backref->filetype != filetype)
1028                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029                         if (!backref->errors && backref->found_inode_ref &&
1030                             rec->nlink == rec->found_link) {
1031                                 list_del(&backref->list);
1032                                 free(backref);
1033                         }
1034                 }
1035         }
1036
1037         if (!rec->checked || rec->merging)
1038                 return;
1039
1040         if (S_ISDIR(rec->imode)) {
1041                 if (rec->found_size != rec->isize)
1042                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043                 if (rec->found_file_extent)
1044                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046                 if (rec->found_dir_item)
1047                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1048                 if (rec->found_size != rec->nbytes)
1049                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050                 if (rec->nlink > 0 && !no_holes &&
1051                     (rec->extent_end < rec->isize ||
1052                      first_extent_gap(&rec->holes) < rec->isize))
1053                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1054         }
1055
1056         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057                 if (rec->found_csum_item && rec->nodatasum)
1058                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059                 if (rec->some_csum_missing && !rec->nodatasum)
1060                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1061         }
1062
1063         BUG_ON(rec->refs != 1);
1064         if (can_free_inode_rec(rec)) {
1065                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066                 node = container_of(cache, struct ptr_node, cache);
1067                 BUG_ON(node->data != rec);
1068                 remove_cache_extent(inode_cache, &node->cache);
1069                 free(node);
1070                 free_inode_rec(rec);
1071         }
1072 }
1073
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1075 {
1076         struct btrfs_path path;
1077         struct btrfs_key key;
1078         int ret;
1079
1080         key.objectid = BTRFS_ORPHAN_OBJECTID;
1081         key.type = BTRFS_ORPHAN_ITEM_KEY;
1082         key.offset = ino;
1083
1084         btrfs_init_path(&path);
1085         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086         btrfs_release_path(&path);
1087         if (ret > 0)
1088                 ret = -ENOENT;
1089         return ret;
1090 }
1091
1092 static int process_inode_item(struct extent_buffer *eb,
1093                               int slot, struct btrfs_key *key,
1094                               struct shared_node *active_node)
1095 {
1096         struct inode_record *rec;
1097         struct btrfs_inode_item *item;
1098
1099         rec = active_node->current;
1100         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101         if (rec->found_inode_item) {
1102                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1103                 return 1;
1104         }
1105         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106         rec->nlink = btrfs_inode_nlink(eb, item);
1107         rec->isize = btrfs_inode_size(eb, item);
1108         rec->nbytes = btrfs_inode_nbytes(eb, item);
1109         rec->imode = btrfs_inode_mode(eb, item);
1110         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1111                 rec->nodatasum = 1;
1112         rec->found_inode_item = 1;
1113         if (rec->nlink == 0)
1114                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115         maybe_free_inode_rec(&active_node->inode_cache, rec);
1116         return 0;
1117 }
1118
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1120                                                 const char *name,
1121                                                 int namelen, u64 dir)
1122 {
1123         struct inode_backref *backref;
1124
1125         list_for_each_entry(backref, &rec->backrefs, list) {
1126                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1127                         break;
1128                 if (backref->dir != dir || backref->namelen != namelen)
1129                         continue;
1130                 if (memcmp(name, backref->name, namelen))
1131                         continue;
1132                 return backref;
1133         }
1134
1135         backref = malloc(sizeof(*backref) + namelen + 1);
1136         if (!backref)
1137                 return NULL;
1138         memset(backref, 0, sizeof(*backref));
1139         backref->dir = dir;
1140         backref->namelen = namelen;
1141         memcpy(backref->name, name, namelen);
1142         backref->name[namelen] = '\0';
1143         list_add_tail(&backref->list, &rec->backrefs);
1144         return backref;
1145 }
1146
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148                              u64 ino, u64 dir, u64 index,
1149                              const char *name, int namelen,
1150                              int filetype, int itemtype, int errors)
1151 {
1152         struct inode_record *rec;
1153         struct inode_backref *backref;
1154
1155         rec = get_inode_rec(inode_cache, ino, 1);
1156         BUG_ON(IS_ERR(rec));
1157         backref = get_inode_backref(rec, name, namelen, dir);
1158         BUG_ON(!backref);
1159         if (errors)
1160                 backref->errors |= errors;
1161         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162                 if (backref->found_dir_index)
1163                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164                 if (backref->found_inode_ref && backref->index != index)
1165                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1166                 if (backref->found_dir_item && backref->filetype != filetype)
1167                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1168
1169                 backref->index = index;
1170                 backref->filetype = filetype;
1171                 backref->found_dir_index = 1;
1172         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1173                 rec->found_link++;
1174                 if (backref->found_dir_item)
1175                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176                 if (backref->found_dir_index && backref->filetype != filetype)
1177                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1178
1179                 backref->filetype = filetype;
1180                 backref->found_dir_item = 1;
1181         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183                 if (backref->found_inode_ref)
1184                         backref->errors |= REF_ERR_DUP_INODE_REF;
1185                 if (backref->found_dir_index && backref->index != index)
1186                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1187                 else
1188                         backref->index = index;
1189
1190                 backref->ref_type = itemtype;
1191                 backref->found_inode_ref = 1;
1192         } else {
1193                 BUG_ON(1);
1194         }
1195
1196         maybe_free_inode_rec(inode_cache, rec);
1197         return 0;
1198 }
1199
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201                             struct cache_tree *dst_cache)
1202 {
1203         struct inode_backref *backref;
1204         u32 dir_count = 0;
1205         int ret = 0;
1206
1207         dst->merging = 1;
1208         list_for_each_entry(backref, &src->backrefs, list) {
1209                 if (backref->found_dir_index) {
1210                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1211                                         backref->index, backref->name,
1212                                         backref->namelen, backref->filetype,
1213                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1214                 }
1215                 if (backref->found_dir_item) {
1216                         dir_count++;
1217                         add_inode_backref(dst_cache, dst->ino,
1218                                         backref->dir, 0, backref->name,
1219                                         backref->namelen, backref->filetype,
1220                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1221                 }
1222                 if (backref->found_inode_ref) {
1223                         add_inode_backref(dst_cache, dst->ino,
1224                                         backref->dir, backref->index,
1225                                         backref->name, backref->namelen, 0,
1226                                         backref->ref_type, backref->errors);
1227                 }
1228         }
1229
1230         if (src->found_dir_item)
1231                 dst->found_dir_item = 1;
1232         if (src->found_file_extent)
1233                 dst->found_file_extent = 1;
1234         if (src->found_csum_item)
1235                 dst->found_csum_item = 1;
1236         if (src->some_csum_missing)
1237                 dst->some_csum_missing = 1;
1238         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1240                 if (ret < 0)
1241                         return ret;
1242         }
1243
1244         BUG_ON(src->found_link < dir_count);
1245         dst->found_link += src->found_link - dir_count;
1246         dst->found_size += src->found_size;
1247         if (src->extent_start != (u64)-1) {
1248                 if (dst->extent_start == (u64)-1) {
1249                         dst->extent_start = src->extent_start;
1250                         dst->extent_end = src->extent_end;
1251                 } else {
1252                         if (dst->extent_end > src->extent_start)
1253                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254                         else if (dst->extent_end < src->extent_start) {
1255                                 ret = add_file_extent_hole(&dst->holes,
1256                                         dst->extent_end,
1257                                         src->extent_start - dst->extent_end);
1258                         }
1259                         if (dst->extent_end < src->extent_end)
1260                                 dst->extent_end = src->extent_end;
1261                 }
1262         }
1263
1264         dst->errors |= src->errors;
1265         if (src->found_inode_item) {
1266                 if (!dst->found_inode_item) {
1267                         dst->nlink = src->nlink;
1268                         dst->isize = src->isize;
1269                         dst->nbytes = src->nbytes;
1270                         dst->imode = src->imode;
1271                         dst->nodatasum = src->nodatasum;
1272                         dst->found_inode_item = 1;
1273                 } else {
1274                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1275                 }
1276         }
1277         dst->merging = 0;
1278
1279         return 0;
1280 }
1281
/*
 * Merge the inode records collected in @src_node into @dst_node.
 *
 * One reference on @src_node is dropped.  If that was the last one the
 * ptr_nodes are moved out of the source caches; otherwise new ptr_nodes are
 * allocated that point at the same records (taking a reference on each).
 * A record whose key already exists in the destination is merged into the
 * existing one with merge_inode_recs().  Both root_cache and inode_cache
 * are processed, in that order.  Afterwards dst_node->current is moved to
 * the inode @src_node was working on if the destination has no current
 * record or the source one has a larger inode number.
 *
 * Always returns 0; allocation and lookup failures hit BUG_ON().
 */
1282 static int splice_shared_node(struct shared_node *src_node,
1283                               struct shared_node *dst_node)
1284 {
1285         struct cache_extent *cache;
1286         struct ptr_node *node, *ins;
1287         struct cache_tree *src, *dst;
1288         struct inode_record *rec, *conflict;
1289         u64 current_ino = 0;
1290         int splice = 0;
1291         int ret;
1292
        /* last reference: entries can be moved instead of shared */
1293         if (--src_node->refs == 0)
1294                 splice = 1;
1295         if (src_node->current)
1296                 current_ino = src_node->current->ino;
1297
        /* first pass handles root_cache, second pass (via goto) inode_cache */
1298         src = &src_node->root_cache;
1299         dst = &dst_node->root_cache;
1300 again:
1301         cache = search_cache_extent(src, 0);
1302         while (cache) {
1303                 node = container_of(cache, struct ptr_node, cache);
1304                 rec = node->data;
                /* advance before the current entry may be removed from src */
1305                 cache = next_cache_extent(cache);
1306
1307                 if (splice) {
1308                         remove_cache_extent(src, &node->cache);
1309                         ins = node;
1310                 } else {
1311                         ins = malloc(sizeof(*ins));
1312                         BUG_ON(!ins);
1313                         ins->cache.start = node->cache.start;
1314                         ins->cache.size = node->cache.size;
1315                         ins->data = rec;
1316                         rec->refs++;
1317                 }
1318                 ret = insert_cache_extent(dst, &ins->cache);
1319                 if (ret == -EEXIST) {
                        /*
                         * dst already has a record for this key: merge into
                         * it, then drop the reference ins held on rec and
                         * the now unused ptr_node.
                         */
1320                         conflict = get_inode_rec(dst, rec->ino, 1);
1321                         BUG_ON(IS_ERR(conflict));
1322                         merge_inode_recs(rec, conflict, dst);
1323                         if (rec->checked) {
1324                                 conflict->checked = 1;
                                /* conflict may be freed just below */
1325                                 if (dst_node->current == conflict)
1326                                         dst_node->current = NULL;
1327                         }
1328                         maybe_free_inode_rec(dst, conflict);
1329                         free_inode_rec(rec);
1330                         free(ins);
1331                 } else {
1332                         BUG_ON(ret);
1333                 }
1334         }
1335
1336         if (src == &src_node->root_cache) {
1337                 src = &src_node->inode_cache;
1338                 dst = &dst_node->inode_cache;
1339                 goto again;
1340         }
1341
        /* from here on dst refers to dst_node->inode_cache */
1342         if (current_ino > 0 && (!dst_node->current ||
1343             current_ino > dst_node->current->ino)) {
1344                 if (dst_node->current) {
1345                         dst_node->current->checked = 1;
1346                         maybe_free_inode_rec(dst, dst_node->current);
1347                 }
1348                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349                 BUG_ON(IS_ERR(dst_node->current));
1350         }
1351         return 0;
1352 }
1353
1354 static void free_inode_ptr(struct cache_extent *cache)
1355 {
1356         struct ptr_node *node;
1357         struct inode_record *rec;
1358
1359         node = container_of(cache, struct ptr_node, cache);
1360         rec = node->data;
1361         free_inode_rec(rec);
1362         free(node);
1363 }
1364
/*
 * Instantiate the tree-freeing helper for inode record caches, with
 * free_inode_ptr() as the per-entry destructor.
 * NOTE(review): presumably this expands to free_inode_recs_tree(), the name
 * called from enter_shared_node() -- confirm against the macro definition.
 */
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1366
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1368                                             u64 bytenr)
1369 {
1370         struct cache_extent *cache;
1371         struct shared_node *node;
1372
1373         cache = lookup_cache_extent(shared, bytenr, 1);
1374         if (cache) {
1375                 node = container_of(cache, struct shared_node, cache);
1376                 return node;
1377         }
1378         return NULL;
1379 }
1380
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1382 {
1383         int ret;
1384         struct shared_node *node;
1385
1386         node = calloc(1, sizeof(*node));
1387         if (!node)
1388                 return -ENOMEM;
1389         node->cache.start = bytenr;
1390         node->cache.size = 1;
1391         cache_tree_init(&node->root_cache);
1392         cache_tree_init(&node->inode_cache);
1393         node->refs = refs;
1394
1395         ret = insert_cache_extent(shared, &node->cache);
1396
1397         return ret;
1398 }
1399
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401                              struct walk_control *wc, int level)
1402 {
1403         struct shared_node *node;
1404         struct shared_node *dest;
1405         int ret;
1406
1407         if (level == wc->active_node)
1408                 return 0;
1409
1410         BUG_ON(wc->active_node <= level);
1411         node = find_shared_node(&wc->shared, bytenr);
1412         if (!node) {
1413                 ret = add_shared_node(&wc->shared, bytenr, refs);
1414                 BUG_ON(ret);
1415                 node = find_shared_node(&wc->shared, bytenr);
1416                 wc->nodes[level] = node;
1417                 wc->active_node = level;
1418                 return 0;
1419         }
1420
1421         if (wc->root_level == wc->active_node &&
1422             btrfs_root_refs(&root->root_item) == 0) {
1423                 if (--node->refs == 0) {
1424                         free_inode_recs_tree(&node->root_cache);
1425                         free_inode_recs_tree(&node->inode_cache);
1426                         remove_cache_extent(&wc->shared, &node->cache);
1427                         free(node);
1428                 }
1429                 return 1;
1430         }
1431
1432         dest = wc->nodes[wc->active_node];
1433         splice_shared_node(node, dest);
1434         if (node->refs == 0) {
1435                 remove_cache_extent(&wc->shared, &node->cache);
1436                 free(node);
1437         }
1438         return 1;
1439 }
1440
1441 static int leave_shared_node(struct btrfs_root *root,
1442                              struct walk_control *wc, int level)
1443 {
1444         struct shared_node *node;
1445         struct shared_node *dest;
1446         int i;
1447
1448         if (level == wc->root_level)
1449                 return 0;
1450
1451         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1452                 if (wc->nodes[i])
1453                         break;
1454         }
1455         BUG_ON(i >= BTRFS_MAX_LEVEL);
1456
1457         node = wc->nodes[wc->active_node];
1458         wc->nodes[wc->active_node] = NULL;
1459         wc->active_node = i;
1460
1461         dest = wc->nodes[wc->active_node];
1462         if (wc->active_node < wc->root_level ||
1463             btrfs_root_refs(&root->root_item) > 0) {
1464                 BUG_ON(node->refs <= 1);
1465                 splice_shared_node(node, dest);
1466         } else {
1467                 BUG_ON(node->refs < 2);
1468                 node->refs--;
1469         }
1470         return 0;
1471 }
1472
1473 /*
1474  * Returns:
1475  * < 0 - on error
1476  * 1   - if the root with id child_root_id is a child of root parent_root_id
1477  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1478  *       has other root(s) as parent(s)
1479  * 2   - if the root child_root_id doesn't have any parent roots
1480  */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1482                          u64 child_root_id)
1483 {
1484         struct btrfs_path path;
1485         struct btrfs_key key;
1486         struct extent_buffer *leaf;
1487         int has_parent = 0;
1488         int ret;
1489
1490         btrfs_init_path(&path);
1491
1492         key.objectid = parent_root_id;
1493         key.type = BTRFS_ROOT_REF_KEY;
1494         key.offset = child_root_id;
1495         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1496                                 0, 0);
1497         if (ret < 0)
1498                 return ret;
1499         btrfs_release_path(&path);
1500         if (!ret)
1501                 return 1;
1502
1503         key.objectid = child_root_id;
1504         key.type = BTRFS_ROOT_BACKREF_KEY;
1505         key.offset = 0;
1506         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1507                                 0, 0);
1508         if (ret < 0)
1509                 goto out;
1510
1511         while (1) {
1512                 leaf = path.nodes[0];
1513                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1515                         if (ret)
1516                                 break;
1517                         leaf = path.nodes[0];
1518                 }
1519
1520                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521                 if (key.objectid != child_root_id ||
1522                     key.type != BTRFS_ROOT_BACKREF_KEY)
1523                         break;
1524
1525                 has_parent = 1;
1526
1527                 if (key.offset == parent_root_id) {
1528                         btrfs_release_path(&path);
1529                         return 1;
1530                 }
1531
1532                 path.slots[0]++;
1533         }
1534 out:
1535         btrfs_release_path(&path);
1536         if (ret < 0)
1537                 return ret;
1538         return has_parent ? 0 : 2;
1539 }
1540
1541 static int process_dir_item(struct btrfs_root *root,
1542                             struct extent_buffer *eb,
1543                             int slot, struct btrfs_key *key,
1544                             struct shared_node *active_node)
1545 {
1546         u32 total;
1547         u32 cur = 0;
1548         u32 len;
1549         u32 name_len;
1550         u32 data_len;
1551         int error;
1552         int nritems = 0;
1553         int filetype;
1554         struct btrfs_dir_item *di;
1555         struct inode_record *rec;
1556         struct cache_tree *root_cache;
1557         struct cache_tree *inode_cache;
1558         struct btrfs_key location;
1559         char namebuf[BTRFS_NAME_LEN];
1560
1561         root_cache = &active_node->root_cache;
1562         inode_cache = &active_node->inode_cache;
1563         rec = active_node->current;
1564         rec->found_dir_item = 1;
1565
1566         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567         total = btrfs_item_size_nr(eb, slot);
1568         while (cur < total) {
1569                 nritems++;
1570                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571                 name_len = btrfs_dir_name_len(eb, di);
1572                 data_len = btrfs_dir_data_len(eb, di);
1573                 filetype = btrfs_dir_type(eb, di);
1574
1575                 rec->found_size += name_len;
1576                 if (name_len <= BTRFS_NAME_LEN) {
1577                         len = name_len;
1578                         error = 0;
1579                 } else {
1580                         len = BTRFS_NAME_LEN;
1581                         error = REF_ERR_NAME_TOO_LONG;
1582                 }
1583                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1584
1585                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586                         add_inode_backref(inode_cache, location.objectid,
1587                                           key->objectid, key->offset, namebuf,
1588                                           len, filetype, key->type, error);
1589                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590                         add_inode_backref(root_cache, location.objectid,
1591                                           key->objectid, key->offset,
1592                                           namebuf, len, filetype,
1593                                           key->type, error);
1594                 } else {
1595                         fprintf(stderr, "invalid location in dir item %u\n",
1596                                 location.type);
1597                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598                                           key->objectid, key->offset, namebuf,
1599                                           len, filetype, key->type, error);
1600                 }
1601
1602                 len = sizeof(*di) + name_len + data_len;
1603                 di = (struct btrfs_dir_item *)((char *)di + len);
1604                 cur += len;
1605         }
1606         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1608
1609         return 0;
1610 }
1611
1612 static int process_inode_ref(struct extent_buffer *eb,
1613                              int slot, struct btrfs_key *key,
1614                              struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         int error;
1622         struct cache_tree *inode_cache;
1623         struct btrfs_inode_ref *ref;
1624         char namebuf[BTRFS_NAME_LEN];
1625
1626         inode_cache = &active_node->inode_cache;
1627
1628         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629         total = btrfs_item_size_nr(eb, slot);
1630         while (cur < total) {
1631                 name_len = btrfs_inode_ref_name_len(eb, ref);
1632                 index = btrfs_inode_ref_index(eb, ref);
1633                 if (name_len <= BTRFS_NAME_LEN) {
1634                         len = name_len;
1635                         error = 0;
1636                 } else {
1637                         len = BTRFS_NAME_LEN;
1638                         error = REF_ERR_NAME_TOO_LONG;
1639                 }
1640                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641                 add_inode_backref(inode_cache, key->objectid, key->offset,
1642                                   index, namebuf, len, 0, key->type, error);
1643
1644                 len = sizeof(*ref) + name_len;
1645                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1646                 cur += len;
1647         }
1648         return 0;
1649 }
1650
1651 static int process_inode_extref(struct extent_buffer *eb,
1652                                 int slot, struct btrfs_key *key,
1653                                 struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         u64 parent;
1661         int error;
1662         struct cache_tree *inode_cache;
1663         struct btrfs_inode_extref *extref;
1664         char namebuf[BTRFS_NAME_LEN];
1665
1666         inode_cache = &active_node->inode_cache;
1667
1668         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669         total = btrfs_item_size_nr(eb, slot);
1670         while (cur < total) {
1671                 name_len = btrfs_inode_extref_name_len(eb, extref);
1672                 index = btrfs_inode_extref_index(eb, extref);
1673                 parent = btrfs_inode_extref_parent(eb, extref);
1674                 if (name_len <= BTRFS_NAME_LEN) {
1675                         len = name_len;
1676                         error = 0;
1677                 } else {
1678                         len = BTRFS_NAME_LEN;
1679                         error = REF_ERR_NAME_TOO_LONG;
1680                 }
1681                 read_extent_buffer(eb, namebuf,
1682                                    (unsigned long)(extref + 1), len);
1683                 add_inode_backref(inode_cache, key->objectid, parent,
1684                                   index, namebuf, len, 0, key->type, error);
1685
1686                 len = sizeof(*extref) + name_len;
1687                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1688                 cur += len;
1689         }
1690         return 0;
1691
1692 }
1693
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695                             u64 len, u64 *found)
1696 {
1697         struct btrfs_key key;
1698         struct btrfs_path path;
1699         struct extent_buffer *leaf;
1700         int ret;
1701         size_t size;
1702         *found = 0;
1703         u64 csum_end;
1704         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1705
1706         btrfs_init_path(&path);
1707
1708         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1709         key.offset = start;
1710         key.type = BTRFS_EXTENT_CSUM_KEY;
1711
1712         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1713                                 &key, &path, 0, 0);
1714         if (ret < 0)
1715                 goto out;
1716         if (ret > 0 && path.slots[0] > 0) {
1717                 leaf = path.nodes[0];
1718                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720                     key.type == BTRFS_EXTENT_CSUM_KEY)
1721                         path.slots[0]--;
1722         }
1723
1724         while (len > 0) {
1725                 leaf = path.nodes[0];
1726                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1728                         if (ret > 0)
1729                                 break;
1730                         else if (ret < 0)
1731                                 goto out;
1732                         leaf = path.nodes[0];
1733                 }
1734
1735                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737                     key.type != BTRFS_EXTENT_CSUM_KEY)
1738                         break;
1739
1740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741                 if (key.offset >= start + len)
1742                         break;
1743
1744                 if (key.offset > start)
1745                         start = key.offset;
1746
1747                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749                 if (csum_end > start) {
1750                         size = min(csum_end - start, len);
1751                         len -= size;
1752                         start += size;
1753                         *found += size;
1754                 }
1755
1756                 path.slots[0]++;
1757         }
1758 out:
1759         btrfs_release_path(&path);
1760         if (ret < 0)
1761                 return ret;
1762         return 0;
1763 }
1764
1765 static int process_file_extent(struct btrfs_root *root,
1766                                 struct extent_buffer *eb,
1767                                 int slot, struct btrfs_key *key,
1768                                 struct shared_node *active_node)
1769 {
1770         struct inode_record *rec;
1771         struct btrfs_file_extent_item *fi;
1772         u64 num_bytes = 0;
1773         u64 disk_bytenr = 0;
1774         u64 extent_offset = 0;
1775         u64 mask = root->sectorsize - 1;
1776         int extent_type;
1777         int ret;
1778
1779         rec = active_node->current;
1780         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781         rec->found_file_extent = 1;
1782
1783         if (rec->extent_start == (u64)-1) {
1784                 rec->extent_start = key->offset;
1785                 rec->extent_end = key->offset;
1786         }
1787
1788         if (rec->extent_end > key->offset)
1789                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790         else if (rec->extent_end < key->offset) {
1791                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792                                            key->offset - rec->extent_end);
1793                 if (ret < 0)
1794                         return ret;
1795         }
1796
1797         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798         extent_type = btrfs_file_extent_type(eb, fi);
1799
1800         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1802                 if (num_bytes == 0)
1803                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804                 rec->found_size += num_bytes;
1805                 num_bytes = (num_bytes + mask) & ~mask;
1806         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810                 extent_offset = btrfs_file_extent_offset(eb, fi);
1811                 if (num_bytes == 0 || (num_bytes & mask))
1812                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813                 if (num_bytes + extent_offset >
1814                     btrfs_file_extent_ram_bytes(eb, fi))
1815                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817                     (btrfs_file_extent_compression(eb, fi) ||
1818                      btrfs_file_extent_encryption(eb, fi) ||
1819                      btrfs_file_extent_other_encoding(eb, fi)))
1820                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821                 if (disk_bytenr > 0)
1822                         rec->found_size += num_bytes;
1823         } else {
1824                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1825         }
1826         rec->extent_end = key->offset + num_bytes;
1827
1828         /*
1829          * The data reloc tree will copy full extents into its inode and then
1830          * copy the corresponding csums.  Because the extent it copied could be
1831          * a preallocated extent that hasn't been written to yet there may be no
1832          * csums to copy, ergo we won't have csums for our file extent.  This is
1833          * ok so just don't bother checking csums if the inode belongs to the
1834          * data reloc tree.
1835          */
1836         if (disk_bytenr > 0 &&
1837             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1838                 u64 found;
1839                 if (btrfs_file_extent_compression(eb, fi))
1840                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1841                 else
1842                         disk_bytenr += extent_offset;
1843
1844                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1845                 if (ret < 0)
1846                         return ret;
1847                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1848                         if (found > 0)
1849                                 rec->found_csum_item = 1;
1850                         if (found < num_bytes)
1851                                 rec->some_csum_missing = 1;
1852                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1853                         if (found > 0)
1854                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1855                 }
1856         }
1857         return 0;
1858 }
1859
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861                             struct walk_control *wc)
1862 {
1863         struct btrfs_key key;
1864         u32 nritems;
1865         int i;
1866         int ret = 0;
1867         struct cache_tree *inode_cache;
1868         struct shared_node *active_node;
1869
1870         if (wc->root_level == wc->active_node &&
1871             btrfs_root_refs(&root->root_item) == 0)
1872                 return 0;
1873
1874         active_node = wc->nodes[wc->active_node];
1875         inode_cache = &active_node->inode_cache;
1876         nritems = btrfs_header_nritems(eb);
1877         for (i = 0; i < nritems; i++) {
1878                 btrfs_item_key_to_cpu(eb, &key, i);
1879
1880                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1881                         continue;
1882                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1883                         continue;
1884
1885                 if (active_node->current == NULL ||
1886                     active_node->current->ino < key.objectid) {
1887                         if (active_node->current) {
1888                                 active_node->current->checked = 1;
1889                                 maybe_free_inode_rec(inode_cache,
1890                                                      active_node->current);
1891                         }
1892                         active_node->current = get_inode_rec(inode_cache,
1893                                                              key.objectid, 1);
1894                         BUG_ON(IS_ERR(active_node->current));
1895                 }
1896                 switch (key.type) {
1897                 case BTRFS_DIR_ITEM_KEY:
1898                 case BTRFS_DIR_INDEX_KEY:
1899                         ret = process_dir_item(root, eb, i, &key, active_node);
1900                         break;
1901                 case BTRFS_INODE_REF_KEY:
1902                         ret = process_inode_ref(eb, i, &key, active_node);
1903                         break;
1904                 case BTRFS_INODE_EXTREF_KEY:
1905                         ret = process_inode_extref(eb, i, &key, active_node);
1906                         break;
1907                 case BTRFS_INODE_ITEM_KEY:
1908                         ret = process_inode_item(eb, i, &key, active_node);
1909                         break;
1910                 case BTRFS_EXTENT_DATA_KEY:
1911                         ret = process_file_extent(root, eb, i, &key,
1912                                                   active_node);
1913                         break;
1914                 default:
1915                         break;
1916                 };
1917         }
1918         return ret;
1919 }
1920
1921 static void reada_walk_down(struct btrfs_root *root,
1922                             struct extent_buffer *node, int slot)
1923 {
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         u32 nritems;
1927         u32 blocksize;
1928         int i;
1929         int level;
1930
1931         level = btrfs_header_level(node);
1932         if (level != 1)
1933                 return;
1934
1935         nritems = btrfs_header_nritems(node);
1936         blocksize = root->nodesize;
1937         for (i = slot; i < nritems; i++) {
1938                 bytenr = btrfs_node_blockptr(node, i);
1939                 ptr_gen = btrfs_node_ptr_generation(node, i);
1940                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1941         }
1942 }
1943
1944 /*
1945  * Check the child node/leaf by the following condition:
1946  * 1. the first item key of the node/leaf should be the same with the one
1947  *    in parent.
1948  * 2. block in parent node should match the child node/leaf.
1949  * 3. generation of parent node and child's header should be consistent.
1950  *
1951  * Or the child node/leaf pointed by the key in parent is not valid.
1952  *
1953  * We hope to check leaf owner too, but since subvol may share leaves,
1954  * which makes leaf owner check not so strong, key check should be
1955  * sufficient enough for that case.
1956  */
1957 static int check_child_node(struct btrfs_root *root,
1958                             struct extent_buffer *parent, int slot,
1959                             struct extent_buffer *child)
1960 {
1961         struct btrfs_key parent_key;
1962         struct btrfs_key child_key;
1963         int ret = 0;
1964
1965         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966         if (btrfs_header_level(child) == 0)
1967                 btrfs_item_key_to_cpu(child, &child_key, 0);
1968         else
1969                 btrfs_node_key_to_cpu(child, &child_key, 0);
1970
1971         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1972                 ret = -EINVAL;
1973                 fprintf(stderr,
1974                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975                         parent_key.objectid, parent_key.type, parent_key.offset,
1976                         child_key.objectid, child_key.type, child_key.offset);
1977         }
1978         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1979                 ret = -EINVAL;
1980                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981                         btrfs_node_blockptr(parent, slot),
1982                         btrfs_header_bytenr(child));
1983         }
1984         if (btrfs_node_ptr_generation(parent, slot) !=
1985             btrfs_header_generation(child)) {
1986                 ret = -EINVAL;
1987                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988                         btrfs_header_generation(child),
1989                         btrfs_node_ptr_generation(parent, slot));
1990         }
1991         return ret;
1992 }
1993
/*
 * Per-level cache of extent reference counts used by walk_down_tree().
 * Both arrays are indexed by tree level: refs[level] is the reference count
 * last looked up for the tree block at bytenr[level], so a repeated visit of
 * the same block can reuse it instead of calling btrfs_lookup_extent_info()
 * again.
 */
1994 struct node_refs {
        /* Start bytenr of the block whose count is cached, per level. */
1995         u64 bytenr[BTRFS_MAX_LEVEL];
        /* Reference count cached for bytenr[] at the same index. */
1996         u64 refs[BTRFS_MAX_LEVEL];
1997 };
1998
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000                           struct walk_control *wc, int *level,
2001                           struct node_refs *nrefs)
2002 {
2003         enum btrfs_tree_block_status status;
2004         u64 bytenr;
2005         u64 ptr_gen;
2006         struct extent_buffer *next;
2007         struct extent_buffer *cur;
2008         u32 blocksize;
2009         int ret, err = 0;
2010         u64 refs;
2011
2012         WARN_ON(*level < 0);
2013         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014
2015         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016                 refs = nrefs->refs[*level];
2017                 ret = 0;
2018         } else {
2019                 ret = btrfs_lookup_extent_info(NULL, root,
2020                                        path->nodes[*level]->start,
2021                                        *level, 1, &refs, NULL);
2022                 if (ret < 0) {
2023                         err = ret;
2024                         goto out;
2025                 }
2026                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027                 nrefs->refs[*level] = refs;
2028         }
2029
2030         if (refs > 1) {
2031                 ret = enter_shared_node(root, path->nodes[*level]->start,
2032                                         refs, wc, *level);
2033                 if (ret > 0) {
2034                         err = ret;
2035                         goto out;
2036                 }
2037         }
2038
2039         while (*level >= 0) {
2040                 WARN_ON(*level < 0);
2041                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042                 cur = path->nodes[*level];
2043
2044                 if (btrfs_header_level(cur) != *level)
2045                         WARN_ON(1);
2046
2047                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2048                         break;
2049                 if (*level == 0) {
2050                         ret = process_one_leaf(root, cur, wc);
2051                         if (ret < 0)
2052                                 err = ret;
2053                         break;
2054                 }
2055                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057                 blocksize = root->nodesize;
2058
2059                 if (bytenr == nrefs->bytenr[*level - 1]) {
2060                         refs = nrefs->refs[*level - 1];
2061                 } else {
2062                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063                                         *level - 1, 1, &refs, NULL);
2064                         if (ret < 0) {
2065                                 refs = 0;
2066                         } else {
2067                                 nrefs->bytenr[*level - 1] = bytenr;
2068                                 nrefs->refs[*level - 1] = refs;
2069                         }
2070                 }
2071
2072                 if (refs > 1) {
2073                         ret = enter_shared_node(root, bytenr, refs,
2074                                                 wc, *level - 1);
2075                         if (ret > 0) {
2076                                 path->slots[*level]++;
2077                                 continue;
2078                         }
2079                 }
2080
2081                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083                         free_extent_buffer(next);
2084                         reada_walk_down(root, cur, path->slots[*level]);
2085                         next = read_tree_block(root, bytenr, blocksize,
2086                                                ptr_gen);
2087                         if (!extent_buffer_uptodate(next)) {
2088                                 struct btrfs_key node_key;
2089
2090                                 btrfs_node_key_to_cpu(path->nodes[*level],
2091                                                       &node_key,
2092                                                       path->slots[*level]);
2093                                 btrfs_add_corrupt_extent_record(root->fs_info,
2094                                                 &node_key,
2095                                                 path->nodes[*level]->start,
2096                                                 root->nodesize, *level);
2097                                 err = -EIO;
2098                                 goto out;
2099                         }
2100                 }
2101
2102                 ret = check_child_node(root, cur, path->slots[*level], next);
2103                 if (ret) {
2104                         err = ret;
2105                         goto out;
2106                 }
2107
2108                 if (btrfs_is_leaf(next))
2109                         status = btrfs_check_leaf(root, NULL, next);
2110                 else
2111                         status = btrfs_check_node(root, NULL, next);
2112                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113                         free_extent_buffer(next);
2114                         err = -EIO;
2115                         goto out;
2116                 }
2117
2118                 *level = *level - 1;
2119                 free_extent_buffer(path->nodes[*level]);
2120                 path->nodes[*level] = next;
2121                 path->slots[*level] = 0;
2122         }
2123 out:
2124         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2125         return err;
2126 }
2127
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129                         struct walk_control *wc, int *level)
2130 {
2131         int i;
2132         struct extent_buffer *leaf;
2133
2134         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135                 leaf = path->nodes[i];
2136                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2137                         path->slots[i]++;
2138                         *level = i;
2139                         return 0;
2140                 } else {
2141                         free_extent_buffer(path->nodes[*level]);
2142                         path->nodes[*level] = NULL;
2143                         BUG_ON(*level > wc->active_node);
2144                         if (*level == wc->active_node)
2145                                 leave_shared_node(root, wc, *level);
2146                         *level = i + 1;
2147                 }
2148         }
2149         return 1;
2150 }
2151
2152 static int check_root_dir(struct inode_record *rec)
2153 {
2154         struct inode_backref *backref;
2155         int ret = -1;
2156
2157         if (!rec->found_inode_item || rec->errors)
2158                 goto out;
2159         if (rec->nlink != 1 || rec->found_link != 0)
2160                 goto out;
2161         if (list_empty(&rec->backrefs))
2162                 goto out;
2163         backref = to_inode_backref(rec->backrefs.next);
2164         if (!backref->found_inode_ref)
2165                 goto out;
2166         if (backref->index != 0 || backref->namelen != 2 ||
2167             memcmp(backref->name, "..", 2))
2168                 goto out;
2169         if (backref->found_dir_index || backref->found_dir_item)
2170                 goto out;
2171         ret = 0;
2172 out:
2173         return ret;
2174 }
2175
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177                               struct btrfs_root *root, struct btrfs_path *path,
2178                               struct inode_record *rec)
2179 {
2180         struct btrfs_inode_item *ei;
2181         struct btrfs_key key;
2182         int ret;
2183
2184         key.objectid = rec->ino;
2185         key.type = BTRFS_INODE_ITEM_KEY;
2186         key.offset = (u64)-1;
2187
2188         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2189         if (ret < 0)
2190                 goto out;
2191         if (ret) {
2192                 if (!path->slots[0]) {
2193                         ret = -ENOENT;
2194                         goto out;
2195                 }
2196                 path->slots[0]--;
2197                 ret = 0;
2198         }
2199         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200         if (key.objectid != rec->ino) {
2201                 ret = -ENOENT;
2202                 goto out;
2203         }
2204
2205         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206                             struct btrfs_inode_item);
2207         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208         btrfs_mark_buffer_dirty(path->nodes[0]);
2209         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211                root->root_key.objectid);
2212 out:
2213         btrfs_release_path(path);
2214         return ret;
2215 }
2216
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218                                     struct btrfs_root *root,
2219                                     struct btrfs_path *path,
2220                                     struct inode_record *rec)
2221 {
2222         int ret;
2223
2224         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225         btrfs_release_path(path);
2226         if (!ret)
2227                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2228         return ret;
2229 }
2230
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         struct btrfs_inode_item *ei;
2237         struct btrfs_key key;
2238         int ret = 0;
2239
2240         key.objectid = rec->ino;
2241         key.type = BTRFS_INODE_ITEM_KEY;
2242         key.offset = 0;
2243
2244         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2245         if (ret) {
2246                 if (ret > 0)
2247                         ret = -ENOENT;
2248                 goto out;
2249         }
2250
2251         /* Since ret == 0, no need to check anything */
2252         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253                             struct btrfs_inode_item);
2254         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255         btrfs_mark_buffer_dirty(path->nodes[0]);
2256         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257         printf("reset nbytes for ino %llu root %llu\n",
2258                rec->ino, root->root_key.objectid);
2259 out:
2260         btrfs_release_path(path);
2261         return ret;
2262 }
2263
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265                                  struct cache_tree *inode_cache,
2266                                  struct inode_record *rec,
2267                                  struct inode_backref *backref)
2268 {
2269         struct btrfs_path *path;
2270         struct btrfs_trans_handle *trans;
2271         struct btrfs_dir_item *dir_item;
2272         struct extent_buffer *leaf;
2273         struct btrfs_key key;
2274         struct btrfs_disk_key disk_key;
2275         struct inode_record *dir_rec;
2276         unsigned long name_ptr;
2277         u32 data_size = sizeof(*dir_item) + backref->namelen;
2278         int ret;
2279
2280         path = btrfs_alloc_path();
2281         if (!path)
2282                 return -ENOMEM;
2283
2284         trans = btrfs_start_transaction(root, 1);
2285         if (IS_ERR(trans)) {
2286                 btrfs_free_path(path);
2287                 return PTR_ERR(trans);
2288         }
2289
2290         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291                 (unsigned long long)rec->ino);
2292         key.objectid = backref->dir;
2293         key.type = BTRFS_DIR_INDEX_KEY;
2294         key.offset = backref->index;
2295
2296         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2297         BUG_ON(ret);
2298
2299         leaf = path->nodes[0];
2300         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2301
2302         disk_key.objectid = cpu_to_le64(rec->ino);
2303         disk_key.type = BTRFS_INODE_ITEM_KEY;
2304         disk_key.offset = 0;
2305
2306         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308         btrfs_set_dir_data_len(leaf, dir_item, 0);
2309         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310         name_ptr = (unsigned long)(dir_item + 1);
2311         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312         btrfs_mark_buffer_dirty(leaf);
2313         btrfs_free_path(path);
2314         btrfs_commit_transaction(trans, root);
2315
2316         backref->found_dir_index = 1;
2317         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318         BUG_ON(IS_ERR(dir_rec));
2319         if (!dir_rec)
2320                 return 0;
2321         dir_rec->found_size += backref->namelen;
2322         if (dir_rec->found_size == dir_rec->isize &&
2323             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325         if (dir_rec->found_size != dir_rec->isize)
2326                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2327
2328         return 0;
2329 }
2330
2331 static int delete_dir_index(struct btrfs_root *root,
2332                             struct cache_tree *inode_cache,
2333                             struct inode_record *rec,
2334                             struct inode_backref *backref)
2335 {
2336         struct btrfs_trans_handle *trans;
2337         struct btrfs_dir_item *di;
2338         struct btrfs_path *path;
2339         int ret = 0;
2340
2341         path = btrfs_alloc_path();
2342         if (!path)
2343                 return -ENOMEM;
2344
2345         trans = btrfs_start_transaction(root, 1);
2346         if (IS_ERR(trans)) {
2347                 btrfs_free_path(path);
2348                 return PTR_ERR(trans);
2349         }
2350
2351
2352         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353                 (unsigned long long)backref->dir,
2354                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355                 (unsigned long long)root->objectid);
2356
2357         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358                                     backref->name, backref->namelen,
2359                                     backref->index, -1);
2360         if (IS_ERR(di)) {
2361                 ret = PTR_ERR(di);
2362                 btrfs_free_path(path);
2363                 btrfs_commit_transaction(trans, root);
2364                 if (ret == -ENOENT)
2365                         return 0;
2366                 return ret;
2367         }
2368
2369         if (!di)
2370                 ret = btrfs_del_item(trans, root, path);
2371         else
2372                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2373         BUG_ON(ret);
2374         btrfs_free_path(path);
2375         btrfs_commit_transaction(trans, root);
2376         return ret;
2377 }
2378
2379 static int create_inode_item(struct btrfs_root *root,
2380                              struct inode_record *rec,
2381                              struct inode_backref *backref, int root_dir)
2382 {
2383         struct btrfs_trans_handle *trans;
2384         struct btrfs_inode_item inode_item;
2385         time_t now = time(NULL);
2386         int ret;
2387
2388         trans = btrfs_start_transaction(root, 1);
2389         if (IS_ERR(trans)) {
2390                 ret = PTR_ERR(trans);
2391                 return ret;
2392         }
2393
2394         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395                 "be incomplete, please check permissions and content after "
2396                 "the fsck completes.\n", (unsigned long long)root->objectid,
2397                 (unsigned long long)rec->ino);
2398
2399         memset(&inode_item, 0, sizeof(inode_item));
2400         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2401         if (root_dir)
2402                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2403         else
2404                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406         if (rec->found_dir_item) {
2407                 if (rec->found_file_extent)
2408                         fprintf(stderr, "root %llu inode %llu has both a dir "
2409                                 "item and extents, unsure if it is a dir or a "
2410                                 "regular file so setting it as a directory\n",
2411                                 (unsigned long long)root->objectid,
2412                                 (unsigned long long)rec->ino);
2413                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415         } else if (!rec->found_dir_item) {
2416                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2418         }
2419         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2427
2428         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2429         BUG_ON(ret);
2430         btrfs_commit_transaction(trans, root);
2431         return 0;
2432 }
2433
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435                                  struct inode_record *rec,
2436                                  struct cache_tree *inode_cache,
2437                                  int delete)
2438 {
2439         struct inode_backref *tmp, *backref;
2440         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2441         int ret = 0;
2442         int repaired = 0;
2443
2444         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445                 if (!delete && rec->ino == root_dirid) {
2446                         if (!rec->found_inode_item) {
2447                                 ret = create_inode_item(root, rec, backref, 1);
2448                                 if (ret)
2449                                         break;
2450                                 repaired++;
2451                         }
2452                 }
2453
2454                 /* Index 0 for root dir's are special, don't mess with it */
2455                 if (rec->ino == root_dirid && backref->index == 0)
2456                         continue;
2457
2458                 if (delete &&
2459                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2460                      (backref->found_dir_index && backref->found_inode_ref &&
2461                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462                         ret = delete_dir_index(root, inode_cache, rec, backref);
2463                         if (ret)
2464                                 break;
2465                         repaired++;
2466                         list_del(&backref->list);
2467                         free(backref);
2468                 }
2469
2470                 if (!delete && !backref->found_dir_index &&
2471                     backref->found_dir_item && backref->found_inode_ref) {
2472                         ret = add_missing_dir_index(root, inode_cache, rec,
2473                                                     backref);
2474                         if (ret)
2475                                 break;
2476                         repaired++;
2477                         if (backref->found_dir_item &&
2478                             backref->found_dir_index &&
2479                             backref->found_dir_index) {
2480                                 if (!backref->errors &&
2481                                     backref->found_inode_ref) {
2482                                         list_del(&backref->list);
2483                                         free(backref);
2484                                 }
2485                         }
2486                 }
2487
2488                 if (!delete && (!backref->found_dir_index &&
2489                                 !backref->found_dir_item &&
2490                                 backref->found_inode_ref)) {
2491                         struct btrfs_trans_handle *trans;
2492                         struct btrfs_key location;
2493
2494                         ret = check_dir_conflict(root, backref->name,
2495                                                  backref->namelen,
2496                                                  backref->dir,
2497                                                  backref->index);
2498                         if (ret) {
2499                                 /*
2500                                  * let nlink fixing routine to handle it,
2501                                  * which can do it better.
2502                                  */
2503                                 ret = 0;
2504                                 break;
2505                         }
2506                         location.objectid = rec->ino;
2507                         location.type = BTRFS_INODE_ITEM_KEY;
2508                         location.offset = 0;
2509
2510                         trans = btrfs_start_transaction(root, 1);
2511                         if (IS_ERR(trans)) {
2512                                 ret = PTR_ERR(trans);
2513                                 break;
2514                         }
2515                         fprintf(stderr, "adding missing dir index/item pair "
2516                                 "for inode %llu\n",
2517                                 (unsigned long long)rec->ino);
2518                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2519                                                     backref->namelen,
2520                                                     backref->dir, &location,
2521                                                     imode_to_type(rec->imode),
2522                                                     backref->index);
2523                         BUG_ON(ret);
2524                         btrfs_commit_transaction(trans, root);
2525                         repaired++;
2526                 }
2527
2528                 if (!delete && (backref->found_inode_ref &&
2529                                 backref->found_dir_index &&
2530                                 backref->found_dir_item &&
2531                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532                                 !rec->found_inode_item)) {
2533                         ret = create_inode_item(root, rec, backref, 0);
2534                         if (ret)
2535                                 break;
2536                         repaired++;
2537                 }
2538
2539         }
2540         return ret ? ret : repaired;
2541 }
2542
2543 /*
2544  * To determine the file type for nlink/inode_item repair
2545  *
2546  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547  * Return -ENOENT if file type is not found.
2548  */
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2550 {
2551         struct inode_backref *backref;
2552
2553         /* For inode item recovered case */
2554         if (rec->found_inode_item) {
2555                 *type = imode_to_type(rec->imode);
2556                 return 0;
2557         }
2558
2559         list_for_each_entry(backref, &rec->backrefs, list) {
2560                 if (backref->found_dir_index || backref->found_dir_item) {
2561                         *type = backref->filetype;
2562                         return 0;
2563                 }
2564         }
2565         return -ENOENT;
2566 }
2567
2568 /*
2569  * To determine the file name for nlink repair
2570  *
2571  * Return 0 if file name is found, set name and namelen.
2572  * Return -ENOENT if file name is not found.
2573  */
2574 static int find_file_name(struct inode_record *rec,
2575                           char *name, int *namelen)
2576 {
2577         struct inode_backref *backref;
2578
2579         list_for_each_entry(backref, &rec->backrefs, list) {
2580                 if (backref->found_dir_index || backref->found_dir_item ||
2581                     backref->found_inode_ref) {
2582                         memcpy(name, backref->name, backref->namelen);
2583                         *namelen = backref->namelen;
2584                         return 0;
2585                 }
2586         }
2587         return -ENOENT;
2588 }
2589
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592                        struct btrfs_root *root,
2593                        struct btrfs_path *path,
2594                        struct inode_record *rec)
2595 {
2596         struct inode_backref *backref;
2597         struct inode_backref *tmp;
2598         struct btrfs_key key;
2599         struct btrfs_inode_item *inode_item;
2600         int ret = 0;
2601
2602         /* We don't believe this either, reset it and iterate backref */
2603         rec->found_link = 0;
2604
2605         /* Remove all backref including the valid ones */
2606         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608                                    backref->index, backref->name,
2609                                    backref->namelen, 0);
2610                 if (ret < 0)
2611                         goto out;
2612
2613                 /* remove invalid backref, so it won't be added back */
2614                 if (!(backref->found_dir_index &&
2615                       backref->found_dir_item &&
2616                       backref->found_inode_ref)) {
2617                         list_del(&backref->list);
2618                         free(backref);
2619                 } else {
2620                         rec->found_link++;
2621                 }
2622         }
2623
2624         /* Set nlink to 0 */
2625         key.objectid = rec->ino;
2626         key.type = BTRFS_INODE_ITEM_KEY;
2627         key.offset = 0;
2628         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2629         if (ret < 0)
2630                 goto out;
2631         if (ret > 0) {
2632                 ret = -ENOENT;
2633                 goto out;
2634         }
2635         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636                                     struct btrfs_inode_item);
2637         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638         btrfs_mark_buffer_dirty(path->nodes[0]);
2639         btrfs_release_path(path);
2640
2641         /*
2642          * Add back valid inode_ref/dir_item/dir_index,
2643          * add_link() will handle the nlink inc, so new nlink must be correct
2644          */
2645         list_for_each_entry(backref, &rec->backrefs, list) {
2646                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647                                      backref->name, backref->namelen,
2648                                      backref->filetype, &backref->index, 1);
2649                 if (ret < 0)
2650                         goto out;
2651         }
2652 out:
2653         btrfs_release_path(path);
2654         return ret;
2655 }
2656
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658                                struct btrfs_root *root,
2659                                struct btrfs_path *path,
2660                                struct inode_record *rec)
2661 {
2662         char *dir_name = "lost+found";
2663         char namebuf[BTRFS_NAME_LEN] = {0};
2664         u64 lost_found_ino;
2665         u32 mode = 0700;
2666         u8 type = 0;
2667         int namelen = 0;
2668         int name_recovered = 0;
2669         int type_recovered = 0;
2670         int ret = 0;
2671
2672         /*
2673          * Get file name and type first before these invalid inode ref
2674          * are deleted by remove_all_invalid_backref()
2675          */
2676         name_recovered = !find_file_name(rec, namebuf, &namelen);
2677         type_recovered = !find_file_type(rec, &type);
2678
2679         if (!name_recovered) {
2680                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681                        rec->ino, rec->ino);
2682                 namelen = count_digits(rec->ino);
2683                 sprintf(namebuf, "%llu", rec->ino);
2684                 name_recovered = 1;
2685         }
2686         if (!type_recovered) {
2687                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2688                        rec->ino);
2689                 type = BTRFS_FT_REG_FILE;
2690                 type_recovered = 1;
2691         }
2692
2693         ret = reset_nlink(trans, root, path, rec);
2694         if (ret < 0) {
2695                 fprintf(stderr,
2696                         "Failed to reset nlink for inode %llu: %s\n",
2697                         rec->ino, strerror(-ret));
2698                 goto out;
2699         }
2700
2701         if (rec->found_link == 0) {
2702                 lost_found_ino = root->highest_inode;
2703                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2704                         ret = -EOVERFLOW;
2705                         goto out;
2706                 }
2707                 lost_found_ino++;
2708                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2710                                   mode);
2711                 if (ret < 0) {
2712                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713                                 dir_name, strerror(-ret));
2714                         goto out;
2715                 }
2716                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717                                      namebuf, namelen, type, NULL, 1);
2718                 /*
2719                  * Add ".INO" suffix several times to handle case where
2720                  * "FILENAME.INO" is already taken by another file.
2721                  */
2722                 while (ret == -EEXIST) {
2723                         /*
2724                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2725                          */
2726                         if (namelen + count_digits(rec->ino) + 1 >
2727                             BTRFS_NAME_LEN) {
2728                                 ret = -EFBIG;
2729                                 goto out;
2730                         }
2731                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2732                                  ".%llu", rec->ino);
2733                         namelen += count_digits(rec->ino) + 1;
2734                         ret = btrfs_add_link(trans, root, rec->ino,
2735                                              lost_found_ino, namebuf,
2736                                              namelen, type, NULL, 1);
2737                 }
2738                 if (ret < 0) {
2739                         fprintf(stderr,
2740                                 "Failed to link the inode %llu to %s dir: %s\n",
2741                                 rec->ino, dir_name, strerror(-ret));
2742                         goto out;
2743                 }
2744                 /*
2745                  * Just increase the found_link, don't actually add the
2746                  * backref. This will make things easier and this inode
2747                  * record will be freed after the repair is done.
2748                  * So fsck will not report problem about this inode.
2749                  */
2750                 rec->found_link++;
2751                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752                        namelen, namebuf, dir_name);
2753         }
2754         printf("Fixed the nlink of inode %llu\n", rec->ino);
2755 out:
2756         /*
2757          * Clear the flag anyway, or we will loop forever for the same inode
2758          * as it will not be removed from the bad inode list and the dead loop
2759          * happens.
2760          */
2761         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762         btrfs_release_path(path);
2763         return ret;
2764 }
2765
2766 /*
2767  * Check if there is any normal(reg or prealloc) file extent for given
2768  * ino.
2769  * This is used to determine the file type when neither its dir_index/item or
2770  * inode_item exists.
2771  *
2772  * This will *NOT* report error, if any error happens, just consider it does
2773  * not have any normal file extent.
2774  */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2776 {
2777         struct btrfs_path *path;
2778         struct btrfs_key key;
2779         struct btrfs_key found_key;
2780         struct btrfs_file_extent_item *fi;
2781         u8 type;
2782         int ret = 0;
2783
2784         path = btrfs_alloc_path();
2785         if (!path)
2786                 goto out;
2787         key.objectid = ino;
2788         key.type = BTRFS_EXTENT_DATA_KEY;
2789         key.offset = 0;
2790
2791         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2792         if (ret < 0) {
2793                 ret = 0;
2794                 goto out;
2795         }
2796         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797                 ret = btrfs_next_leaf(root, path);
2798                 if (ret) {
2799                         ret = 0;
2800                         goto out;
2801                 }
2802         }
2803         while (1) {
2804                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2805                                       path->slots[0]);
2806                 if (found_key.objectid != ino ||
2807                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2808                         break;
2809                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810                                     struct btrfs_file_extent_item);
2811                 type = btrfs_file_extent_type(path->nodes[0], fi);
2812                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2813                         ret = 1;
2814                         goto out;
2815                 }
2816         }
2817 out:
2818         btrfs_free_path(path);
2819         return ret;
2820 }
2821
2822 static u32 btrfs_type_to_imode(u8 type)
2823 {
2824         static u32 imode_by_btrfs_type[] = {
2825                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2826                 [BTRFS_FT_DIR]          = S_IFDIR,
2827                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2828                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2829                 [BTRFS_FT_FIFO]         = S_IFIFO,
2830                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2831                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2832         };
2833
2834         return imode_by_btrfs_type[(type)];
2835 }
2836
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838                                 struct btrfs_root *root,
2839                                 struct btrfs_path *path,
2840                                 struct inode_record *rec)
2841 {
2842         u8 filetype;
2843         u32 mode = 0700;
2844         int type_recovered = 0;
2845         int ret = 0;
2846
2847         printf("Trying to rebuild inode:%llu\n", rec->ino);
2848
2849         type_recovered = !find_file_type(rec, &filetype);
2850
2851         /*
2852          * Try to determine inode type if type not found.
2853          *
2854          * For found regular file extent, it must be FILE.
2855          * For found dir_item/index, it must be DIR.
2856          *
2857          * For undetermined one, use FILE as fallback.
2858          *
2859          * TODO:
2860          * 1. If found backref(inode_index/item is already handled) to it,
2861          *    it must be DIR.
2862          *    Need new inode-inode ref structure to allow search for that.
2863          */
2864         if (!type_recovered) {
2865                 if (rec->found_file_extent &&
2866                     find_normal_file_extent(root, rec->ino)) {
2867                         type_recovered = 1;
2868                         filetype = BTRFS_FT_REG_FILE;
2869                 } else if (rec->found_dir_item) {
2870                         type_recovered = 1;
2871                         filetype = BTRFS_FT_DIR;
2872                 } else if (!list_empty(&rec->orphan_extents)) {
2873                         type_recovered = 1;
2874                         filetype = BTRFS_FT_REG_FILE;
2875                 } else{
2876                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2877                                rec->ino);
2878                         type_recovered = 1;
2879                         filetype = BTRFS_FT_REG_FILE;
2880                 }
2881         }
2882
2883         ret = btrfs_new_inode(trans, root, rec->ino,
2884                               mode | btrfs_type_to_imode(filetype));
2885         if (ret < 0)
2886                 goto out;
2887
2888         /*
2889          * Here inode rebuild is done, we only rebuild the inode item,
2890          * don't repair the nlink(like move to lost+found).
2891          * That is the job of nlink repair.
2892          *
2893          * We just fill the record and return
2894          */
2895         rec->found_dir_item = 1;
2896         rec->imode = mode | btrfs_type_to_imode(filetype);
2897         rec->nlink = 0;
2898         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899         /* Ensure the inode_nlinks repair function will be called */
2900         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2901 out:
2902         return ret;
2903 }
2904
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906                                       struct btrfs_root *root,
2907                                       struct btrfs_path *path,
2908                                       struct inode_record *rec)
2909 {
2910         struct orphan_data_extent *orphan;
2911         struct orphan_data_extent *tmp;
2912         int ret = 0;
2913
2914         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2915                 /*
2916                  * Check for conflicting file extents
2917                  *
2918                  * Here we don't know whether the extents is compressed or not,
2919                  * so we can only assume it not compressed nor data offset,
2920                  * and use its disk_len as extent length.
2921                  */
2922                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923                                        orphan->offset, orphan->disk_len, 0);
2924                 btrfs_release_path(path);
2925                 if (ret < 0)
2926                         goto out;
2927                 if (!ret) {
2928                         fprintf(stderr,
2929                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930                                 orphan->disk_bytenr, orphan->disk_len);
2931                         ret = btrfs_free_extent(trans,
2932                                         root->fs_info->extent_root,
2933                                         orphan->disk_bytenr, orphan->disk_len,
2934                                         0, root->objectid, orphan->objectid,
2935                                         orphan->offset);
2936                         if (ret < 0)
2937                                 goto out;
2938                 }
2939                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940                                 orphan->offset, orphan->disk_bytenr,
2941                                 orphan->disk_len, orphan->disk_len);
2942                 if (ret < 0)
2943                         goto out;
2944
2945                 /* Update file size info */
2946                 rec->found_size += orphan->disk_len;
2947                 if (rec->found_size == rec->nbytes)
2948                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2949
2950                 /* Update the file extent hole info too */
2951                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2952                                            orphan->disk_len);
2953                 if (ret < 0)
2954                         goto out;
2955                 if (RB_EMPTY_ROOT(&rec->holes))
2956                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2957
2958                 list_del(&orphan->list);
2959                 free(orphan);
2960         }
2961         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2962 out:
2963         return ret;
2964 }
2965
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967                                         struct btrfs_root *root,
2968                                         struct btrfs_path *path,
2969                                         struct inode_record *rec)
2970 {
2971         struct rb_node *node;
2972         struct file_extent_hole *hole;
2973         int found = 0;
2974         int ret = 0;
2975
2976         node = rb_first(&rec->holes);
2977
2978         while (node) {
2979                 found = 1;
2980                 hole = rb_entry(node, struct file_extent_hole, node);
2981                 ret = btrfs_punch_hole(trans, root, rec->ino,
2982                                        hole->start, hole->len);
2983                 if (ret < 0)
2984                         goto out;
2985                 ret = del_file_extent_hole(&rec->holes, hole->start,
2986                                            hole->len);
2987                 if (ret < 0)
2988                         goto out;
2989                 if (RB_EMPTY_ROOT(&rec->holes))
2990                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991                 node = rb_first(&rec->holes);
2992         }
2993         /* special case for a file losing all its file extent */
2994         if (!found) {
2995                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996                                        round_up(rec->isize, root->sectorsize));
2997                 if (ret < 0)
2998                         goto out;
2999         }
3000         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001                rec->ino, root->objectid);
3002 out:
3003         return ret;
3004 }
3005
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3007 {
3008         struct btrfs_trans_handle *trans;
3009         struct btrfs_path *path;
3010         int ret = 0;
3011
3012         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013                              I_ERR_NO_ORPHAN_ITEM |
3014                              I_ERR_LINK_COUNT_WRONG |
3015                              I_ERR_NO_INODE_ITEM |
3016                              I_ERR_FILE_EXTENT_ORPHAN |
3017                              I_ERR_FILE_EXTENT_DISCOUNT|
3018                              I_ERR_FILE_NBYTES_WRONG)))
3019                 return rec->errors;
3020
3021         path = btrfs_alloc_path();
3022         if (!path)
3023                 return -ENOMEM;
3024
3025         /*
3026          * For nlink repair, it may create a dir and add link, so
3027          * 2 for parent(256)'s dir_index and dir_item
3028          * 2 for lost+found dir's inode_item and inode_ref
3029          * 1 for the new inode_ref of the file
3030          * 2 for lost+found dir's dir_index and dir_item for the file
3031          */
3032         trans = btrfs_start_transaction(root, 7);
3033         if (IS_ERR(trans)) {
3034                 btrfs_free_path(path);
3035                 return PTR_ERR(trans);
3036         }
3037
3038         if (rec->errors & I_ERR_NO_INODE_ITEM)
3039                 ret = repair_inode_no_item(trans, root, path, rec);
3040         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043                 ret = repair_inode_discount_extent(trans, root, path, rec);
3044         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045                 ret = repair_inode_isize(trans, root, path, rec);
3046         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047                 ret = repair_inode_orphan_item(trans, root, path, rec);
3048         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049                 ret = repair_inode_nlinks(trans, root, path, rec);
3050         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051                 ret = repair_inode_nbytes(trans, root, path, rec);
3052         btrfs_commit_transaction(trans, root);
3053         btrfs_free_path(path);
3054         return ret;
3055 }
3056
3057 static int check_inode_recs(struct btrfs_root *root,
3058                             struct cache_tree *inode_cache)
3059 {
3060         struct cache_extent *cache;
3061         struct ptr_node *node;
3062         struct inode_record *rec;
3063         struct inode_backref *backref;
3064         int stage = 0;
3065         int ret = 0;
3066         int err = 0;
3067         u64 error = 0;
3068         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3069
3070         if (btrfs_root_refs(&root->root_item) == 0) {
3071                 if (!cache_tree_empty(inode_cache))
3072                         fprintf(stderr, "warning line %d\n", __LINE__);
3073                 return 0;
3074         }
3075
3076         /*
3077          * We need to record the highest inode number for later 'lost+found'
3078          * dir creation.
3079          * We must select an ino not used/referred by any existing inode, or
3080          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081          * this may cause 'lost+found' dir has wrong nlinks.
3082          */
3083         cache = last_cache_extent(inode_cache);
3084         if (cache) {
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 if (rec->ino > root->highest_inode)
3088                         root->highest_inode = rec->ino;
3089         }
3090
3091         /*
3092          * We need to repair backrefs first because we could change some of the
3093          * errors in the inode recs.
3094          *
3095          * We also need to go through and delete invalid backrefs first and then
3096          * add the correct ones second.  We do this because we may get EEXIST
3097          * when adding back the correct index because we hadn't yet deleted the
3098          * invalid index.
3099          *
3100          * For example, if we were missing a dir index then the directories
3101          * isize would be wrong, so if we fixed the isize to what we thought it
3102          * would be and then fixed the backref we'd still have a invalid fs, so
3103          * we need to add back the dir index and then check to see if the isize
3104          * is still wrong.
3105          */
3106         while (stage < 3) {
3107                 stage++;
3108                 if (stage == 3 && !err)
3109                         break;
3110
3111                 cache = search_cache_extent(inode_cache, 0);
3112                 while (repair && cache) {
3113                         node = container_of(cache, struct ptr_node, cache);
3114                         rec = node->data;
3115                         cache = next_cache_extent(cache);
3116
3117                         /* Need to free everything up and rescan */
3118                         if (stage == 3) {
3119                                 remove_cache_extent(inode_cache, &node->cache);
3120                                 free(node);
3121                                 free_inode_rec(rec);
3122                                 continue;
3123                         }
3124
3125                         if (list_empty(&rec->backrefs))
3126                                 continue;
3127
3128                         ret = repair_inode_backrefs(root, rec, inode_cache,
3129                                                     stage == 1);
3130                         if (ret < 0) {
3131                                 err = ret;
3132                                 stage = 2;
3133                                 break;
3134                         } if (ret > 0) {
3135                                 err = -EAGAIN;
3136                         }
3137                 }
3138         }
3139         if (err)
3140                 return err;
3141
3142         rec = get_inode_rec(inode_cache, root_dirid, 0);
3143         BUG_ON(IS_ERR(rec));
3144         if (rec) {
3145                 ret = check_root_dir(rec);
3146                 if (ret) {
3147                         fprintf(stderr, "root %llu root dir %llu error\n",
3148                                 (unsigned long long)root->root_key.objectid,
3149                                 (unsigned long long)root_dirid);
3150                         print_inode_error(root, rec);
3151                         error++;
3152                 }
3153         } else {
3154                 if (repair) {
3155                         struct btrfs_trans_handle *trans;
3156
3157                         trans = btrfs_start_transaction(root, 1);
3158                         if (IS_ERR(trans)) {
3159                                 err = PTR_ERR(trans);
3160                                 return err;
3161                         }
3162
3163                         fprintf(stderr,
3164                                 "root %llu missing its root dir, recreating\n",
3165                                 (unsigned long long)root->objectid);
3166
3167                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3168                         BUG_ON(ret);
3169
3170                         btrfs_commit_transaction(trans, root);
3171                         return -EAGAIN;
3172                 }
3173
3174                 fprintf(stderr, "root %llu root dir %llu not found\n",
3175                         (unsigned long long)root->root_key.objectid,
3176                         (unsigned long long)root_dirid);
3177         }
3178
3179         while (1) {
3180                 cache = search_cache_extent(inode_cache, 0);
3181                 if (!cache)
3182                         break;
3183                 node = container_of(cache, struct ptr_node, cache);
3184                 rec = node->data;
3185                 remove_cache_extent(inode_cache, &node->cache);
3186                 free(node);
3187                 if (rec->ino == root_dirid ||
3188                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189                         free_inode_rec(rec);
3190                         continue;
3191                 }
3192
3193                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194                         ret = check_orphan_item(root, rec->ino);
3195                         if (ret == 0)
3196                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197                         if (can_free_inode_rec(rec)) {
3198                                 free_inode_rec(rec);
3199                                 continue;
3200                         }
3201                 }
3202
3203                 if (!rec->found_inode_item)
3204                         rec->errors |= I_ERR_NO_INODE_ITEM;
3205                 if (rec->found_link != rec->nlink)
3206                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3207                 if (repair) {
3208                         ret = try_repair_inode(root, rec);
3209                         if (ret == 0 && can_free_inode_rec(rec)) {
3210                                 free_inode_rec(rec);
3211                                 continue;
3212                         }
3213                         ret = 0;
3214                 }
3215
3216                 if (!(repair && ret == 0))
3217                         error++;
3218                 print_inode_error(root, rec);
3219                 list_for_each_entry(backref, &rec->backrefs, list) {
3220                         if (!backref->found_dir_item)
3221                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222                         if (!backref->found_dir_index)
3223                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224                         if (!backref->found_inode_ref)
3225                                 backref->errors |= REF_ERR_NO_INODE_REF;
3226                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227                                 " namelen %u name %s filetype %d errors %x",
3228                                 (unsigned long long)backref->dir,
3229                                 (unsigned long long)backref->index,
3230                                 backref->namelen, backref->name,
3231                                 backref->filetype, backref->errors);
3232                         print_ref_error(backref->errors);
3233                 }
3234                 free_inode_rec(rec);
3235         }
3236         return (error > 0) ? -1 : 0;
3237 }
3238
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3240                                         u64 objectid)
3241 {
3242         struct cache_extent *cache;
3243         struct root_record *rec = NULL;
3244         int ret;
3245
3246         cache = lookup_cache_extent(root_cache, objectid, 1);
3247         if (cache) {
3248                 rec = container_of(cache, struct root_record, cache);
3249         } else {
3250                 rec = calloc(1, sizeof(*rec));
3251                 if (!rec)
3252                         return ERR_PTR(-ENOMEM);
3253                 rec->objectid = objectid;
3254                 INIT_LIST_HEAD(&rec->backrefs);
3255                 rec->cache.start = objectid;
3256                 rec->cache.size = 1;
3257
3258                 ret = insert_cache_extent(root_cache, &rec->cache);
3259                 if (ret)
3260                         return ERR_PTR(-EEXIST);
3261         }
3262         return rec;
3263 }
3264
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266                                              u64 ref_root, u64 dir, u64 index,
3267                                              const char *name, int namelen)
3268 {
3269         struct root_backref *backref;
3270
3271         list_for_each_entry(backref, &rec->backrefs, list) {
3272                 if (backref->ref_root != ref_root || backref->dir != dir ||
3273                     backref->namelen != namelen)
3274                         continue;
3275                 if (memcmp(name, backref->name, namelen))
3276                         continue;
3277                 return backref;
3278         }
3279
3280         backref = calloc(1, sizeof(*backref) + namelen + 1);
3281         if (!backref)
3282                 return NULL;
3283         backref->ref_root = ref_root;
3284         backref->dir = dir;
3285         backref->index = index;
3286         backref->namelen = namelen;
3287         memcpy(backref->name, name, namelen);
3288         backref->name[namelen] = '\0';
3289         list_add_tail(&backref->list, &rec->backrefs);
3290         return backref;
3291 }
3292
3293 static void free_root_record(struct cache_extent *cache)
3294 {
3295         struct root_record *rec;
3296         struct root_backref *backref;
3297
3298         rec = container_of(cache, struct root_record, cache);
3299         while (!list_empty(&rec->backrefs)) {
3300                 backref = to_root_backref(rec->backrefs.next);
3301                 list_del(&backref->list);
3302                 free(backref);
3303         }
3304
3305         kfree(rec);
3306 }
3307
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3309
3310 static int add_root_backref(struct cache_tree *root_cache,
3311                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3312                             const char *name, int namelen,
3313                             int item_type, int errors)
3314 {
3315         struct root_record *rec;
3316         struct root_backref *backref;
3317
3318         rec = get_root_rec(root_cache, root_id);
3319         BUG_ON(IS_ERR(rec));
3320         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3321         BUG_ON(!backref);
3322
3323         backref->errors |= errors;
3324
3325         if (item_type != BTRFS_DIR_ITEM_KEY) {
3326                 if (backref->found_dir_index || backref->found_back_ref ||
3327                     backref->found_forward_ref) {
3328                         if (backref->index != index)
3329                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3330                 } else {
3331                         backref->index = index;
3332                 }
3333         }
3334
3335         if (item_type == BTRFS_DIR_ITEM_KEY) {
3336                 if (backref->found_forward_ref)
3337                         rec->found_ref++;
3338                 backref->found_dir_item = 1;
3339         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340                 backref->found_dir_index = 1;
3341         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342                 if (backref->found_forward_ref)
3343                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3344                 else if (backref->found_dir_item)
3345                         rec->found_ref++;
3346                 backref->found_forward_ref = 1;
3347         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348                 if (backref->found_back_ref)
3349                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350                 backref->found_back_ref = 1;
3351         } else {
3352                 BUG_ON(1);
3353         }
3354
3355         if (backref->found_forward_ref && backref->found_dir_item)
3356                 backref->reachable = 1;
3357         return 0;
3358 }
3359
3360 static int merge_root_recs(struct btrfs_root *root,
3361                            struct cache_tree *src_cache,
3362                            struct cache_tree *dst_cache)
3363 {
3364         struct cache_extent *cache;
3365         struct ptr_node *node;
3366         struct inode_record *rec;
3367         struct inode_backref *backref;
3368         int ret = 0;
3369
3370         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371                 free_inode_recs_tree(src_cache);
3372                 return 0;
3373         }
3374
3375         while (1) {
3376                 cache = search_cache_extent(src_cache, 0);
3377                 if (!cache)
3378                         break;
3379                 node = container_of(cache, struct ptr_node, cache);
3380                 rec = node->data;
3381                 remove_cache_extent(src_cache, &node->cache);
3382                 free(node);
3383
3384                 ret = is_child_root(root, root->objectid, rec->ino);
3385                 if (ret < 0)
3386                         break;
3387                 else if (ret == 0)
3388                         goto skip;
3389
3390                 list_for_each_entry(backref, &rec->backrefs, list) {
3391                         BUG_ON(backref->found_inode_ref);
3392                         if (backref->found_dir_item)
3393                                 add_root_backref(dst_cache, rec->ino,
3394                                         root->root_key.objectid, backref->dir,
3395                                         backref->index, backref->name,
3396                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3397                                         backref->errors);
3398                         if (backref->found_dir_index)
3399                                 add_root_backref(dst_cache, rec->ino,
3400                                         root->root_key.objectid, backref->dir,
3401                                         backref->index, backref->name,
3402                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3403                                         backref->errors);
3404                 }
3405 skip:
3406                 free_inode_rec(rec);
3407         }
3408         if (ret < 0)
3409                 return ret;
3410         return 0;
3411 }
3412
3413 static int check_root_refs(struct btrfs_root *root,
3414                            struct cache_tree *root_cache)
3415 {
3416         struct root_record *rec;
3417         struct root_record *ref_root;
3418         struct root_backref *backref;
3419         struct cache_extent *cache;
3420         int loop = 1;
3421         int ret;
3422         int error;
3423         int errors = 0;
3424
3425         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426         BUG_ON(IS_ERR(rec));
3427         rec->found_ref = 1;
3428
3429         /* fixme: this can not detect circular references */
3430         while (loop) {
3431                 loop = 0;
3432                 cache = search_cache_extent(root_cache, 0);
3433                 while (1) {
3434                         if (!cache)
3435                                 break;
3436                         rec = container_of(cache, struct root_record, cache);
3437                         cache = next_cache_extent(cache);
3438
3439                         if (rec->found_ref == 0)
3440                                 continue;
3441
3442                         list_for_each_entry(backref, &rec->backrefs, list) {
3443                                 if (!backref->reachable)
3444                                         continue;
3445
3446                                 ref_root = get_root_rec(root_cache,
3447                                                         backref->ref_root);
3448                                 BUG_ON(IS_ERR(ref_root));
3449                                 if (ref_root->found_ref > 0)
3450                                         continue;
3451
3452                                 backref->reachable = 0;
3453                                 rec->found_ref--;
3454                                 if (rec->found_ref == 0)
3455                                         loop = 1;
3456                         }
3457                 }
3458         }
3459
3460         cache = search_cache_extent(root_cache, 0);
3461         while (1) {
3462                 if (!cache)
3463                         break;
3464                 rec = container_of(cache, struct root_record, cache);
3465                 cache = next_cache_extent(cache);
3466
3467                 if (rec->found_ref == 0 &&
3468                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470                         ret = check_orphan_item(root->fs_info->tree_root,
3471                                                 rec->objectid);
3472                         if (ret == 0)
3473                                 continue;
3474
3475                         /*
3476                          * If we don't have a root item then we likely just have
3477                          * a dir item in a snapshot for this root but no actual
3478                          * ref key or anything so it's meaningless.
3479                          */
3480                         if (!rec->found_root_item)
3481                                 continue;
3482                         errors++;
3483                         fprintf(stderr, "fs tree %llu not referenced\n",
3484                                 (unsigned long long)rec->objectid);
3485                 }
3486
3487                 error = 0;
3488                 if (rec->found_ref > 0 && !rec->found_root_item)
3489                         error = 1;
3490                 list_for_each_entry(backref, &rec->backrefs, list) {
3491                         if (!backref->found_dir_item)
3492                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493                         if (!backref->found_dir_index)
3494                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495                         if (!backref->found_back_ref)
3496                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497                         if (!backref->found_forward_ref)
3498                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3499                         if (backref->reachable && backref->errors)
3500                                 error = 1;
3501                 }
3502                 if (!error)
3503                         continue;
3504
3505                 errors++;
3506                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507                         (unsigned long long)rec->objectid, rec->found_ref,
3508                          rec->found_root_item ? "" : "not found");
3509
3510                 list_for_each_entry(backref, &rec->backrefs, list) {
3511                         if (!backref->reachable)
3512                                 continue;
3513                         if (!backref->errors && rec->found_root_item)
3514                                 continue;
3515                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516                                 " index %llu namelen %u name %s errors %x\n",
3517                                 (unsigned long long)backref->ref_root,
3518                                 (unsigned long long)backref->dir,
3519                                 (unsigned long long)backref->index,
3520                                 backref->namelen, backref->name,
3521                                 backref->errors);
3522                         print_ref_error(backref->errors);
3523                 }
3524         }
3525         return errors > 0 ? 1 : 0;
3526 }
3527
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529                             struct btrfs_key *key,
3530                             struct cache_tree *root_cache)
3531 {
3532         u64 dirid;
3533         u64 index;
3534         u32 len;
3535         u32 name_len;
3536         struct btrfs_root_ref *ref;
3537         char namebuf[BTRFS_NAME_LEN];
3538         int error;
3539
3540         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3541
3542         dirid = btrfs_root_ref_dirid(eb, ref);
3543         index = btrfs_root_ref_sequence(eb, ref);
3544         name_len = btrfs_root_ref_name_len(eb, ref);
3545
3546         if (name_len <= BTRFS_NAME_LEN) {
3547                 len = name_len;
3548                 error = 0;
3549         } else {
3550                 len = BTRFS_NAME_LEN;
3551                 error = REF_ERR_NAME_TOO_LONG;
3552         }
3553         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3554
3555         if (key->type == BTRFS_ROOT_REF_KEY) {
3556                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557                                  index, namebuf, len, key->type, error);
3558         } else {
3559                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560                                  index, namebuf, len, key->type, error);
3561         }
3562         return 0;
3563 }
3564
3565 static void free_corrupt_block(struct cache_extent *cache)
3566 {
3567         struct btrfs_corrupt_block *corrupt;
3568
3569         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3570         free(corrupt);
3571 }
3572
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3574
3575 /*
3576  * Repair the btree of the given root.
3577  *
3578  * The fix is to remove the node key in corrupt_blocks cache_tree.
3579  * and rebalance the tree.
3580  * After the fix, the btree should be writeable.
3581  */
3582 static int repair_btree(struct btrfs_root *root,
3583                         struct cache_tree *corrupt_blocks)
3584 {
3585         struct btrfs_trans_handle *trans;
3586         struct btrfs_path *path;
3587         struct btrfs_corrupt_block *corrupt;
3588         struct cache_extent *cache;
3589         struct btrfs_key key;
3590         u64 offset;
3591         int level;
3592         int ret = 0;
3593
3594         if (cache_tree_empty(corrupt_blocks))
3595                 return 0;
3596
3597         path = btrfs_alloc_path();
3598         if (!path)
3599                 return -ENOMEM;
3600
3601         trans = btrfs_start_transaction(root, 1);
3602         if (IS_ERR(trans)) {
3603                 ret = PTR_ERR(trans);
3604                 fprintf(stderr, "Error starting transaction: %s\n",
3605                         strerror(-ret));
3606                 goto out_free_path;
3607         }
3608         cache = first_cache_extent(corrupt_blocks);
3609         while (cache) {
3610                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3611                                        cache);
3612                 level = corrupt->level;
3613                 path->lowest_level = level;
3614                 key.objectid = corrupt->key.objectid;
3615                 key.type = corrupt->key.type;
3616                 key.offset = corrupt->key.offset;
3617
3618                 /*
3619                  * Here we don't want to do any tree balance, since it may
3620                  * cause a balance with corrupted brother leaf/node,
3621                  * so ins_len set to 0 here.
3622                  * Balance will be done after all corrupt node/leaf is deleted.
3623                  */
3624                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3625                 if (ret < 0)
3626                         goto out;
3627                 offset = btrfs_node_blockptr(path->nodes[level],
3628                                              path->slots[level]);
3629
3630                 /* Remove the ptr */
3631                 ret = btrfs_del_ptr(trans, root, path, level,
3632                                     path->slots[level]);
3633                 if (ret < 0)
3634                         goto out;
3635                 /*
3636                  * Remove the corresponding extent
3637                  * return value is not concerned.
3638                  */
3639                 btrfs_release_path(path);
3640                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641                                         0, root->root_key.objectid,
3642                                         level - 1, 0);
3643                 cache = next_cache_extent(cache);
3644         }
3645
3646         /* Balance the btree using btrfs_search_slot() */
3647         cache = first_cache_extent(corrupt_blocks);
3648         while (cache) {
3649                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3650                                        cache);
3651                 memcpy(&key, &corrupt->key, sizeof(key));
3652                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3653                 if (ret < 0)
3654                         goto out;
3655                 /* return will always >0 since it won't find the item */
3656                 ret = 0;
3657                 btrfs_release_path(path);
3658                 cache = next_cache_extent(cache);
3659         }
3660 out:
3661         btrfs_commit_transaction(trans, root);
3662 out_free_path:
3663         btrfs_free_path(path);
3664         return ret;
3665 }
3666
3667 static int check_fs_root(struct btrfs_root *root,
3668                          struct cache_tree *root_cache,
3669                          struct walk_control *wc)
3670 {
3671         int ret = 0;
3672         int err = 0;
3673         int wret;
3674         int level;
3675         struct btrfs_path path;
3676         struct shared_node root_node;
3677         struct root_record *rec;
3678         struct btrfs_root_item *root_item = &root->root_item;
3679         struct cache_tree corrupt_blocks;
3680         struct orphan_data_extent *orphan;
3681         struct orphan_data_extent *tmp;
3682         enum btrfs_tree_block_status status;
3683         struct node_refs nrefs;
3684
3685         /*
3686          * Reuse the corrupt_block cache tree to record corrupted tree block
3687          *
3688          * Unlike the usage in extent tree check, here we do it in a per
3689          * fs/subvol tree base.
3690          */
3691         cache_tree_init(&corrupt_blocks);
3692         root->fs_info->corrupt_blocks = &corrupt_blocks;
3693
3694         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695                 rec = get_root_rec(root_cache, root->root_key.objectid);
3696                 BUG_ON(IS_ERR(rec));
3697                 if (btrfs_root_refs(root_item) > 0)
3698                         rec->found_root_item = 1;
3699         }
3700
3701         btrfs_init_path(&path);
3702         memset(&root_node, 0, sizeof(root_node));
3703         cache_tree_init(&root_node.root_cache);
3704         cache_tree_init(&root_node.inode_cache);
3705         memset(&nrefs, 0, sizeof(nrefs));
3706
3707         /* Move the orphan extent record to corresponding inode_record */
3708         list_for_each_entry_safe(orphan, tmp,
3709                                  &root->orphan_data_extents, list) {
3710                 struct inode_record *inode;
3711
3712                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3713                                       1);
3714                 BUG_ON(IS_ERR(inode));
3715                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716                 list_move(&orphan->list, &inode->orphan_extents);
3717         }
3718
3719         level = btrfs_header_level(root->node);
3720         memset(wc->nodes, 0, sizeof(wc->nodes));
3721         wc->nodes[level] = &root_node;
3722         wc->active_node = level;
3723         wc->root_level = level;
3724
3725         /* We may not have checked the root block, lets do that now */
3726         if (btrfs_is_leaf(root->node))
3727                 status = btrfs_check_leaf(root, NULL, root->node);
3728         else
3729                 status = btrfs_check_node(root, NULL, root->node);
3730         if (status != BTRFS_TREE_BLOCK_CLEAN)
3731                 return -EIO;
3732
3733         if (btrfs_root_refs(root_item) > 0 ||
3734             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735                 path.nodes[level] = root->node;
3736                 extent_buffer_get(root->node);
3737                 path.slots[level] = 0;
3738         } else {
3739                 struct btrfs_key key;
3740                 struct btrfs_disk_key found_key;
3741
3742                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743                 level = root_item->drop_level;
3744                 path.lowest_level = level;
3745                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3746                 if (wret < 0)
3747                         goto skip_walking;
3748                 btrfs_node_key(path.nodes[level], &found_key,
3749                                 path.slots[level]);
3750                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3751                                         sizeof(found_key)));
3752         }
3753
3754         while (1) {
3755                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3756                 if (wret < 0)
3757                         ret = wret;
3758                 if (wret != 0)
3759                         break;
3760
3761                 wret = walk_up_tree(root, &path, wc, &level);
3762                 if (wret < 0)
3763                         ret = wret;
3764                 if (wret != 0)
3765                         break;
3766         }
3767 skip_walking:
3768         btrfs_release_path(&path);
3769
3770         if (!cache_tree_empty(&corrupt_blocks)) {
3771                 struct cache_extent *cache;
3772                 struct btrfs_corrupt_block *corrupt;
3773
3774                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3775                        root->root_key.objectid);
3776                 cache = first_cache_extent(&corrupt_blocks);
3777                 while (cache) {
3778                         corrupt = container_of(cache,
3779                                                struct btrfs_corrupt_block,
3780                                                cache);
3781                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3782                                cache->start, corrupt->level,
3783                                corrupt->key.objectid, corrupt->key.type,
3784                                corrupt->key.offset);
3785                         cache = next_cache_extent(cache);
3786                 }
3787                 if (repair) {
3788                         printf("Try to repair the btree for root %llu\n",
3789                                root->root_key.objectid);
3790                         ret = repair_btree(root, &corrupt_blocks);
3791                         if (ret < 0)
3792                                 fprintf(stderr, "Failed to repair btree: %s\n",
3793                                         strerror(-ret));
3794                         if (!ret)
3795                                 printf("Btree for root %llu is fixed\n",
3796                                        root->root_key.objectid);
3797                 }
3798         }
3799
3800         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3801         if (err < 0)
3802                 ret = err;
3803
3804         if (root_node.current) {
3805                 root_node.current->checked = 1;
3806                 maybe_free_inode_rec(&root_node.inode_cache,
3807                                 root_node.current);
3808         }
3809
3810         err = check_inode_recs(root, &root_node.inode_cache);
3811         if (!ret)
3812                 ret = err;
3813
3814         free_corrupt_blocks_tree(&corrupt_blocks);
3815         root->fs_info->corrupt_blocks = NULL;
3816         free_orphan_data_extents(&root->orphan_data_extents);
3817         return ret;
3818 }
3819
3820 static int fs_root_objectid(u64 objectid)
3821 {
3822         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3823             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3824                 return 1;
3825         return is_fstree(objectid);
3826 }
3827
3828 static int check_fs_roots(struct btrfs_root *root,
3829                           struct cache_tree *root_cache)
3830 {
3831         struct btrfs_path path;
3832         struct btrfs_key key;
3833         struct walk_control wc;
3834         struct extent_buffer *leaf, *tree_node;
3835         struct btrfs_root *tmp_root;
3836         struct btrfs_root *tree_root = root->fs_info->tree_root;
3837         int ret;
3838         int err = 0;
3839
3840         if (ctx.progress_enabled) {
3841                 ctx.tp = TASK_FS_ROOTS;
3842                 task_start(ctx.info);
3843         }
3844
3845         /*
3846          * Just in case we made any changes to the extent tree that weren't
3847          * reflected into the free space cache yet.
3848          */
3849         if (repair)
3850                 reset_cached_block_groups(root->fs_info);
3851         memset(&wc, 0, sizeof(wc));
3852         cache_tree_init(&wc.shared);
3853         btrfs_init_path(&path);
3854
3855 again:
3856         key.offset = 0;
3857         key.objectid = 0;
3858         key.type = BTRFS_ROOT_ITEM_KEY;
3859         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3860         if (ret < 0) {
3861                 err = 1;
3862                 goto out;
3863         }
3864         tree_node = tree_root->node;
3865         while (1) {
3866                 if (tree_node != tree_root->node) {
3867                         free_root_recs_tree(root_cache);
3868                         btrfs_release_path(&path);
3869                         goto again;
3870                 }
3871                 leaf = path.nodes[0];
3872                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3873                         ret = btrfs_next_leaf(tree_root, &path);
3874                         if (ret) {
3875                                 if (ret < 0)
3876                                         err = 1;
3877                                 break;
3878                         }
3879                         leaf = path.nodes[0];
3880                 }
3881                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3882                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3883                     fs_root_objectid(key.objectid)) {
3884                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3885                                 tmp_root = btrfs_read_fs_root_no_cache(
3886                                                 root->fs_info, &key);
3887                         } else {
3888                                 key.offset = (u64)-1;
3889                                 tmp_root = btrfs_read_fs_root(
3890                                                 root->fs_info, &key);
3891                         }
3892                         if (IS_ERR(tmp_root)) {
3893                                 err = 1;
3894                                 goto next;
3895                         }
3896                         ret = check_fs_root(tmp_root, root_cache, &wc);
3897                         if (ret == -EAGAIN) {
3898                                 free_root_recs_tree(root_cache);
3899                                 btrfs_release_path(&path);
3900                                 goto again;
3901                         }
3902                         if (ret)
3903                                 err = 1;
3904                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3905                                 btrfs_free_fs_root(tmp_root);
3906                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3907                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3908                         process_root_ref(leaf, path.slots[0], &key,
3909                                          root_cache);
3910                 }
3911 next:
3912                 path.slots[0]++;
3913         }
3914 out:
3915         btrfs_release_path(&path);
3916         if (err)
3917                 free_extent_cache_tree(&wc.shared);
3918         if (!cache_tree_empty(&wc.shared))
3919                 fprintf(stderr, "warning line %d\n", __LINE__);
3920
3921         task_stop(ctx.info);
3922
3923         return err;
3924 }
3925
3926 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3927 {
3928         struct rb_node *n;
3929         struct extent_backref *back;
3930         struct tree_backref *tback;
3931         struct data_backref *dback;
3932         u64 found = 0;
3933         int err = 0;
3934
3935         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3936                 back = rb_node_to_extent_backref(n);
3937                 if (!back->found_extent_tree) {
3938                         err = 1;
3939                         if (!print_errs)
3940                                 goto out;
3941                         if (back->is_data) {
3942                                 dback = to_data_backref(back);
3943                                 fprintf(stderr, "Backref %llu %s %llu"
3944                                         " owner %llu offset %llu num_refs %lu"
3945                                         " not found in extent tree\n",
3946                                         (unsigned long long)rec->start,
3947                                         back->full_backref ?
3948                                         "parent" : "root",
3949                                         back->full_backref ?
3950                                         (unsigned long long)dback->parent:
3951                                         (unsigned long long)dback->root,
3952                                         (unsigned long long)dback->owner,
3953                                         (unsigned long long)dback->offset,
3954                                         (unsigned long)dback->num_refs);
3955                         } else {
3956                                 tback = to_tree_backref(back);
3957                                 fprintf(stderr, "Backref %llu parent %llu"
3958                                         " root %llu not found in extent tree\n",
3959                                         (unsigned long long)rec->start,
3960                                         (unsigned long long)tback->parent,
3961                                         (unsigned long long)tback->root);
3962                         }
3963                 }
3964                 if (!back->is_data && !back->found_ref) {
3965                         err = 1;
3966                         if (!print_errs)
3967                                 goto out;
3968                         tback = to_tree_backref(back);
3969                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3970                                 (unsigned long long)rec->start,
3971                                 back->full_backref ? "parent" : "root",
3972                                 back->full_backref ?
3973                                 (unsigned long long)tback->parent :
3974                                 (unsigned long long)tback->root, back);
3975                 }
3976                 if (back->is_data) {
3977                         dback = to_data_backref(back);
3978                         if (dback->found_ref != dback->num_refs) {
3979                                 err = 1;
3980                                 if (!print_errs)
3981                                         goto out;
3982                                 fprintf(stderr, "Incorrect local backref count"
3983                                         " on %llu %s %llu owner %llu"
3984                                         " offset %llu found %u wanted %u back %p\n",
3985                                         (unsigned long long)rec->start,
3986                                         back->full_backref ?
3987                                         "parent" : "root",
3988                                         back->full_backref ?
3989                                         (unsigned long long)dback->parent:
3990                                         (unsigned long long)dback->root,
3991                                         (unsigned long long)dback->owner,
3992                                         (unsigned long long)dback->offset,
3993                                         dback->found_ref, dback->num_refs, back);
3994                         }
3995                         if (dback->disk_bytenr != rec->start) {
3996                                 err = 1;
3997                                 if (!print_errs)
3998                                         goto out;
3999                                 fprintf(stderr, "Backref disk bytenr does not"
4000                                         " match extent record, bytenr=%llu, "
4001                                         "ref bytenr=%llu\n",
4002                                         (unsigned long long)rec->start,
4003                                         (unsigned long long)dback->disk_bytenr);
4004                         }
4005
4006                         if (dback->bytes != rec->nr) {
4007                                 err = 1;
4008                                 if (!print_errs)
4009                                         goto out;
4010                                 fprintf(stderr, "Backref bytes do not match "
4011                                         "extent backref, bytenr=%llu, ref "
4012                                         "bytes=%llu, backref bytes=%llu\n",
4013                                         (unsigned long long)rec->start,
4014                                         (unsigned long long)rec->nr,
4015                                         (unsigned long long)dback->bytes);
4016                         }
4017                 }
4018                 if (!back->is_data) {
4019                         found += 1;
4020                 } else {
4021                         dback = to_data_backref(back);
4022                         found += dback->found_ref;
4023                 }
4024         }
4025         if (found != rec->refs) {
4026                 err = 1;
4027                 if (!print_errs)
4028                         goto out;
4029                 fprintf(stderr, "Incorrect global backref count "
4030                         "on %llu found %llu wanted %llu\n",
4031                         (unsigned long long)rec->start,
4032                         (unsigned long long)found,
4033                         (unsigned long long)rec->refs);
4034         }
4035 out:
4036         return err;
4037 }
4038
/* Node destructor handed to rb_free_nodes(): frees the enclosing backref. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
4045
4046 static void free_all_extent_backrefs(struct extent_record *rec)
4047 {
4048         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4049 }
4050
4051 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4052                                      struct cache_tree *extent_cache)
4053 {
4054         struct cache_extent *cache;
4055         struct extent_record *rec;
4056
4057         while (1) {
4058                 cache = first_cache_extent(extent_cache);
4059                 if (!cache)
4060                         break;
4061                 rec = container_of(cache, struct extent_record, cache);
4062                 remove_cache_extent(extent_cache, cache);
4063                 free_all_extent_backrefs(rec);
4064                 free(rec);
4065         }
4066 }
4067
4068 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4069                                  struct extent_record *rec)
4070 {
4071         if (rec->content_checked && rec->owner_ref_checked &&
4072             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4073             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4074             !rec->bad_full_backref && !rec->crossing_stripes &&
4075             !rec->wrong_chunk_type) {
4076                 remove_cache_extent(extent_cache, &rec->cache);
4077                 free_all_extent_backrefs(rec);
4078                 list_del_init(&rec->list);
4079                 free(rec);
4080         }
4081         return 0;
4082 }
4083
4084 static int check_owner_ref(struct btrfs_root *root,
4085                             struct extent_record *rec,
4086                             struct extent_buffer *buf)
4087 {
4088         struct extent_backref *node, *tmp;
4089         struct tree_backref *back;
4090         struct btrfs_root *ref_root;
4091         struct btrfs_key key;
4092         struct btrfs_path path;
4093         struct extent_buffer *parent;
4094         int level;
4095         int found = 0;
4096         int ret;
4097
4098         rbtree_postorder_for_each_entry_safe(node, tmp,
4099                                              &rec->backref_tree, node) {
4100                 if (node->is_data)
4101                         continue;
4102                 if (!node->found_ref)
4103                         continue;
4104                 if (node->full_backref)
4105                         continue;
4106                 back = to_tree_backref(node);
4107                 if (btrfs_header_owner(buf) == back->root)
4108                         return 0;
4109         }
4110         BUG_ON(rec->is_root);
4111
4112         /* try to find the block by search corresponding fs tree */
4113         key.objectid = btrfs_header_owner(buf);
4114         key.type = BTRFS_ROOT_ITEM_KEY;
4115         key.offset = (u64)-1;
4116
4117         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4118         if (IS_ERR(ref_root))
4119                 return 1;
4120
4121         level = btrfs_header_level(buf);
4122         if (level == 0)
4123                 btrfs_item_key_to_cpu(buf, &key, 0);
4124         else
4125                 btrfs_node_key_to_cpu(buf, &key, 0);
4126
4127         btrfs_init_path(&path);
4128         path.lowest_level = level + 1;
4129         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4130         if (ret < 0)
4131                 return 0;
4132
4133         parent = path.nodes[level + 1];
4134         if (parent && buf->start == btrfs_node_blockptr(parent,
4135                                                         path.slots[level + 1]))
4136                 found = 1;
4137
4138         btrfs_release_path(&path);
4139         return found ? 0 : 1;
4140 }
4141
4142 static int is_extent_tree_record(struct extent_record *rec)
4143 {
4144         struct extent_backref *ref, *tmp;
4145         struct tree_backref *back;
4146         int is_extent = 0;
4147
4148         rbtree_postorder_for_each_entry_safe(ref, tmp,
4149                                              &rec->backref_tree, node) {
4150                 if (ref->is_data)
4151                         return 0;
4152                 back = to_tree_backref(ref);
4153                 if (ref->full_backref)
4154                         return 0;
4155                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4156                         is_extent = 1;
4157         }
4158         return is_extent;
4159 }
4160
4161
4162 static int record_bad_block_io(struct btrfs_fs_info *info,
4163                                struct cache_tree *extent_cache,
4164                                u64 start, u64 len)
4165 {
4166         struct extent_record *rec;
4167         struct cache_extent *cache;
4168         struct btrfs_key key;
4169
4170         cache = lookup_cache_extent(extent_cache, start, len);
4171         if (!cache)
4172                 return 0;
4173
4174         rec = container_of(cache, struct extent_record, cache);
4175         if (!is_extent_tree_record(rec))
4176                 return 0;
4177
4178         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4179         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4180 }
4181
4182 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4183                        struct extent_buffer *buf, int slot)
4184 {
4185         if (btrfs_header_level(buf)) {
4186                 struct btrfs_key_ptr ptr1, ptr2;
4187
4188                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4189                                    sizeof(struct btrfs_key_ptr));
4190                 read_extent_buffer(buf, &ptr2,
4191                                    btrfs_node_key_ptr_offset(slot + 1),
4192                                    sizeof(struct btrfs_key_ptr));
4193                 write_extent_buffer(buf, &ptr1,
4194                                     btrfs_node_key_ptr_offset(slot + 1),
4195                                     sizeof(struct btrfs_key_ptr));
4196                 write_extent_buffer(buf, &ptr2,
4197                                     btrfs_node_key_ptr_offset(slot),
4198                                     sizeof(struct btrfs_key_ptr));
4199                 if (slot == 0) {
4200                         struct btrfs_disk_key key;
4201                         btrfs_node_key(buf, &key, 0);
4202                         btrfs_fixup_low_keys(root, path, &key,
4203                                              btrfs_header_level(buf) + 1);
4204                 }
4205         } else {
4206                 struct btrfs_item *item1, *item2;
4207                 struct btrfs_key k1, k2;
4208                 char *item1_data, *item2_data;
4209                 u32 item1_offset, item2_offset, item1_size, item2_size;
4210
4211                 item1 = btrfs_item_nr(slot);
4212                 item2 = btrfs_item_nr(slot + 1);
4213                 btrfs_item_key_to_cpu(buf, &k1, slot);
4214                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4215                 item1_offset = btrfs_item_offset(buf, item1);
4216                 item2_offset = btrfs_item_offset(buf, item2);
4217                 item1_size = btrfs_item_size(buf, item1);
4218                 item2_size = btrfs_item_size(buf, item2);
4219
4220                 item1_data = malloc(item1_size);
4221                 if (!item1_data)
4222                         return -ENOMEM;
4223                 item2_data = malloc(item2_size);
4224                 if (!item2_data) {
4225                         free(item1_data);
4226                         return -ENOMEM;
4227                 }
4228
4229                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4230                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4231
4232                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4233                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4234                 free(item1_data);
4235                 free(item2_data);
4236
4237                 btrfs_set_item_offset(buf, item1, item2_offset);
4238                 btrfs_set_item_offset(buf, item2, item1_offset);
4239                 btrfs_set_item_size(buf, item1, item2_size);
4240                 btrfs_set_item_size(buf, item2, item1_size);
4241
4242                 path->slots[0] = slot;
4243                 btrfs_set_item_key_unsafe(root, path, &k2);
4244                 path->slots[0] = slot + 1;
4245                 btrfs_set_item_key_unsafe(root, path, &k1);
4246         }
4247         return 0;
4248 }
4249
4250 static int fix_key_order(struct btrfs_trans_handle *trans,
4251                          struct btrfs_root *root,
4252                          struct btrfs_path *path)
4253 {
4254         struct extent_buffer *buf;
4255         struct btrfs_key k1, k2;
4256         int i;
4257         int level = path->lowest_level;
4258         int ret = -EIO;
4259
4260         buf = path->nodes[level];
4261         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4262                 if (level) {
4263                         btrfs_node_key_to_cpu(buf, &k1, i);
4264                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4265                 } else {
4266                         btrfs_item_key_to_cpu(buf, &k1, i);
4267                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4268                 }
4269                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4270                         continue;
4271                 ret = swap_values(root, path, buf, i);
4272                 if (ret)
4273                         break;
4274                 btrfs_mark_buffer_dirty(buf);
4275                 i = 0;
4276         }
4277         return ret;
4278 }
4279
4280 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4281                              struct btrfs_root *root,
4282                              struct btrfs_path *path,
4283                              struct extent_buffer *buf, int slot)
4284 {
4285         struct btrfs_key key;
4286         int nritems = btrfs_header_nritems(buf);
4287
4288         btrfs_item_key_to_cpu(buf, &key, slot);
4289
4290         /* These are all the keys we can deal with missing. */
4291         if (key.type != BTRFS_DIR_INDEX_KEY &&
4292             key.type != BTRFS_EXTENT_ITEM_KEY &&
4293             key.type != BTRFS_METADATA_ITEM_KEY &&
4294             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4295             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4296                 return -1;
4297
4298         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4299                (unsigned long long)key.objectid, key.type,
4300                (unsigned long long)key.offset, slot, buf->start);
4301         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4302                               btrfs_item_nr_offset(slot + 1),
4303                               sizeof(struct btrfs_item) *
4304                               (nritems - slot - 1));
4305         btrfs_set_header_nritems(buf, nritems - 1);
4306         if (slot == 0) {
4307                 struct btrfs_disk_key disk_key;
4308
4309                 btrfs_item_key(buf, &disk_key, 0);
4310                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4311         }
4312         btrfs_mark_buffer_dirty(buf);
4313         return 0;
4314 }
4315
4316 static int fix_item_offset(struct btrfs_trans_handle *trans,
4317                            struct btrfs_root *root,
4318                            struct btrfs_path *path)
4319 {
4320         struct extent_buffer *buf;
4321         int i;
4322         int ret = 0;
4323
4324         /* We should only get this for leaves */
4325         BUG_ON(path->lowest_level);
4326         buf = path->nodes[0];
4327 again:
4328         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4329                 unsigned int shift = 0, offset;
4330
4331                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4332                     BTRFS_LEAF_DATA_SIZE(root)) {
4333                         if (btrfs_item_end_nr(buf, i) >
4334                             BTRFS_LEAF_DATA_SIZE(root)) {
4335                                 ret = delete_bogus_item(trans, root, path,
4336                                                         buf, i);
4337                                 if (!ret)
4338                                         goto again;
4339                                 fprintf(stderr, "item is off the end of the "
4340                                         "leaf, can't fix\n");
4341                                 ret = -EIO;
4342                                 break;
4343                         }
4344                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4345                                 btrfs_item_end_nr(buf, i);
4346                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4347                            btrfs_item_offset_nr(buf, i - 1)) {
4348                         if (btrfs_item_end_nr(buf, i) >
4349                             btrfs_item_offset_nr(buf, i - 1)) {
4350                                 ret = delete_bogus_item(trans, root, path,
4351                                                         buf, i);
4352                                 if (!ret)
4353                                         goto again;
4354                                 fprintf(stderr, "items overlap, can't fix\n");
4355                                 ret = -EIO;
4356                                 break;
4357                         }
4358                         shift = btrfs_item_offset_nr(buf, i - 1) -
4359                                 btrfs_item_end_nr(buf, i);
4360                 }
4361                 if (!shift)
4362                         continue;
4363
4364                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4365                        i, shift, (unsigned long long)buf->start);
4366                 offset = btrfs_item_offset_nr(buf, i);
4367                 memmove_extent_buffer(buf,
4368                                       btrfs_leaf_data(buf) + offset + shift,
4369                                       btrfs_leaf_data(buf) + offset,
4370                                       btrfs_item_size_nr(buf, i));
4371                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4372                                       offset + shift);
4373                 btrfs_mark_buffer_dirty(buf);
4374         }
4375
4376         /*
4377          * We may have moved things, in which case we want to exit so we don't
4378          * write those changes out.  Once we have proper abort functionality in
4379          * progs this can be changed to something nicer.
4380          */
4381         BUG_ON(ret);
4382         return ret;
4383 }
4384
4385 /*
4386  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4387  * then just return -EIO.
4388  */
4389 static int try_to_fix_bad_block(struct btrfs_root *root,
4390                                 struct extent_buffer *buf,
4391                                 enum btrfs_tree_block_status status)
4392 {
4393         struct btrfs_trans_handle *trans;
4394         struct ulist *roots;
4395         struct ulist_node *node;
4396         struct btrfs_root *search_root;
4397         struct btrfs_path *path;
4398         struct ulist_iterator iter;
4399         struct btrfs_key root_key, key;
4400         int ret;
4401
4402         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4403             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4404                 return -EIO;
4405
4406         path = btrfs_alloc_path();
4407         if (!path)
4408                 return -EIO;
4409
4410         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4411                                    0, &roots);
4412         if (ret) {
4413                 btrfs_free_path(path);
4414                 return -EIO;
4415         }
4416
4417         ULIST_ITER_INIT(&iter);
4418         while ((node = ulist_next(roots, &iter))) {
4419                 root_key.objectid = node->val;
4420                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4421                 root_key.offset = (u64)-1;
4422
4423                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4424                 if (IS_ERR(root)) {
4425                         ret = -EIO;
4426                         break;
4427                 }
4428
4429
4430                 trans = btrfs_start_transaction(search_root, 0);
4431                 if (IS_ERR(trans)) {
4432                         ret = PTR_ERR(trans);
4433                         break;
4434                 }
4435
4436                 path->lowest_level = btrfs_header_level(buf);
4437                 path->skip_check_block = 1;
4438                 if (path->lowest_level)
4439                         btrfs_node_key_to_cpu(buf, &key, 0);
4440                 else
4441                         btrfs_item_key_to_cpu(buf, &key, 0);
4442                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4443                 if (ret) {
4444                         ret = -EIO;
4445                         btrfs_commit_transaction(trans, search_root);
4446                         break;
4447                 }
4448                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4449                         ret = fix_key_order(trans, search_root, path);
4450                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451                         ret = fix_item_offset(trans, search_root, path);
4452                 if (ret) {
4453                         btrfs_commit_transaction(trans, search_root);
4454                         break;
4455                 }
4456                 btrfs_release_path(path);
4457                 btrfs_commit_transaction(trans, search_root);
4458         }
4459         ulist_free(roots);
4460         btrfs_free_path(path);
4461         return ret;
4462 }
4463
4464 static int check_block(struct btrfs_root *root,
4465                        struct cache_tree *extent_cache,
4466                        struct extent_buffer *buf, u64 flags)
4467 {
4468         struct extent_record *rec;
4469         struct cache_extent *cache;
4470         struct btrfs_key key;
4471         enum btrfs_tree_block_status status;
4472         int ret = 0;
4473         int level;
4474
4475         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4476         if (!cache)
4477                 return 1;
4478         rec = container_of(cache, struct extent_record, cache);
4479         rec->generation = btrfs_header_generation(buf);
4480
4481         level = btrfs_header_level(buf);
4482         if (btrfs_header_nritems(buf) > 0) {
4483
4484                 if (level == 0)
4485                         btrfs_item_key_to_cpu(buf, &key, 0);
4486                 else
4487                         btrfs_node_key_to_cpu(buf, &key, 0);
4488
4489                 rec->info_objectid = key.objectid;
4490         }
4491         rec->info_level = level;
4492
4493         if (btrfs_is_leaf(buf))
4494                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4495         else
4496                 status = btrfs_check_node(root, &rec->parent_key, buf);
4497
4498         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4499                 if (repair)
4500                         status = try_to_fix_bad_block(root, buf, status);
4501                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4502                         ret = -EIO;
4503                         fprintf(stderr, "bad block %llu\n",
4504                                 (unsigned long long)buf->start);
4505                 } else {
4506                         /*
4507                          * Signal to callers we need to start the scan over
4508                          * again since we'll have cowed blocks.
4509                          */
4510                         ret = -EAGAIN;
4511                 }
4512         } else {
4513                 rec->content_checked = 1;
4514                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4515                         rec->owner_ref_checked = 1;
4516                 else {
4517                         ret = check_owner_ref(root, rec, buf);
4518                         if (!ret)
4519                                 rec->owner_ref_checked = 1;
4520                 }
4521         }
4522         if (!ret)
4523                 maybe_free_extent_rec(extent_cache, rec);
4524         return ret;
4525 }
4526
4527
4528 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root)
4530 {
4531         struct rb_node *node;
4532         struct tree_backref *back = NULL;
4533         struct tree_backref match = {
4534                 .node = {
4535                         .is_data = 0,
4536                 },
4537         };
4538
4539         if (parent) {
4540                 match.parent = parent;
4541                 match.node.full_backref = 1;
4542         } else {
4543                 match.root = root;
4544         }
4545
4546         node = rb_search(&rec->backref_tree, &match.node.node,
4547                          (rb_compare_keys)compare_extent_backref, NULL);
4548         if (node)
4549                 back = to_tree_backref(rb_node_to_extent_backref(node));
4550
4551         return back;
4552 }
4553
4554 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4555                                                 u64 parent, u64 root)
4556 {
4557         struct tree_backref *ref = malloc(sizeof(*ref));
4558
4559         if (!ref)
4560                 return NULL;
4561         memset(&ref->node, 0, sizeof(ref->node));
4562         if (parent > 0) {
4563                 ref->parent = parent;
4564                 ref->node.full_backref = 1;
4565         } else {
4566                 ref->root = root;
4567                 ref->node.full_backref = 0;
4568         }
4569         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4570
4571         return ref;
4572 }
4573
4574 static struct data_backref *find_data_backref(struct extent_record *rec,
4575                                                 u64 parent, u64 root,
4576                                                 u64 owner, u64 offset,
4577                                                 int found_ref,
4578                                                 u64 disk_bytenr, u64 bytes)
4579 {
4580         struct rb_node *node;
4581         struct data_backref *back = NULL;
4582         struct data_backref match = {
4583                 .node = {
4584                         .is_data = 1,
4585                 },
4586                 .owner = owner,
4587                 .offset = offset,
4588                 .bytes = bytes,
4589                 .found_ref = found_ref,
4590                 .disk_bytenr = disk_bytenr,
4591         };
4592
4593         if (parent) {
4594                 match.parent = parent;
4595                 match.node.full_backref = 1;
4596         } else {
4597                 match.root = root;
4598         }
4599
4600         node = rb_search(&rec->backref_tree, &match.node.node,
4601                          (rb_compare_keys)compare_extent_backref, NULL);
4602         if (node)
4603                 back = to_data_backref(rb_node_to_extent_backref(node));
4604
4605         return back;
4606 }
4607
4608 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4609                                                 u64 parent, u64 root,
4610                                                 u64 owner, u64 offset,
4611                                                 u64 max_size)
4612 {
4613         struct data_backref *ref = malloc(sizeof(*ref));
4614
4615         if (!ref)
4616                 return NULL;
4617         memset(&ref->node, 0, sizeof(ref->node));
4618         ref->node.is_data = 1;
4619
4620         if (parent > 0) {
4621                 ref->parent = parent;
4622                 ref->owner = 0;
4623                 ref->offset = 0;
4624                 ref->node.full_backref = 1;
4625         } else {
4626                 ref->root = root;
4627                 ref->owner = owner;
4628                 ref->offset = offset;
4629                 ref->node.full_backref = 0;
4630         }
4631         ref->bytes = max_size;
4632         ref->found_ref = 0;
4633         ref->num_refs = 0;
4634         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4635         if (max_size > rec->max_size)
4636                 rec->max_size = max_size;
4637         return ref;
4638 }
4639
4640 /* Check if the type of extent matches with its chunk */
4641 static void check_extent_type(struct extent_record *rec)
4642 {
4643         struct btrfs_block_group_cache *bg_cache;
4644
4645         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4646         if (!bg_cache)
4647                 return;
4648
4649         /* data extent, check chunk directly*/
4650         if (!rec->metadata) {
4651                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4652                         rec->wrong_chunk_type = 1;
4653                 return;
4654         }
4655
4656         /* metadata extent, check the obvious case first */
4657         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4658                                  BTRFS_BLOCK_GROUP_METADATA))) {
4659                 rec->wrong_chunk_type = 1;
4660                 return;
4661         }
4662
4663         /*
4664          * Check SYSTEM extent, as it's also marked as metadata, we can only
4665          * make sure it's a SYSTEM extent by its backref
4666          */
4667         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4668                 struct extent_backref *node;
4669                 struct tree_backref *tback;
4670                 u64 bg_type;
4671
4672                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4673                 if (node->is_data) {
4674                         /* tree block shouldn't have data backref */
4675                         rec->wrong_chunk_type = 1;
4676                         return;
4677                 }
4678                 tback = container_of(node, struct tree_backref, node);
4679
4680                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4681                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4682                 else
4683                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4684                 if (!(bg_cache->flags & bg_type))
4685                         rec->wrong_chunk_type = 1;
4686         }
4687 }
4688
4689 /*
4690  * Allocate a new extent record, fill default values from @tmpl and insert int
4691  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4692  * the cache, otherwise it fails.
4693  */
4694 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4695                 struct extent_record *tmpl)
4696 {
4697         struct extent_record *rec;
4698         int ret = 0;
4699
4700         rec = malloc(sizeof(*rec));
4701         if (!rec)
4702                 return -ENOMEM;
4703         rec->start = tmpl->start;
4704         rec->max_size = tmpl->max_size;
4705         rec->nr = max(tmpl->nr, tmpl->max_size);
4706         rec->found_rec = tmpl->found_rec;
4707         rec->content_checked = tmpl->content_checked;
4708         rec->owner_ref_checked = tmpl->owner_ref_checked;
4709         rec->num_duplicates = 0;
4710         rec->metadata = tmpl->metadata;
4711         rec->flag_block_full_backref = FLAG_UNSET;
4712         rec->bad_full_backref = 0;
4713         rec->crossing_stripes = 0;
4714         rec->wrong_chunk_type = 0;
4715         rec->is_root = tmpl->is_root;
4716         rec->refs = tmpl->refs;
4717         rec->extent_item_refs = tmpl->extent_item_refs;
4718         rec->parent_generation = tmpl->parent_generation;
4719         INIT_LIST_HEAD(&rec->backrefs);
4720         INIT_LIST_HEAD(&rec->dups);
4721         INIT_LIST_HEAD(&rec->list);
4722         rec->backref_tree = RB_ROOT;
4723         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4724         rec->cache.start = tmpl->start;
4725         rec->cache.size = tmpl->nr;
4726         ret = insert_cache_extent(extent_cache, &rec->cache);
4727         BUG_ON(ret);
4728         bytes_used += rec->nr;
4729
4730         if (tmpl->metadata)
4731                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4732                                 global_info->tree_root->nodesize);
4733         check_extent_type(rec);
4734         return ret;
4735 }
4736
4737 /*
4738  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4739  * some are hints:
4740  * - refs              - if found, increase refs
4741  * - is_root           - if found, set
4742  * - content_checked   - if found, set
4743  * - owner_ref_checked - if found, set
4744  *
4745  * If not found, create a new one, initialize and insert.
4746  */
4747 static int add_extent_rec(struct cache_tree *extent_cache,
4748                 struct extent_record *tmpl)
4749 {
4750         struct extent_record *rec;
4751         struct cache_extent *cache;
4752         int ret = 0;
4753         int dup = 0;
4754
4755         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4756         if (cache) {
4757                 rec = container_of(cache, struct extent_record, cache);
4758                 if (tmpl->refs)
4759                         rec->refs++;
4760                 if (rec->nr == 1)
4761                         rec->nr = max(tmpl->nr, tmpl->max_size);
4762
4763                 /*
4764                  * We need to make sure to reset nr to whatever the extent
4765                  * record says was the real size, this way we can compare it to
4766                  * the backrefs.
4767                  */
4768                 if (tmpl->found_rec) {
4769                         if (tmpl->start != rec->start || rec->found_rec) {
4770                                 struct extent_record *tmp;
4771
4772                                 dup = 1;
4773                                 if (list_empty(&rec->list))
4774                                         list_add_tail(&rec->list,
4775                                                       &duplicate_extents);
4776
4777                                 /*
4778                                  * We have to do this song and dance in case we
4779                                  * find an extent record that falls inside of
4780                                  * our current extent record but does not have
4781                                  * the same objectid.
4782                                  */
4783                                 tmp = malloc(sizeof(*tmp));
4784                                 if (!tmp)
4785                                         return -ENOMEM;
4786                                 tmp->start = tmpl->start;
4787                                 tmp->max_size = tmpl->max_size;
4788                                 tmp->nr = tmpl->nr;
4789                                 tmp->found_rec = 1;
4790                                 tmp->metadata = tmpl->metadata;
4791                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4792                                 INIT_LIST_HEAD(&tmp->list);
4793                                 list_add_tail(&tmp->list, &rec->dups);
4794                                 rec->num_duplicates++;
4795                         } else {
4796                                 rec->nr = tmpl->nr;
4797                                 rec->found_rec = 1;
4798                         }
4799                 }
4800
4801                 if (tmpl->extent_item_refs && !dup) {
4802                         if (rec->extent_item_refs) {
4803                                 fprintf(stderr, "block %llu rec "
4804                                         "extent_item_refs %llu, passed %llu\n",
4805                                         (unsigned long long)tmpl->start,
4806                                         (unsigned long long)
4807                                                         rec->extent_item_refs,
4808                                         (unsigned long long)tmpl->extent_item_refs);
4809                         }
4810                         rec->extent_item_refs = tmpl->extent_item_refs;
4811                 }
4812                 if (tmpl->is_root)
4813                         rec->is_root = 1;
4814                 if (tmpl->content_checked)
4815                         rec->content_checked = 1;
4816                 if (tmpl->owner_ref_checked)
4817                         rec->owner_ref_checked = 1;
4818                 memcpy(&rec->parent_key, &tmpl->parent_key,
4819                                 sizeof(tmpl->parent_key));
4820                 if (tmpl->parent_generation)
4821                         rec->parent_generation = tmpl->parent_generation;
4822                 if (rec->max_size < tmpl->max_size)
4823                         rec->max_size = tmpl->max_size;
4824
4825                 /*
4826                  * A metadata extent can't cross stripe_len boundary, otherwise
4827                  * kernel scrub won't be able to handle it.
4828                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4829                  * it.
4830                  */
4831                 if (tmpl->metadata)
4832                         rec->crossing_stripes = check_crossing_stripes(
4833                                 rec->start, global_info->tree_root->nodesize);
4834                 check_extent_type(rec);
4835                 maybe_free_extent_rec(extent_cache, rec);
4836                 return ret;
4837         }
4838
4839         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4840
4841         return ret;
4842 }
4843
4844 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4845                             u64 parent, u64 root, int found_ref)
4846 {
4847         struct extent_record *rec;
4848         struct tree_backref *back;
4849         struct cache_extent *cache;
4850
4851         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4852         if (!cache) {
4853                 struct extent_record tmpl;
4854
4855                 memset(&tmpl, 0, sizeof(tmpl));
4856                 tmpl.start = bytenr;
4857                 tmpl.nr = 1;
4858                 tmpl.metadata = 1;
4859
4860                 add_extent_rec_nolookup(extent_cache, &tmpl);
4861
4862                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4863                 if (!cache)
4864                         abort();
4865         }
4866
4867         rec = container_of(cache, struct extent_record, cache);
4868         if (rec->start != bytenr) {
4869                 abort();
4870         }
4871
4872         back = find_tree_backref(rec, parent, root);
4873         if (!back) {
4874                 back = alloc_tree_backref(rec, parent, root);
4875                 BUG_ON(!back);
4876         }
4877
4878         if (found_ref) {
4879                 if (back->node.found_ref) {
4880                         fprintf(stderr, "Extent back ref already exists "
4881                                 "for %llu parent %llu root %llu \n",
4882                                 (unsigned long long)bytenr,
4883                                 (unsigned long long)parent,
4884                                 (unsigned long long)root);
4885                 }
4886                 back->node.found_ref = 1;
4887         } else {
4888                 if (back->node.found_extent_tree) {
4889                         fprintf(stderr, "Extent back ref already exists "
4890                                 "for %llu parent %llu root %llu \n",
4891                                 (unsigned long long)bytenr,
4892                                 (unsigned long long)parent,
4893                                 (unsigned long long)root);
4894                 }
4895                 back->node.found_extent_tree = 1;
4896         }
4897         check_extent_type(rec);
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4903                             u64 parent, u64 root, u64 owner, u64 offset,
4904                             u32 num_refs, int found_ref, u64 max_size)
4905 {
4906         struct extent_record *rec;
4907         struct data_backref *back;
4908         struct cache_extent *cache;
4909
4910         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4911         if (!cache) {
4912                 struct extent_record tmpl;
4913
4914                 memset(&tmpl, 0, sizeof(tmpl));
4915                 tmpl.start = bytenr;
4916                 tmpl.nr = 1;
4917                 tmpl.max_size = max_size;
4918
4919                 add_extent_rec_nolookup(extent_cache, &tmpl);
4920
4921                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4922                 if (!cache)
4923                         abort();
4924         }
4925
4926         rec = container_of(cache, struct extent_record, cache);
4927         if (rec->max_size < max_size)
4928                 rec->max_size = max_size;
4929
4930         /*
4931          * If found_ref is set then max_size is the real size and must match the
4932          * existing refs.  So if we have already found a ref then we need to
4933          * make sure that this ref matches the existing one, otherwise we need
4934          * to add a new backref so we can notice that the backrefs don't match
4935          * and we need to figure out who is telling the truth.  This is to
4936          * account for that awful fsync bug I introduced where we'd end up with
4937          * a btrfs_file_extent_item that would have its length include multiple
4938          * prealloc extents or point inside of a prealloc extent.
4939          */
4940         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4941                                  bytenr, max_size);
4942         if (!back) {
4943                 back = alloc_data_backref(rec, parent, root, owner, offset,
4944                                           max_size);
4945                 BUG_ON(!back);
4946         }
4947
4948         if (found_ref) {
4949                 BUG_ON(num_refs != 1);
4950                 if (back->node.found_ref)
4951                         BUG_ON(back->bytes != max_size);
4952                 back->node.found_ref = 1;
4953                 back->found_ref += 1;
4954                 back->bytes = max_size;
4955                 back->disk_bytenr = bytenr;
4956                 rec->refs += 1;
4957                 rec->content_checked = 1;
4958                 rec->owner_ref_checked = 1;
4959         } else {
4960                 if (back->node.found_extent_tree) {
4961                         fprintf(stderr, "Extent back ref already exists "
4962                                 "for %llu parent %llu root %llu "
4963                                 "owner %llu offset %llu num_refs %lu\n",
4964                                 (unsigned long long)bytenr,
4965                                 (unsigned long long)parent,
4966                                 (unsigned long long)root,
4967                                 (unsigned long long)owner,
4968                                 (unsigned long long)offset,
4969                                 (unsigned long)num_refs);
4970                 }
4971                 back->num_refs = num_refs;
4972                 back->node.found_extent_tree = 1;
4973         }
4974         maybe_free_extent_rec(extent_cache, rec);
4975         return 0;
4976 }
4977
4978 static int add_pending(struct cache_tree *pending,
4979                        struct cache_tree *seen, u64 bytenr, u32 size)
4980 {
4981         int ret;
4982         ret = add_cache_extent(seen, bytenr, size);
4983         if (ret)
4984                 return ret;
4985         add_cache_extent(pending, bytenr, size);
4986         return 0;
4987 }
4988
4989 static int pick_next_pending(struct cache_tree *pending,
4990                         struct cache_tree *reada,
4991                         struct cache_tree *nodes,
4992                         u64 last, struct block_info *bits, int bits_nr,
4993                         int *reada_bits)
4994 {
4995         unsigned long node_start = last;
4996         struct cache_extent *cache;
4997         int ret;
4998
4999         cache = search_cache_extent(reada, 0);
5000         if (cache) {
5001                 bits[0].start = cache->start;
5002                 bits[0].size = cache->size;
5003                 *reada_bits = 1;
5004                 return 1;
5005         }
5006         *reada_bits = 0;
5007         if (node_start > 32768)
5008                 node_start -= 32768;
5009
5010         cache = search_cache_extent(nodes, node_start);
5011         if (!cache)
5012                 cache = search_cache_extent(nodes, 0);
5013
5014         if (!cache) {
5015                  cache = search_cache_extent(pending, 0);
5016                  if (!cache)
5017                          return 0;
5018                  ret = 0;
5019                  do {
5020                          bits[ret].start = cache->start;
5021                          bits[ret].size = cache->size;
5022                          cache = next_cache_extent(cache);
5023                          ret++;
5024                  } while (cache && ret < bits_nr);
5025                  return ret;
5026         }
5027
5028         ret = 0;
5029         do {
5030                 bits[ret].start = cache->start;
5031                 bits[ret].size = cache->size;
5032                 cache = next_cache_extent(cache);
5033                 ret++;
5034         } while (cache && ret < bits_nr);
5035
5036         if (bits_nr - ret > 8) {
5037                 u64 lookup = bits[0].start + bits[0].size;
5038                 struct cache_extent *next;
5039                 next = search_cache_extent(pending, lookup);
5040                 while(next) {
5041                         if (next->start - lookup > 32768)
5042                                 break;
5043                         bits[ret].start = next->start;
5044                         bits[ret].size = next->size;
5045                         lookup = next->start + next->size;
5046                         ret++;
5047                         if (ret == bits_nr)
5048                                 break;
5049                         next = next_cache_extent(next);
5050                         if (!next)
5051                                 break;
5052                 }
5053         }
5054         return ret;
5055 }
5056
5057 static void free_chunk_record(struct cache_extent *cache)
5058 {
5059         struct chunk_record *rec;
5060
5061         rec = container_of(cache, struct chunk_record, cache);
5062         list_del_init(&rec->list);
5063         list_del_init(&rec->dextents);
5064         free(rec);
5065 }
5066
5067 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5068 {
5069         cache_tree_free_extents(chunk_cache, free_chunk_record);
5070 }
5071
5072 static void free_device_record(struct rb_node *node)
5073 {
5074         struct device_record *rec;
5075
5076         rec = container_of(node, struct device_record, node);
5077         free(rec);
5078 }
5079
5080 FREE_RB_BASED_TREE(device_cache, free_device_record);
5081
5082 int insert_block_group_record(struct block_group_tree *tree,
5083                               struct block_group_record *bg_rec)
5084 {
5085         int ret;
5086
5087         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5088         if (ret)
5089                 return ret;
5090
5091         list_add_tail(&bg_rec->list, &tree->block_groups);
5092         return 0;
5093 }
5094
5095 static void free_block_group_record(struct cache_extent *cache)
5096 {
5097         struct block_group_record *rec;
5098
5099         rec = container_of(cache, struct block_group_record, cache);
5100         list_del_init(&rec->list);
5101         free(rec);
5102 }
5103
5104 void free_block_group_tree(struct block_group_tree *tree)
5105 {
5106         cache_tree_free_extents(&tree->tree, free_block_group_record);
5107 }
5108
5109 int insert_device_extent_record(struct device_extent_tree *tree,
5110                                 struct device_extent_record *de_rec)
5111 {
5112         int ret;
5113
5114         /*
5115          * Device extent is a bit different from the other extents, because
5116          * the extents which belong to the different devices may have the
5117          * same start and size, so we need use the special extent cache
5118          * search/insert functions.
5119          */
5120         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5121         if (ret)
5122                 return ret;
5123
5124         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5125         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5126         return 0;
5127 }
5128
5129 static void free_device_extent_record(struct cache_extent *cache)
5130 {
5131         struct device_extent_record *rec;
5132
5133         rec = container_of(cache, struct device_extent_record, cache);
5134         if (!list_empty(&rec->chunk_list))
5135                 list_del_init(&rec->chunk_list);
5136         if (!list_empty(&rec->device_list))
5137                 list_del_init(&rec->device_list);
5138         free(rec);
5139 }
5140
5141 void free_device_extent_tree(struct device_extent_tree *tree)
5142 {
5143         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5144 }
5145
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * In both cases key.objectid is the extent bytenr and key.offset is passed
 * as the parent.
 *
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
		return 0;
	}

	add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	return 0;
}
#endif
5164
5165 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5166                                             struct btrfs_key *key,
5167                                             int slot)
5168 {
5169         struct btrfs_chunk *ptr;
5170         struct chunk_record *rec;
5171         int num_stripes, i;
5172
5173         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5174         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5175
5176         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5177         if (!rec) {
5178                 fprintf(stderr, "memory allocation failed\n");
5179                 exit(-1);
5180         }
5181
5182         INIT_LIST_HEAD(&rec->list);
5183         INIT_LIST_HEAD(&rec->dextents);
5184         rec->bg_rec = NULL;
5185
5186         rec->cache.start = key->offset;
5187         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5188
5189         rec->generation = btrfs_header_generation(leaf);
5190
5191         rec->objectid = key->objectid;
5192         rec->type = key->type;
5193         rec->offset = key->offset;
5194
5195         rec->length = rec->cache.size;
5196         rec->owner = btrfs_chunk_owner(leaf, ptr);
5197         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5198         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5199         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5200         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5201         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5202         rec->num_stripes = num_stripes;
5203         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5204
5205         for (i = 0; i < rec->num_stripes; ++i) {
5206                 rec->stripes[i].devid =
5207                         btrfs_stripe_devid_nr(leaf, ptr, i);
5208                 rec->stripes[i].offset =
5209                         btrfs_stripe_offset_nr(leaf, ptr, i);
5210                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5211                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5212                                 BTRFS_UUID_SIZE);
5213         }
5214
5215         return rec;
5216 }
5217
5218 static int process_chunk_item(struct cache_tree *chunk_cache,
5219                               struct btrfs_key *key, struct extent_buffer *eb,
5220                               int slot)
5221 {
5222         struct chunk_record *rec;
5223         int ret = 0;
5224
5225         rec = btrfs_new_chunk_record(eb, key, slot);
5226         ret = insert_cache_extent(chunk_cache, &rec->cache);
5227         if (ret) {
5228                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5229                         rec->offset, rec->length);
5230                 free(rec);
5231         }
5232
5233         return ret;
5234 }
5235
5236 static int process_device_item(struct rb_root *dev_cache,
5237                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5238 {
5239         struct btrfs_dev_item *ptr;
5240         struct device_record *rec;
5241         int ret = 0;
5242
5243         ptr = btrfs_item_ptr(eb,
5244                 slot, struct btrfs_dev_item);
5245
5246         rec = malloc(sizeof(*rec));
5247         if (!rec) {
5248                 fprintf(stderr, "memory allocation failed\n");
5249                 return -ENOMEM;
5250         }
5251
5252         rec->devid = key->offset;
5253         rec->generation = btrfs_header_generation(eb);
5254
5255         rec->objectid = key->objectid;
5256         rec->type = key->type;
5257         rec->offset = key->offset;
5258
5259         rec->devid = btrfs_device_id(eb, ptr);
5260         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5261         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5262
5263         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5264         if (ret) {
5265                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5266                 free(rec);
5267         }
5268
5269         return ret;
5270 }
5271
5272 struct block_group_record *
5273 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5274                              int slot)
5275 {
5276         struct btrfs_block_group_item *ptr;
5277         struct block_group_record *rec;
5278
5279         rec = calloc(1, sizeof(*rec));
5280         if (!rec) {
5281                 fprintf(stderr, "memory allocation failed\n");
5282                 exit(-1);
5283         }
5284
5285         rec->cache.start = key->objectid;
5286         rec->cache.size = key->offset;
5287
5288         rec->generation = btrfs_header_generation(leaf);
5289
5290         rec->objectid = key->objectid;
5291         rec->type = key->type;
5292         rec->offset = key->offset;
5293
5294         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5295         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5296
5297         INIT_LIST_HEAD(&rec->list);
5298
5299         return rec;
5300 }
5301
5302 static int process_block_group_item(struct block_group_tree *block_group_cache,
5303                                     struct btrfs_key *key,
5304                                     struct extent_buffer *eb, int slot)
5305 {
5306         struct block_group_record *rec;
5307         int ret = 0;
5308
5309         rec = btrfs_new_block_group_record(eb, key, slot);
5310         ret = insert_block_group_record(block_group_cache, rec);
5311         if (ret) {
5312                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5313                         rec->objectid, rec->offset);
5314                 free(rec);
5315         }
5316
5317         return ret;
5318 }
5319
5320 struct device_extent_record *
5321 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5322                                struct btrfs_key *key, int slot)
5323 {
5324         struct device_extent_record *rec;
5325         struct btrfs_dev_extent *ptr;
5326
5327         rec = calloc(1, sizeof(*rec));
5328         if (!rec) {
5329                 fprintf(stderr, "memory allocation failed\n");
5330                 exit(-1);
5331         }
5332
5333         rec->cache.objectid = key->objectid;
5334         rec->cache.start = key->offset;
5335
5336         rec->generation = btrfs_header_generation(leaf);
5337
5338         rec->objectid = key->objectid;
5339         rec->type = key->type;
5340         rec->offset = key->offset;
5341
5342         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5343         rec->chunk_objecteid =
5344                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5345         rec->chunk_offset =
5346                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5347         rec->length = btrfs_dev_extent_length(leaf, ptr);
5348         rec->cache.size = rec->length;
5349
5350         INIT_LIST_HEAD(&rec->chunk_list);
5351         INIT_LIST_HEAD(&rec->device_list);
5352
5353         return rec;
5354 }
5355
5356 static int
5357 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5358                            struct btrfs_key *key, struct extent_buffer *eb,
5359                            int slot)
5360 {
5361         struct device_extent_record *rec;
5362         int ret;
5363
5364         rec = btrfs_new_device_extent_record(eb, key, slot);
5365         ret = insert_device_extent_record(dev_extent_cache, rec);
5366         if (ret) {
5367                 fprintf(stderr,
5368                         "Device extent[%llu, %llu, %llu] existed.\n",
5369                         rec->objectid, rec->offset, rec->length);
5370                 free(rec);
5371         }
5372
5373         return ret;
5374 }
5375
5376 static int process_extent_item(struct btrfs_root *root,
5377                                struct cache_tree *extent_cache,
5378                                struct extent_buffer *eb, int slot)
5379 {
5380         struct btrfs_extent_item *ei;
5381         struct btrfs_extent_inline_ref *iref;
5382         struct btrfs_extent_data_ref *dref;
5383         struct btrfs_shared_data_ref *sref;
5384         struct btrfs_key key;
5385         struct extent_record tmpl;
5386         unsigned long end;
5387         unsigned long ptr;
5388         int type;
5389         u32 item_size = btrfs_item_size_nr(eb, slot);
5390         u64 refs = 0;
5391         u64 offset;
5392         u64 num_bytes;
5393         int metadata = 0;
5394
5395         btrfs_item_key_to_cpu(eb, &key, slot);
5396
5397         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5398                 metadata = 1;
5399                 num_bytes = root->nodesize;
5400         } else {
5401                 num_bytes = key.offset;
5402         }
5403
5404         if (item_size < sizeof(*ei)) {
5405 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5406                 struct btrfs_extent_item_v0 *ei0;
5407                 BUG_ON(item_size != sizeof(*ei0));
5408                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5409                 refs = btrfs_extent_refs_v0(eb, ei0);
5410 #else
5411                 BUG();
5412 #endif
5413                 memset(&tmpl, 0, sizeof(tmpl));
5414                 tmpl.start = key.objectid;
5415                 tmpl.nr = num_bytes;
5416                 tmpl.extent_item_refs = refs;
5417                 tmpl.metadata = metadata;
5418                 tmpl.found_rec = 1;
5419                 tmpl.max_size = num_bytes;
5420
5421                 return add_extent_rec(extent_cache, &tmpl);
5422         }
5423
5424         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5425         refs = btrfs_extent_refs(eb, ei);
5426         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5427                 metadata = 1;
5428         else
5429                 metadata = 0;
5430
5431         memset(&tmpl, 0, sizeof(tmpl));
5432         tmpl.start = key.objectid;
5433         tmpl.nr = num_bytes;
5434         tmpl.extent_item_refs = refs;
5435         tmpl.metadata = metadata;
5436         tmpl.found_rec = 1;
5437         tmpl.max_size = num_bytes;
5438         add_extent_rec(extent_cache, &tmpl);
5439
5440         ptr = (unsigned long)(ei + 1);
5441         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5442             key.type == BTRFS_EXTENT_ITEM_KEY)
5443                 ptr += sizeof(struct btrfs_tree_block_info);
5444
5445         end = (unsigned long)ei + item_size;
5446         while (ptr < end) {
5447                 iref = (struct btrfs_extent_inline_ref *)ptr;
5448                 type = btrfs_extent_inline_ref_type(eb, iref);
5449                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5450                 switch (type) {
5451                 case BTRFS_TREE_BLOCK_REF_KEY:
5452                         add_tree_backref(extent_cache, key.objectid,
5453                                          0, offset, 0);
5454                         break;
5455                 case BTRFS_SHARED_BLOCK_REF_KEY:
5456                         add_tree_backref(extent_cache, key.objectid,
5457                                          offset, 0, 0);
5458                         break;
5459                 case BTRFS_EXTENT_DATA_REF_KEY:
5460                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5461                         add_data_backref(extent_cache, key.objectid, 0,
5462                                         btrfs_extent_data_ref_root(eb, dref),
5463                                         btrfs_extent_data_ref_objectid(eb,
5464                                                                        dref),
5465                                         btrfs_extent_data_ref_offset(eb, dref),
5466                                         btrfs_extent_data_ref_count(eb, dref),
5467                                         0, num_bytes);
5468                         break;
5469                 case BTRFS_SHARED_DATA_REF_KEY:
5470                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5471                         add_data_backref(extent_cache, key.objectid, offset,
5472                                         0, 0, 0,
5473                                         btrfs_shared_data_ref_count(eb, sref),
5474                                         0, num_bytes);
5475                         break;
5476                 default:
5477                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5478                                 key.objectid, key.type, num_bytes);
5479                         goto out;
5480                 }
5481                 ptr += btrfs_extent_inline_ref_size(type);
5482         }
5483         WARN_ON(ptr > end);
5484 out:
5485         return 0;
5486 }
5487
5488 static int check_cache_range(struct btrfs_root *root,
5489                              struct btrfs_block_group_cache *cache,
5490                              u64 offset, u64 bytes)
5491 {
5492         struct btrfs_free_space *entry;
5493         u64 *logical;
5494         u64 bytenr;
5495         int stripe_len;
5496         int i, nr, ret;
5497
5498         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5499                 bytenr = btrfs_sb_offset(i);
5500                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5501                                        cache->key.objectid, bytenr, 0,
5502                                        &logical, &nr, &stripe_len);
5503                 if (ret)
5504                         return ret;
5505
5506                 while (nr--) {
5507                         if (logical[nr] + stripe_len <= offset)
5508                                 continue;
5509                         if (offset + bytes <= logical[nr])
5510                                 continue;
5511                         if (logical[nr] == offset) {
5512                                 if (stripe_len >= bytes) {
5513                                         kfree(logical);
5514                                         return 0;
5515                                 }
5516                                 bytes -= stripe_len;
5517                                 offset += stripe_len;
5518                         } else if (logical[nr] < offset) {
5519                                 if (logical[nr] + stripe_len >=
5520                                     offset + bytes) {
5521                                         kfree(logical);
5522                                         return 0;
5523                                 }
5524                                 bytes = (offset + bytes) -
5525                                         (logical[nr] + stripe_len);
5526                                 offset = logical[nr] + stripe_len;
5527                         } else {
5528                                 /*
5529                                  * Could be tricky, the super may land in the
5530                                  * middle of the area we're checking.  First
5531                                  * check the easiest case, it's at the end.
5532                                  */
5533                                 if (logical[nr] + stripe_len >=
5534                                     bytes + offset) {
5535                                         bytes = logical[nr] - offset;
5536                                         continue;
5537                                 }
5538
5539                                 /* Check the left side */
5540                                 ret = check_cache_range(root, cache,
5541                                                         offset,
5542                                                         logical[nr] - offset);
5543                                 if (ret) {
5544                                         kfree(logical);
5545                                         return ret;
5546                                 }
5547
5548                                 /* Now we continue with the right side */
5549                                 bytes = (offset + bytes) -
5550                                         (logical[nr] + stripe_len);
5551                                 offset = logical[nr] + stripe_len;
5552                         }
5553                 }
5554
5555                 kfree(logical);
5556         }
5557
5558         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5559         if (!entry) {
5560                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5561                         offset, offset+bytes);
5562                 return -EINVAL;
5563         }
5564
5565         if (entry->offset != offset) {
5566                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5567                         entry->offset);
5568                 return -EINVAL;
5569         }
5570
5571         if (entry->bytes != bytes) {
5572                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5573                         bytes, entry->bytes, offset);
5574                 return -EINVAL;
5575         }
5576
5577         unlink_free_space(cache->free_space_ctl, entry);
5578         free(entry);
5579         return 0;
5580 }
5581
5582 static int verify_space_cache(struct btrfs_root *root,
5583                               struct btrfs_block_group_cache *cache)
5584 {
5585         struct btrfs_path *path;
5586         struct extent_buffer *leaf;
5587         struct btrfs_key key;
5588         u64 last;
5589         int ret = 0;
5590
5591         path = btrfs_alloc_path();
5592         if (!path)
5593                 return -ENOMEM;
5594
5595         root = root->fs_info->extent_root;
5596
5597         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5598
5599         key.objectid = last;
5600         key.offset = 0;
5601         key.type = BTRFS_EXTENT_ITEM_KEY;
5602
5603         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5604         if (ret < 0)
5605                 goto out;
5606         ret = 0;
5607         while (1) {
5608                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5609                         ret = btrfs_next_leaf(root, path);
5610                         if (ret < 0)
5611                                 goto out;
5612                         if (ret > 0) {
5613                                 ret = 0;
5614                                 break;
5615                         }
5616                 }
5617                 leaf = path->nodes[0];
5618                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5619                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5620                         break;
5621                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5622                     key.type != BTRFS_METADATA_ITEM_KEY) {
5623                         path->slots[0]++;
5624                         continue;
5625                 }
5626
5627                 if (last == key.objectid) {
5628                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5629                                 last = key.objectid + key.offset;
5630                         else
5631                                 last = key.objectid + root->nodesize;
5632                         path->slots[0]++;
5633                         continue;
5634                 }
5635
5636                 ret = check_cache_range(root, cache, last,
5637                                         key.objectid - last);
5638                 if (ret)
5639                         break;
5640                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5641                         last = key.objectid + key.offset;
5642                 else
5643                         last = key.objectid + root->nodesize;
5644                 path->slots[0]++;
5645         }
5646
5647         if (last < cache->key.objectid + cache->key.offset)
5648                 ret = check_cache_range(root, cache, last,
5649                                         cache->key.objectid +
5650                                         cache->key.offset - last);
5651
5652 out:
5653         btrfs_free_path(path);
5654
5655         if (!ret &&
5656             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5657                 fprintf(stderr, "There are still entries left in the space "
5658                         "cache\n");
5659                 ret = -EINVAL;
5660         }
5661
5662         return ret;
5663 }
5664
5665 static int check_space_cache(struct btrfs_root *root)
5666 {
5667         struct btrfs_block_group_cache *cache;
5668         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5669         int ret;
5670         int error = 0;
5671
5672         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5673             btrfs_super_generation(root->fs_info->super_copy) !=
5674             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5675                 printf("cache and super generation don't match, space cache "
5676                        "will be invalidated\n");
5677                 return 0;
5678         }
5679
5680         if (ctx.progress_enabled) {
5681                 ctx.tp = TASK_FREE_SPACE;
5682                 task_start(ctx.info);
5683         }
5684
5685         while (1) {
5686                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5687                 if (!cache)
5688                         break;
5689
5690                 start = cache->key.objectid + cache->key.offset;
5691                 if (!cache->free_space_ctl) {
5692                         if (btrfs_init_free_space_ctl(cache,
5693                                                       root->sectorsize)) {
5694                                 ret = -ENOMEM;
5695                                 break;
5696                         }
5697                 } else {
5698                         btrfs_remove_free_space_cache(cache);
5699                 }
5700
5701                 if (btrfs_fs_compat_ro(root->fs_info,
5702                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5703                         ret = exclude_super_stripes(root, cache);
5704                         if (ret) {
5705                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5706                                         strerror(-ret));
5707                                 error++;
5708                                 continue;
5709                         }
5710                         ret = load_free_space_tree(root->fs_info, cache);
5711                         free_excluded_extents(root, cache);
5712                         if (ret < 0) {
5713                                 fprintf(stderr, "could not load free space tree: %s\n",
5714                                         strerror(-ret));
5715                                 error++;
5716                                 continue;
5717                         }
5718                         error += ret;
5719                 } else {
5720                         ret = load_free_space_cache(root->fs_info, cache);
5721                         if (!ret)
5722                                 continue;
5723                 }
5724
5725                 ret = verify_space_cache(root, cache);
5726                 if (ret) {
5727                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5728                                 cache->key.objectid);
5729                         error++;
5730                 }
5731         }
5732
5733         task_stop(ctx.info);
5734
5735         return error ? -EINVAL : 0;
5736 }
5737
5738 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5739                         u64 num_bytes, unsigned long leaf_offset,
5740                         struct extent_buffer *eb) {
5741
5742         u64 offset = 0;
5743         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5744         char *data;
5745         unsigned long csum_offset;
5746         u32 csum;
5747         u32 csum_expected;
5748         u64 read_len;
5749         u64 data_checked = 0;
5750         u64 tmp;
5751         int ret = 0;
5752         int mirror;
5753         int num_copies;
5754
5755         if (num_bytes % root->sectorsize)
5756                 return -EINVAL;
5757
5758         data = malloc(num_bytes);
5759         if (!data)
5760                 return -ENOMEM;
5761
5762         while (offset < num_bytes) {
5763                 mirror = 0;
5764 again:
5765                 read_len = num_bytes - offset;
5766                 /* read as much space once a time */
5767                 ret = read_extent_data(root, data + offset,
5768                                 bytenr + offset, &read_len, mirror);
5769                 if (ret)
5770                         goto out;
5771                 data_checked = 0;
5772                 /* verify every 4k data's checksum */
5773                 while (data_checked < read_len) {
5774                         csum = ~(u32)0;
5775                         tmp = offset + data_checked;
5776
5777                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5778                                                csum, root->sectorsize);
5779                         btrfs_csum_final(csum, (char *)&csum);
5780
5781                         csum_offset = leaf_offset +
5782                                  tmp / root->sectorsize * csum_size;
5783                         read_extent_buffer(eb, (char *)&csum_expected,
5784                                            csum_offset, csum_size);
5785                         /* try another mirror */
5786                         if (csum != csum_expected) {
5787                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5788                                                 mirror, bytenr + tmp,
5789                                                 csum, csum_expected);
5790                                 num_copies = btrfs_num_copies(
5791                                                 &root->fs_info->mapping_tree,
5792                                                 bytenr, num_bytes);
5793                                 if (mirror < num_copies - 1) {
5794                                         mirror += 1;
5795                                         goto again;
5796                                 }
5797                         }
5798                         data_checked += root->sectorsize;
5799                 }
5800                 offset += read_len;
5801         }
5802 out:
5803         free(data);
5804         return ret;
5805 }
5806
5807 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5808                                u64 num_bytes)
5809 {
5810         struct btrfs_path *path;
5811         struct extent_buffer *leaf;
5812         struct btrfs_key key;
5813         int ret;
5814
5815         path = btrfs_alloc_path();
5816         if (!path) {
5817                 fprintf(stderr, "Error allocating path\n");
5818                 return -ENOMEM;
5819         }
5820
5821         key.objectid = bytenr;
5822         key.type = BTRFS_EXTENT_ITEM_KEY;
5823         key.offset = (u64)-1;
5824
5825 again:
5826         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5827                                 0, 0);
5828         if (ret < 0) {
5829                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5830                 btrfs_free_path(path);
5831                 return ret;
5832         } else if (ret) {
5833                 if (path->slots[0] > 0) {
5834                         path->slots[0]--;
5835                 } else {
5836                         ret = btrfs_prev_leaf(root, path);
5837                         if (ret < 0) {
5838                                 goto out;
5839                         } else if (ret > 0) {
5840                                 ret = 0;
5841                                 goto out;
5842                         }
5843                 }
5844         }
5845
5846         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5847
5848         /*
5849          * Block group items come before extent items if they have the same
5850          * bytenr, so walk back one more just in case.  Dear future traveller,
5851          * first congrats on mastering time travel.  Now if it's not too much
5852          * trouble could you go back to 2006 and tell Chris to make the
5853          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5854          * EXTENT_ITEM_KEY please?
5855          */
5856         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5857                 if (path->slots[0] > 0) {
5858                         path->slots[0]--;
5859                 } else {
5860                         ret = btrfs_prev_leaf(root, path);
5861                         if (ret < 0) {
5862                                 goto out;
5863                         } else if (ret > 0) {
5864                                 ret = 0;
5865                                 goto out;
5866                         }
5867                 }
5868                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5869         }
5870
5871         while (num_bytes) {
5872                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5873                         ret = btrfs_next_leaf(root, path);
5874                         if (ret < 0) {
5875                                 fprintf(stderr, "Error going to next leaf "
5876                                         "%d\n", ret);
5877                                 btrfs_free_path(path);
5878                                 return ret;
5879                         } else if (ret) {
5880                                 break;
5881                         }
5882                 }
5883                 leaf = path->nodes[0];
5884                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5885                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5886                         path->slots[0]++;
5887                         continue;
5888                 }
5889                 if (key.objectid + key.offset < bytenr) {
5890                         path->slots[0]++;
5891                         continue;
5892                 }
5893                 if (key.objectid > bytenr + num_bytes)
5894                         break;
5895
5896                 if (key.objectid == bytenr) {
5897                         if (key.offset >= num_bytes) {
5898                                 num_bytes = 0;
5899                                 break;
5900                         }
5901                         num_bytes -= key.offset;
5902                         bytenr += key.offset;
5903                 } else if (key.objectid < bytenr) {
5904                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5905                                 num_bytes = 0;
5906                                 break;
5907                         }
5908                         num_bytes = (bytenr + num_bytes) -
5909                                 (key.objectid + key.offset);
5910                         bytenr = key.objectid + key.offset;
5911                 } else {
5912                         if (key.objectid + key.offset < bytenr + num_bytes) {
5913                                 u64 new_start = key.objectid + key.offset;
5914                                 u64 new_bytes = bytenr + num_bytes - new_start;
5915
5916                                 /*
5917                                  * Weird case, the extent is in the middle of
5918                                  * our range, we'll have to search one side
5919                                  * and then the other.  Not sure if this happens
5920                                  * in real life, but no harm in coding it up
5921                                  * anyway just in case.
5922                                  */
5923                                 btrfs_release_path(path);
5924                                 ret = check_extent_exists(root, new_start,
5925                                                           new_bytes);
5926                                 if (ret) {
5927                                         fprintf(stderr, "Right section didn't "
5928                                                 "have a record\n");
5929                                         break;
5930                                 }
5931                                 num_bytes = key.objectid - bytenr;
5932                                 goto again;
5933                         }
5934                         num_bytes = key.objectid - bytenr;
5935                 }
5936                 path->slots[0]++;
5937         }
5938         ret = 0;
5939
5940 out:
5941         if (num_bytes && !ret) {
5942                 fprintf(stderr, "There are no extents for csum range "
5943                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5944                 ret = 1;
5945         }
5946
5947         btrfs_free_path(path);
5948         return ret;
5949 }
5950
5951 static int check_csums(struct btrfs_root *root)
5952 {
5953         struct btrfs_path *path;
5954         struct extent_buffer *leaf;
5955         struct btrfs_key key;
5956         u64 offset = 0, num_bytes = 0;
5957         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5958         int errors = 0;
5959         int ret;
5960         u64 data_len;
5961         unsigned long leaf_offset;
5962
5963         root = root->fs_info->csum_root;
5964         if (!extent_buffer_uptodate(root->node)) {
5965                 fprintf(stderr, "No valid csum tree found\n");
5966                 return -ENOENT;
5967         }
5968
5969         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5970         key.type = BTRFS_EXTENT_CSUM_KEY;
5971         key.offset = 0;
5972
5973         path = btrfs_alloc_path();
5974         if (!path)
5975                 return -ENOMEM;
5976
5977         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5978         if (ret < 0) {
5979                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5980                 btrfs_free_path(path);
5981                 return ret;
5982         }
5983
5984         if (ret > 0 && path->slots[0])
5985                 path->slots[0]--;
5986         ret = 0;
5987
5988         while (1) {
5989                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5990                         ret = btrfs_next_leaf(root, path);
5991                         if (ret < 0) {
5992                                 fprintf(stderr, "Error going to next leaf "
5993                                         "%d\n", ret);
5994                                 break;
5995                         }
5996                         if (ret)
5997                                 break;
5998                 }
5999                 leaf = path->nodes[0];
6000
6001                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6002                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6003                         path->slots[0]++;
6004                         continue;
6005                 }
6006
6007                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6008                               csum_size) * root->sectorsize;
6009                 if (!check_data_csum)
6010                         goto skip_csum_check;
6011                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6012                 ret = check_extent_csums(root, key.offset, data_len,
6013                                          leaf_offset, leaf);
6014                 if (ret)
6015                         break;
6016 skip_csum_check:
6017                 if (!num_bytes) {
6018                         offset = key.offset;
6019                 } else if (key.offset != offset + num_bytes) {
6020                         ret = check_extent_exists(root, offset, num_bytes);
6021                         if (ret) {
6022                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6023                                         "there is no extent record\n",
6024                                         offset, offset+num_bytes);
6025                                 errors++;
6026                         }
6027                         offset = key.offset;
6028                         num_bytes = 0;
6029                 }
6030                 num_bytes += data_len;
6031                 path->slots[0]++;
6032         }
6033
6034         btrfs_free_path(path);
6035         return errors;
6036 }
6037
6038 static int is_dropped_key(struct btrfs_key *key,
6039                           struct btrfs_key *drop_key) {
6040         if (key->objectid < drop_key->objectid)
6041                 return 1;
6042         else if (key->objectid == drop_key->objectid) {
6043                 if (key->type < drop_key->type)
6044                         return 1;
6045                 else if (key->type == drop_key->type) {
6046                         if (key->offset < drop_key->offset)
6047                                 return 1;
6048                 }
6049         }
6050         return 0;
6051 }
6052
6053 /*
6054  * Here are the rules for FULL_BACKREF.
6055  *
6056  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6057  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6058  *      FULL_BACKREF set.
6059  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6060  *    if it happened after the relocation occurred since we'll have dropped the
6061  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6062  *    have no real way to know for sure.
6063  *
6064  * We process the blocks one root at a time, and we start from the lowest root
6065  * objectid and go to the highest.  So we can just lookup the owner backref for
6066  * the record and if we don't find it then we know it doesn't exist and we have
6067  * a FULL BACKREF.
6068  *
6069  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6070  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6071  * be set or not and then we can check later once we've gathered all the refs.
6072  */
6073 static int calc_extent_flag(struct btrfs_root *root,
6074                            struct cache_tree *extent_cache,
6075                            struct extent_buffer *buf,
6076                            struct root_item_record *ri,
6077                            u64 *flags)
6078 {
6079         struct extent_record *rec;
6080         struct cache_extent *cache;
6081         struct tree_backref *tback;
6082         u64 owner = 0;
6083
6084         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6085         /* we have added this extent before */
6086         BUG_ON(!cache);
6087         rec = container_of(cache, struct extent_record, cache);
6088
6089         /*
6090          * Except file/reloc tree, we can not have
6091          * FULL BACKREF MODE
6092          */
6093         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6094                 goto normal;
6095         /*
6096          * root node
6097          */
6098         if (buf->start == ri->bytenr)
6099                 goto normal;
6100
6101         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6102                 goto full_backref;
6103
6104         owner = btrfs_header_owner(buf);
6105         if (owner == ri->objectid)
6106                 goto normal;
6107
6108         tback = find_tree_backref(rec, 0, owner);
6109         if (!tback)
6110                 goto full_backref;
6111 normal:
6112         *flags = 0;
6113         if (rec->flag_block_full_backref != FLAG_UNSET &&
6114             rec->flag_block_full_backref != 0)
6115                 rec->bad_full_backref = 1;
6116         return 0;
6117 full_backref:
6118         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6119         if (rec->flag_block_full_backref != FLAG_UNSET &&
6120             rec->flag_block_full_backref != 1)
6121                 rec->bad_full_backref = 1;
6122         return 0;
6123 }
6124
6125 static int run_next_block(struct btrfs_root *root,
6126                           struct block_info *bits,
6127                           int bits_nr,
6128                           u64 *last,
6129                           struct cache_tree *pending,
6130                           struct cache_tree *seen,
6131                           struct cache_tree *reada,
6132                           struct cache_tree *nodes,
6133                           struct cache_tree *extent_cache,
6134                           struct cache_tree *chunk_cache,
6135                           struct rb_root *dev_cache,
6136                           struct block_group_tree *block_group_cache,
6137                           struct device_extent_tree *dev_extent_cache,
6138                           struct root_item_record *ri)
6139 {
6140         struct extent_buffer *buf;
6141         struct extent_record *rec = NULL;
6142         u64 bytenr;
6143         u32 size;
6144         u64 parent;
6145         u64 owner;
6146         u64 flags;
6147         u64 ptr;
6148         u64 gen = 0;
6149         int ret = 0;
6150         int i;
6151         int nritems;
6152         struct btrfs_key key;
6153         struct cache_extent *cache;
6154         int reada_bits;
6155
6156         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6157                                     bits_nr, &reada_bits);
6158         if (nritems == 0)
6159                 return 1;
6160
6161         if (!reada_bits) {
6162                 for(i = 0; i < nritems; i++) {
6163                         ret = add_cache_extent(reada, bits[i].start,
6164                                                bits[i].size);
6165                         if (ret == -EEXIST)
6166                                 continue;
6167
6168                         /* fixme, get the parent transid */
6169                         readahead_tree_block(root, bits[i].start,
6170                                              bits[i].size, 0);
6171                 }
6172         }
6173         *last = bits[0].start;
6174         bytenr = bits[0].start;
6175         size = bits[0].size;
6176
6177         cache = lookup_cache_extent(pending, bytenr, size);
6178         if (cache) {
6179                 remove_cache_extent(pending, cache);
6180                 free(cache);
6181         }
6182         cache = lookup_cache_extent(reada, bytenr, size);
6183         if (cache) {
6184                 remove_cache_extent(reada, cache);
6185                 free(cache);
6186         }
6187         cache = lookup_cache_extent(nodes, bytenr, size);
6188         if (cache) {
6189                 remove_cache_extent(nodes, cache);
6190                 free(cache);
6191         }
6192         cache = lookup_cache_extent(extent_cache, bytenr, size);
6193         if (cache) {
6194                 rec = container_of(cache, struct extent_record, cache);
6195                 gen = rec->parent_generation;
6196         }
6197
6198         /* fixme, get the real parent transid */
6199         buf = read_tree_block(root, bytenr, size, gen);
6200         if (!extent_buffer_uptodate(buf)) {
6201                 record_bad_block_io(root->fs_info,
6202                                     extent_cache, bytenr, size);
6203                 goto out;
6204         }
6205
6206         nritems = btrfs_header_nritems(buf);
6207
6208         flags = 0;
6209         if (!init_extent_tree) {
6210                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6211                                        btrfs_header_level(buf), 1, NULL,
6212                                        &flags);
6213                 if (ret < 0) {
6214                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6215                         if (ret < 0) {
6216                                 fprintf(stderr, "Couldn't calc extent flags\n");
6217                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6218                         }
6219                 }
6220         } else {
6221                 flags = 0;
6222                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6223                 if (ret < 0) {
6224                         fprintf(stderr, "Couldn't calc extent flags\n");
6225                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6226                 }
6227         }
6228
6229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6230                 if (ri != NULL &&
6231                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6232                     ri->objectid == btrfs_header_owner(buf)) {
6233                         /*
6234                          * Ok we got to this block from it's original owner and
6235                          * we have FULL_BACKREF set.  Relocation can leave
6236                          * converted blocks over so this is altogether possible,
6237                          * however it's not possible if the generation > the
6238                          * last snapshot, so check for this case.
6239                          */
6240                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6241                             btrfs_header_generation(buf) > ri->last_snapshot) {
6242                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6243                                 rec->bad_full_backref = 1;
6244                         }
6245                 }
6246         } else {
6247                 if (ri != NULL &&
6248                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6249                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6250                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6251                         rec->bad_full_backref = 1;
6252                 }
6253         }
6254
6255         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6256                 rec->flag_block_full_backref = 1;
6257                 parent = bytenr;
6258                 owner = 0;
6259         } else {
6260                 rec->flag_block_full_backref = 0;
6261                 parent = 0;
6262                 owner = btrfs_header_owner(buf);
6263         }
6264
6265         ret = check_block(root, extent_cache, buf, flags);
6266         if (ret)
6267                 goto out;
6268
6269         if (btrfs_is_leaf(buf)) {
6270                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6271                 for (i = 0; i < nritems; i++) {
6272                         struct btrfs_file_extent_item *fi;
6273                         btrfs_item_key_to_cpu(buf, &key, i);
6274                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6275                                 process_extent_item(root, extent_cache, buf,
6276                                                     i);
6277                                 continue;
6278                         }
6279                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6280                                 process_extent_item(root, extent_cache, buf,
6281                                                     i);
6282                                 continue;
6283                         }
6284                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6285                                 total_csum_bytes +=
6286                                         btrfs_item_size_nr(buf, i);
6287                                 continue;
6288                         }
6289                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6290                                 process_chunk_item(chunk_cache, &key, buf, i);
6291                                 continue;
6292                         }
6293                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6294                                 process_device_item(dev_cache, &key, buf, i);
6295                                 continue;
6296                         }
6297                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6298                                 process_block_group_item(block_group_cache,
6299                                         &key, buf, i);
6300                                 continue;
6301                         }
6302                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6303                                 process_device_extent_item(dev_extent_cache,
6304                                         &key, buf, i);
6305                                 continue;
6306
6307                         }
6308                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6309 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6310                                 process_extent_ref_v0(extent_cache, buf, i);
6311 #else
6312                                 BUG();
6313 #endif
6314                                 continue;
6315                         }
6316
6317                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6318                                 add_tree_backref(extent_cache, key.objectid, 0,
6319                                                  key.offset, 0);
6320                                 continue;
6321                         }
6322                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6323                                 add_tree_backref(extent_cache, key.objectid,
6324                                                  key.offset, 0, 0);
6325                                 continue;
6326                         }
6327                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6328                                 struct btrfs_extent_data_ref *ref;
6329                                 ref = btrfs_item_ptr(buf, i,
6330                                                 struct btrfs_extent_data_ref);
6331                                 add_data_backref(extent_cache,
6332                                         key.objectid, 0,
6333                                         btrfs_extent_data_ref_root(buf, ref),
6334                                         btrfs_extent_data_ref_objectid(buf,
6335                                                                        ref),
6336                                         btrfs_extent_data_ref_offset(buf, ref),
6337                                         btrfs_extent_data_ref_count(buf, ref),
6338                                         0, root->sectorsize);
6339                                 continue;
6340                         }
6341                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6342                                 struct btrfs_shared_data_ref *ref;
6343                                 ref = btrfs_item_ptr(buf, i,
6344                                                 struct btrfs_shared_data_ref);
6345                                 add_data_backref(extent_cache,
6346                                         key.objectid, key.offset, 0, 0, 0,
6347                                         btrfs_shared_data_ref_count(buf, ref),
6348                                         0, root->sectorsize);
6349                                 continue;
6350                         }
6351                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6352                                 struct bad_item *bad;
6353
6354                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6355                                         continue;
6356                                 if (!owner)
6357                                         continue;
6358                                 bad = malloc(sizeof(struct bad_item));
6359                                 if (!bad)
6360                                         continue;
6361                                 INIT_LIST_HEAD(&bad->list);
6362                                 memcpy(&bad->key, &key,
6363                                        sizeof(struct btrfs_key));
6364                                 bad->root_id = owner;
6365                                 list_add_tail(&bad->list, &delete_items);
6366                                 continue;
6367                         }
6368                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6369                                 continue;
6370                         fi = btrfs_item_ptr(buf, i,
6371                                             struct btrfs_file_extent_item);
6372                         if (btrfs_file_extent_type(buf, fi) ==
6373                             BTRFS_FILE_EXTENT_INLINE)
6374                                 continue;
6375                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6376                                 continue;
6377
6378                         data_bytes_allocated +=
6379                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6380                         if (data_bytes_allocated < root->sectorsize) {
6381                                 abort();
6382                         }
6383                         data_bytes_referenced +=
6384                                 btrfs_file_extent_num_bytes(buf, fi);
6385                         add_data_backref(extent_cache,
6386                                 btrfs_file_extent_disk_bytenr(buf, fi),
6387                                 parent, owner, key.objectid, key.offset -
6388                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6389                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6390                 }
6391         } else {
6392                 int level;
6393                 struct btrfs_key first_key;
6394
6395                 first_key.objectid = 0;
6396
6397                 if (nritems > 0)
6398                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6399                 level = btrfs_header_level(buf);
6400                 for (i = 0; i < nritems; i++) {
6401                         struct extent_record tmpl;
6402
6403                         ptr = btrfs_node_blockptr(buf, i);
6404                         size = root->nodesize;
6405                         btrfs_node_key_to_cpu(buf, &key, i);
6406                         if (ri != NULL) {
6407                                 if ((level == ri->drop_level)
6408                                     && is_dropped_key(&key, &ri->drop_key)) {
6409                                         continue;
6410                                 }
6411                         }
6412
6413                         memset(&tmpl, 0, sizeof(tmpl));
6414                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6415                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6416                         tmpl.start = ptr;
6417                         tmpl.nr = size;
6418                         tmpl.refs = 1;
6419                         tmpl.metadata = 1;
6420                         tmpl.max_size = size;
6421                         ret = add_extent_rec(extent_cache, &tmpl);
6422                         BUG_ON(ret);
6423
6424                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6425
6426                         if (level > 1) {
6427                                 add_pending(nodes, seen, ptr, size);
6428                         } else {
6429                                 add_pending(pending, seen, ptr, size);
6430                         }
6431                 }
6432                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6433                                       nritems) * sizeof(struct btrfs_key_ptr);
6434         }
6435         total_btree_bytes += buf->len;
6436         if (fs_root_objectid(btrfs_header_owner(buf)))
6437                 total_fs_tree_bytes += buf->len;
6438         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6439                 total_extent_tree_bytes += buf->len;
6440         if (!found_old_backref &&
6441             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6442             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6443             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6444                 found_old_backref = 1;
6445 out:
6446         free_extent_buffer(buf);
6447         return ret;
6448 }
6449
6450 static int add_root_to_pending(struct extent_buffer *buf,
6451                                struct cache_tree *extent_cache,
6452                                struct cache_tree *pending,
6453                                struct cache_tree *seen,
6454                                struct cache_tree *nodes,
6455                                u64 objectid)
6456 {
6457         struct extent_record tmpl;
6458
6459         if (btrfs_header_level(buf) > 0)
6460                 add_pending(nodes, seen, buf->start, buf->len);
6461         else
6462                 add_pending(pending, seen, buf->start, buf->len);
6463
6464         memset(&tmpl, 0, sizeof(tmpl));
6465         tmpl.start = buf->start;
6466         tmpl.nr = buf->len;
6467         tmpl.is_root = 1;
6468         tmpl.refs = 1;
6469         tmpl.metadata = 1;
6470         tmpl.max_size = buf->len;
6471         add_extent_rec(extent_cache, &tmpl);
6472
6473         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6474             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6475                 add_tree_backref(extent_cache, buf->start, buf->start,
6476                                  0, 1);
6477         else
6478                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6479         return 0;
6480 }
6481
6482 /* as we fix the tree, we might be deleting blocks that
6483  * we're tracking for repair.  This hook makes sure we
6484  * remove any backrefs for blocks as we are fixing them.
6485  */
6486 static int free_extent_hook(struct btrfs_trans_handle *trans,
6487                             struct btrfs_root *root,
6488                             u64 bytenr, u64 num_bytes, u64 parent,
6489                             u64 root_objectid, u64 owner, u64 offset,
6490                             int refs_to_drop)
6491 {
6492         struct extent_record *rec;
6493         struct cache_extent *cache;
6494         int is_data;
6495         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6496
6497         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6498         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6499         if (!cache)
6500                 return 0;
6501
6502         rec = container_of(cache, struct extent_record, cache);
6503         if (is_data) {
6504                 struct data_backref *back;
6505                 back = find_data_backref(rec, parent, root_objectid, owner,
6506                                          offset, 1, bytenr, num_bytes);
6507                 if (!back)
6508                         goto out;
6509                 if (back->node.found_ref) {
6510                         back->found_ref -= refs_to_drop;
6511                         if (rec->refs)
6512                                 rec->refs -= refs_to_drop;
6513                 }
6514                 if (back->node.found_extent_tree) {
6515                         back->num_refs -= refs_to_drop;
6516                         if (rec->extent_item_refs)
6517                                 rec->extent_item_refs -= refs_to_drop;
6518                 }
6519                 if (back->found_ref == 0)
6520                         back->node.found_ref = 0;
6521                 if (back->num_refs == 0)
6522                         back->node.found_extent_tree = 0;
6523
6524                 if (!back->node.found_extent_tree && back->node.found_ref) {
6525                         rb_erase(&back->node.node, &rec->backref_tree);
6526                         free(back);
6527                 }
6528         } else {
6529                 struct tree_backref *back;
6530                 back = find_tree_backref(rec, parent, root_objectid);
6531                 if (!back)
6532                         goto out;
6533                 if (back->node.found_ref) {
6534                         if (rec->refs)
6535                                 rec->refs--;
6536                         back->node.found_ref = 0;
6537                 }
6538                 if (back->node.found_extent_tree) {
6539                         if (rec->extent_item_refs)
6540                                 rec->extent_item_refs--;
6541                         back->node.found_extent_tree = 0;
6542                 }
6543                 if (!back->node.found_extent_tree && back->node.found_ref) {
6544                         rb_erase(&back->node.node, &rec->backref_tree);
6545                         free(back);
6546                 }
6547         }
6548         maybe_free_extent_rec(extent_cache, rec);
6549 out:
6550         return 0;
6551 }
6552
6553 static int delete_extent_records(struct btrfs_trans_handle *trans,
6554                                  struct btrfs_root *root,
6555                                  struct btrfs_path *path,
6556                                  u64 bytenr, u64 new_len)
6557 {
6558         struct btrfs_key key;
6559         struct btrfs_key found_key;
6560         struct extent_buffer *leaf;
6561         int ret;
6562         int slot;
6563
6564
6565         key.objectid = bytenr;
6566         key.type = (u8)-1;
6567         key.offset = (u64)-1;
6568
6569         while(1) {
6570                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6571                                         &key, path, 0, 1);
6572                 if (ret < 0)
6573                         break;
6574
6575                 if (ret > 0) {
6576                         ret = 0;
6577                         if (path->slots[0] == 0)
6578                                 break;
6579                         path->slots[0]--;
6580                 }
6581                 ret = 0;
6582
6583                 leaf = path->nodes[0];
6584                 slot = path->slots[0];
6585
6586                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6587                 if (found_key.objectid != bytenr)
6588                         break;
6589
6590                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6591                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6592                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6593                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6594                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6595                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6596                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6597                         btrfs_release_path(path);
6598                         if (found_key.type == 0) {
6599                                 if (found_key.offset == 0)
6600                                         break;
6601                                 key.offset = found_key.offset - 1;
6602                                 key.type = found_key.type;
6603                         }
6604                         key.type = found_key.type - 1;
6605                         key.offset = (u64)-1;
6606                         continue;
6607                 }
6608
6609                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6610                         found_key.objectid, found_key.type, found_key.offset);
6611
6612                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6613                 if (ret)
6614                         break;
6615                 btrfs_release_path(path);
6616
6617                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6618                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6619                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6620                                 found_key.offset : root->nodesize;
6621
6622                         ret = btrfs_update_block_group(trans, root, bytenr,
6623                                                        bytes, 0, 0);
6624                         if (ret)
6625                                 break;
6626                 }
6627         }
6628
6629         btrfs_release_path(path);
6630         return ret;
6631 }
6632
6633 /*
6634  * for a single backref, this will allocate a new extent
6635  * and add the backref to it.
6636  */
6637 static int record_extent(struct btrfs_trans_handle *trans,
6638                          struct btrfs_fs_info *info,
6639                          struct btrfs_path *path,
6640                          struct extent_record *rec,
6641                          struct extent_backref *back,
6642                          int allocated, u64 flags)
6643 {
6644         int ret;
6645         struct btrfs_root *extent_root = info->extent_root;
6646         struct extent_buffer *leaf;
6647         struct btrfs_key ins_key;
6648         struct btrfs_extent_item *ei;
6649         struct tree_backref *tback;
6650         struct data_backref *dback;
6651         struct btrfs_tree_block_info *bi;
6652
6653         if (!back->is_data)
6654                 rec->max_size = max_t(u64, rec->max_size,
6655                                     info->extent_root->nodesize);
6656
6657         if (!allocated) {
6658                 u32 item_size = sizeof(*ei);
6659
6660                 if (!back->is_data)
6661                         item_size += sizeof(*bi);
6662
6663                 ins_key.objectid = rec->start;
6664                 ins_key.offset = rec->max_size;
6665                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6666
6667                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6668                                         &ins_key, item_size);
6669                 if (ret)
6670                         goto fail;
6671
6672                 leaf = path->nodes[0];
6673                 ei = btrfs_item_ptr(leaf, path->slots[0],
6674                                     struct btrfs_extent_item);
6675
6676                 btrfs_set_extent_refs(leaf, ei, 0);
6677                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6678
6679                 if (back->is_data) {
6680                         btrfs_set_extent_flags(leaf, ei,
6681                                                BTRFS_EXTENT_FLAG_DATA);
6682                 } else {
6683                         struct btrfs_disk_key copy_key;;
6684
6685                         tback = to_tree_backref(back);
6686                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6687                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6688                                              sizeof(*bi));
6689
6690                         btrfs_set_disk_key_objectid(&copy_key,
6691                                                     rec->info_objectid);
6692                         btrfs_set_disk_key_type(&copy_key, 0);
6693                         btrfs_set_disk_key_offset(&copy_key, 0);
6694
6695                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6696                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6697
6698                         btrfs_set_extent_flags(leaf, ei,
6699                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6700                 }
6701
6702                 btrfs_mark_buffer_dirty(leaf);
6703                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6704                                                rec->max_size, 1, 0);
6705                 if (ret)
6706                         goto fail;
6707                 btrfs_release_path(path);
6708         }
6709
6710         if (back->is_data) {
6711                 u64 parent;
6712                 int i;
6713
6714                 dback = to_data_backref(back);
6715                 if (back->full_backref)
6716                         parent = dback->parent;
6717                 else
6718                         parent = 0;
6719
6720                 for (i = 0; i < dback->found_ref; i++) {
6721                         /* if parent != 0, we're doing a full backref
6722                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6723                          * just makes the backref allocator create a data
6724                          * backref
6725                          */
6726                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6727                                                    rec->start, rec->max_size,
6728                                                    parent,
6729                                                    dback->root,
6730                                                    parent ?
6731                                                    BTRFS_FIRST_FREE_OBJECTID :
6732                                                    dback->owner,
6733                                                    dback->offset);
6734                         if (ret)
6735                                 break;
6736                 }
6737                 fprintf(stderr, "adding new data backref"
6738                                 " on %llu %s %llu owner %llu"
6739                                 " offset %llu found %d\n",
6740                                 (unsigned long long)rec->start,
6741                                 back->full_backref ?
6742                                 "parent" : "root",
6743                                 back->full_backref ?
6744                                 (unsigned long long)parent :
6745                                 (unsigned long long)dback->root,
6746                                 (unsigned long long)dback->owner,
6747                                 (unsigned long long)dback->offset,
6748                                 dback->found_ref);
6749         } else {
6750                 u64 parent;
6751
6752                 tback = to_tree_backref(back);
6753                 if (back->full_backref)
6754                         parent = tback->parent;
6755                 else
6756                         parent = 0;
6757
6758                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6759                                            rec->start, rec->max_size,
6760                                            parent, tback->root, 0, 0);
6761                 fprintf(stderr, "adding new tree backref on "
6762                         "start %llu len %llu parent %llu root %llu\n",
6763                         rec->start, rec->max_size, parent, tback->root);
6764         }
6765 fail:
6766         btrfs_release_path(path);
6767         return ret;
6768 }
6769
6770 static struct extent_entry *find_entry(struct list_head *entries,
6771                                        u64 bytenr, u64 bytes)
6772 {
6773         struct extent_entry *entry = NULL;
6774
6775         list_for_each_entry(entry, entries, list) {
6776                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6777                         return entry;
6778         }
6779
6780         return NULL;
6781 }
6782
6783 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6784 {
6785         struct extent_entry *entry, *best = NULL, *prev = NULL;
6786
6787         list_for_each_entry(entry, entries, list) {
6788                 if (!prev) {
6789                         prev = entry;
6790                         continue;
6791                 }
6792
6793                 /*
6794                  * If there are as many broken entries as entries then we know
6795                  * not to trust this particular entry.
6796                  */
6797                 if (entry->broken == entry->count)
6798                         continue;
6799
6800                 /*
6801                  * If our current entry == best then we can't be sure our best
6802                  * is really the best, so we need to keep searching.
6803                  */
6804                 if (best && best->count == entry->count) {
6805                         prev = entry;
6806                         best = NULL;
6807                         continue;
6808                 }
6809
6810                 /* Prev == entry, not good enough, have to keep searching */
6811                 if (!prev->broken && prev->count == entry->count)
6812                         continue;
6813
6814                 if (!best)
6815                         best = (prev->count > entry->count) ? prev : entry;
6816                 else if (best->count < entry->count)
6817                         best = entry;
6818                 prev = entry;
6819         }
6820
6821         return best;
6822 }
6823
6824 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6825                       struct data_backref *dback, struct extent_entry *entry)
6826 {
6827         struct btrfs_trans_handle *trans;
6828         struct btrfs_root *root;
6829         struct btrfs_file_extent_item *fi;
6830         struct extent_buffer *leaf;
6831         struct btrfs_key key;
6832         u64 bytenr, bytes;
6833         int ret, err;
6834
6835         key.objectid = dback->root;
6836         key.type = BTRFS_ROOT_ITEM_KEY;
6837         key.offset = (u64)-1;
6838         root = btrfs_read_fs_root(info, &key);
6839         if (IS_ERR(root)) {
6840                 fprintf(stderr, "Couldn't find root for our ref\n");
6841                 return -EINVAL;
6842         }
6843
6844         /*
6845          * The backref points to the original offset of the extent if it was
6846          * split, so we need to search down to the offset we have and then walk
6847          * forward until we find the backref we're looking for.
6848          */
6849         key.objectid = dback->owner;
6850         key.type = BTRFS_EXTENT_DATA_KEY;
6851         key.offset = dback->offset;
6852         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6853         if (ret < 0) {
6854                 fprintf(stderr, "Error looking up ref %d\n", ret);
6855                 return ret;
6856         }
6857
6858         while (1) {
6859                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6860                         ret = btrfs_next_leaf(root, path);
6861                         if (ret) {
6862                                 fprintf(stderr, "Couldn't find our ref, next\n");
6863                                 return -EINVAL;
6864                         }
6865                 }
6866                 leaf = path->nodes[0];
6867                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6868                 if (key.objectid != dback->owner ||
6869                     key.type != BTRFS_EXTENT_DATA_KEY) {
6870                         fprintf(stderr, "Couldn't find our ref, search\n");
6871                         return -EINVAL;
6872                 }
6873                 fi = btrfs_item_ptr(leaf, path->slots[0],
6874                                     struct btrfs_file_extent_item);
6875                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6876                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6877
6878                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6879                         break;
6880                 path->slots[0]++;
6881         }
6882
6883         btrfs_release_path(path);
6884
6885         trans = btrfs_start_transaction(root, 1);
6886         if (IS_ERR(trans))
6887                 return PTR_ERR(trans);
6888
6889         /*
6890          * Ok we have the key of the file extent we want to fix, now we can cow
6891          * down to the thing and fix it.
6892          */
6893         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6894         if (ret < 0) {
6895                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6896                         key.objectid, key.type, key.offset, ret);
6897                 goto out;
6898         }
6899         if (ret > 0) {
6900                 fprintf(stderr, "Well that's odd, we just found this key "
6901                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6902                         key.offset);
6903                 ret = -EINVAL;
6904                 goto out;
6905         }
6906         leaf = path->nodes[0];
6907         fi = btrfs_item_ptr(leaf, path->slots[0],
6908                             struct btrfs_file_extent_item);
6909
6910         if (btrfs_file_extent_compression(leaf, fi) &&
6911             dback->disk_bytenr != entry->bytenr) {
6912                 fprintf(stderr, "Ref doesn't match the record start and is "
6913                         "compressed, please take a btrfs-image of this file "
6914                         "system and send it to a btrfs developer so they can "
6915                         "complete this functionality for bytenr %Lu\n",
6916                         dback->disk_bytenr);
6917                 ret = -EINVAL;
6918                 goto out;
6919         }
6920
6921         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6922                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6923         } else if (dback->disk_bytenr > entry->bytenr) {
6924                 u64 off_diff, offset;
6925
6926                 off_diff = dback->disk_bytenr - entry->bytenr;
6927                 offset = btrfs_file_extent_offset(leaf, fi);
6928                 if (dback->disk_bytenr + offset +
6929                     btrfs_file_extent_num_bytes(leaf, fi) >
6930                     entry->bytenr + entry->bytes) {
6931                         fprintf(stderr, "Ref is past the entry end, please "
6932                                 "take a btrfs-image of this file system and "
6933                                 "send it to a btrfs developer, ref %Lu\n",
6934                                 dback->disk_bytenr);
6935                         ret = -EINVAL;
6936                         goto out;
6937                 }
6938                 offset += off_diff;
6939                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6940                 btrfs_set_file_extent_offset(leaf, fi, offset);
6941         } else if (dback->disk_bytenr < entry->bytenr) {
6942                 u64 offset;
6943
6944                 offset = btrfs_file_extent_offset(leaf, fi);
6945                 if (dback->disk_bytenr + offset < entry->bytenr) {
6946                         fprintf(stderr, "Ref is before the entry start, please"
6947                                 " take a btrfs-image of this file system and "
6948                                 "send it to a btrfs developer, ref %Lu\n",
6949                                 dback->disk_bytenr);
6950                         ret = -EINVAL;
6951                         goto out;
6952                 }
6953
6954                 offset += dback->disk_bytenr;
6955                 offset -= entry->bytenr;
6956                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6957                 btrfs_set_file_extent_offset(leaf, fi, offset);
6958         }
6959
6960         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6961
6962         /*
6963          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6964          * only do this if we aren't using compression, otherwise it's a
6965          * trickier case.
6966          */
6967         if (!btrfs_file_extent_compression(leaf, fi))
6968                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6969         else
6970                 printf("ram bytes may be wrong?\n");
6971         btrfs_mark_buffer_dirty(leaf);
6972 out:
6973         err = btrfs_commit_transaction(trans, root);
6974         btrfs_release_path(path);
6975         return ret ? ret : err;
6976 }
6977
6978 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6979                            struct extent_record *rec)
6980 {
6981         struct extent_backref *back, *tmp;
6982         struct data_backref *dback;
6983         struct extent_entry *entry, *best = NULL;
6984         LIST_HEAD(entries);
6985         int nr_entries = 0;
6986         int broken_entries = 0;
6987         int ret = 0;
6988         short mismatch = 0;
6989
6990         /*
6991          * Metadata is easy and the backrefs should always agree on bytenr and
6992          * size, if not we've got bigger issues.
6993          */
6994         if (rec->metadata)
6995                 return 0;
6996
6997         rbtree_postorder_for_each_entry_safe(back, tmp,
6998                                              &rec->backref_tree, node) {
6999                 if (back->full_backref || !back->is_data)
7000                         continue;
7001
7002                 dback = to_data_backref(back);
7003
7004                 /*
7005                  * We only pay attention to backrefs that we found a real
7006                  * backref for.
7007                  */
7008                 if (dback->found_ref == 0)
7009                         continue;
7010
7011                 /*
7012                  * For now we only catch when the bytes don't match, not the
7013                  * bytenr.  We can easily do this at the same time, but I want
7014                  * to have a fs image to test on before we just add repair
7015                  * functionality willy-nilly so we know we won't screw up the
7016                  * repair.
7017                  */
7018
7019                 entry = find_entry(&entries, dback->disk_bytenr,
7020                                    dback->bytes);
7021                 if (!entry) {
7022                         entry = malloc(sizeof(struct extent_entry));
7023                         if (!entry) {
7024                                 ret = -ENOMEM;
7025                                 goto out;
7026                         }
7027                         memset(entry, 0, sizeof(*entry));
7028                         entry->bytenr = dback->disk_bytenr;
7029                         entry->bytes = dback->bytes;
7030                         list_add_tail(&entry->list, &entries);
7031                         nr_entries++;
7032                 }
7033
7034                 /*
7035                  * If we only have on entry we may think the entries agree when
7036                  * in reality they don't so we have to do some extra checking.
7037                  */
7038                 if (dback->disk_bytenr != rec->start ||
7039                     dback->bytes != rec->nr || back->broken)
7040                         mismatch = 1;
7041
7042                 if (back->broken) {
7043                         entry->broken++;
7044                         broken_entries++;
7045                 }
7046
7047                 entry->count++;
7048         }
7049
7050         /* Yay all the backrefs agree, carry on good sir */
7051         if (nr_entries <= 1 && !mismatch)
7052                 goto out;
7053
7054         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7055                 "%Lu\n", rec->start);
7056
7057         /*
7058          * First we want to see if the backrefs can agree amongst themselves who
7059          * is right, so figure out which one of the entries has the highest
7060          * count.
7061          */
7062         best = find_most_right_entry(&entries);
7063
7064         /*
7065          * Ok so we may have an even split between what the backrefs think, so
7066          * this is where we use the extent ref to see what it thinks.
7067          */
7068         if (!best) {
7069                 entry = find_entry(&entries, rec->start, rec->nr);
7070                 if (!entry && (!broken_entries || !rec->found_rec)) {
7071                         fprintf(stderr, "Backrefs don't agree with each other "
7072                                 "and extent record doesn't agree with anybody,"
7073                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7074                                 rec->start, rec->nr);
7075                         ret = -EINVAL;
7076                         goto out;
7077                 } else if (!entry) {
7078                         /*
7079                          * Ok our backrefs were broken, we'll assume this is the
7080                          * correct value and add an entry for this range.
7081                          */
7082                         entry = malloc(sizeof(struct extent_entry));
7083                         if (!entry) {
7084                                 ret = -ENOMEM;
7085                                 goto out;
7086                         }
7087                         memset(entry, 0, sizeof(*entry));
7088                         entry->bytenr = rec->start;
7089                         entry->bytes = rec->nr;
7090                         list_add_tail(&entry->list, &entries);
7091                         nr_entries++;
7092                 }
7093                 entry->count++;
7094                 best = find_most_right_entry(&entries);
7095                 if (!best) {
7096                         fprintf(stderr, "Backrefs and extent record evenly "
7097                                 "split on who is right, this is going to "
7098                                 "require user input to fix bytenr %Lu bytes "
7099                                 "%Lu\n", rec->start, rec->nr);
7100                         ret = -EINVAL;
7101                         goto out;
7102                 }
7103         }
7104
7105         /*
7106          * I don't think this can happen currently as we'll abort() if we catch
7107          * this case higher up, but in case somebody removes that we still can't
7108          * deal with it properly here yet, so just bail out of that's the case.
7109          */
7110         if (best->bytenr != rec->start) {
7111                 fprintf(stderr, "Extent start and backref starts don't match, "
7112                         "please use btrfs-image on this file system and send "
7113                         "it to a btrfs developer so they can make fsck fix "
7114                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7115                         rec->start, rec->nr);
7116                 ret = -EINVAL;
7117                 goto out;
7118         }
7119
7120         /*
7121          * Ok great we all agreed on an extent record, let's go find the real
7122          * references and fix up the ones that don't match.
7123          */
7124         rbtree_postorder_for_each_entry_safe(back, tmp,
7125                                              &rec->backref_tree, node) {
7126                 if (back->full_backref || !back->is_data)
7127                         continue;
7128
7129                 dback = to_data_backref(back);
7130
7131                 /*
7132                  * Still ignoring backrefs that don't have a real ref attached
7133                  * to them.
7134                  */
7135                 if (dback->found_ref == 0)
7136                         continue;
7137
7138                 if (dback->bytes == best->bytes &&
7139                     dback->disk_bytenr == best->bytenr)
7140                         continue;
7141
7142                 ret = repair_ref(info, path, dback, best);
7143                 if (ret)
7144                         goto out;
7145         }
7146
7147         /*
7148          * Ok we messed with the actual refs, which means we need to drop our
7149          * entire cache and go back and rescan.  I know this is a huge pain and
7150          * adds a lot of extra work, but it's the only way to be safe.  Once all
7151          * the backrefs agree we may not need to do anything to the extent
7152          * record itself.
7153          */
7154         ret = -EAGAIN;
7155 out:
7156         while (!list_empty(&entries)) {
7157                 entry = list_entry(entries.next, struct extent_entry, list);
7158                 list_del_init(&entry->list);
7159                 free(entry);
7160         }
7161         return ret;
7162 }
7163
7164 static int process_duplicates(struct btrfs_root *root,
7165                               struct cache_tree *extent_cache,
7166                               struct extent_record *rec)
7167 {
7168         struct extent_record *good, *tmp;
7169         struct cache_extent *cache;
7170         int ret;
7171
7172         /*
7173          * If we found a extent record for this extent then return, or if we
7174          * have more than one duplicate we are likely going to need to delete
7175          * something.
7176          */
7177         if (rec->found_rec || rec->num_duplicates > 1)
7178                 return 0;
7179
7180         /* Shouldn't happen but just in case */
7181         BUG_ON(!rec->num_duplicates);
7182
7183         /*
7184          * So this happens if we end up with a backref that doesn't match the
7185          * actual extent entry.  So either the backref is bad or the extent
7186          * entry is bad.  Either way we want to have the extent_record actually
7187          * reflect what we found in the extent_tree, so we need to take the
7188          * duplicate out and use that as the extent_record since the only way we
7189          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7190          */
7191         remove_cache_extent(extent_cache, &rec->cache);
7192
7193         good = to_extent_record(rec->dups.next);
7194         list_del_init(&good->list);
7195         INIT_LIST_HEAD(&good->backrefs);
7196         INIT_LIST_HEAD(&good->dups);
7197         good->cache.start = good->start;
7198         good->cache.size = good->nr;
7199         good->content_checked = 0;
7200         good->owner_ref_checked = 0;
7201         good->num_duplicates = 0;
7202         good->refs = rec->refs;
7203         list_splice_init(&rec->backrefs, &good->backrefs);
7204         while (1) {
7205                 cache = lookup_cache_extent(extent_cache, good->start,
7206                                             good->nr);
7207                 if (!cache)
7208                         break;
7209                 tmp = container_of(cache, struct extent_record, cache);
7210
7211                 /*
7212                  * If we find another overlapping extent and it's found_rec is
7213                  * set then it's a duplicate and we need to try and delete
7214                  * something.
7215                  */
7216                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7217                         if (list_empty(&good->list))
7218                                 list_add_tail(&good->list,
7219                                               &duplicate_extents);
7220                         good->num_duplicates += tmp->num_duplicates + 1;
7221                         list_splice_init(&tmp->dups, &good->dups);
7222                         list_del_init(&tmp->list);
7223                         list_add_tail(&tmp->list, &good->dups);
7224                         remove_cache_extent(extent_cache, &tmp->cache);
7225                         continue;
7226                 }
7227
7228                 /*
7229                  * Ok we have another non extent item backed extent rec, so lets
7230                  * just add it to this extent and carry on like we did above.
7231                  */
7232                 good->refs += tmp->refs;
7233                 list_splice_init(&tmp->backrefs, &good->backrefs);
7234                 remove_cache_extent(extent_cache, &tmp->cache);
7235                 free(tmp);
7236         }
7237         ret = insert_cache_extent(extent_cache, &good->cache);
7238         BUG_ON(ret);
7239         free(rec);
7240         return good->num_duplicates ? 0 : 1;
7241 }
7242
7243 static int delete_duplicate_records(struct btrfs_root *root,
7244                                     struct extent_record *rec)
7245 {
7246         struct btrfs_trans_handle *trans;
7247         LIST_HEAD(delete_list);
7248         struct btrfs_path *path;
7249         struct extent_record *tmp, *good, *n;
7250         int nr_del = 0;
7251         int ret = 0, err;
7252         struct btrfs_key key;
7253
7254         path = btrfs_alloc_path();
7255         if (!path) {
7256                 ret = -ENOMEM;
7257                 goto out;
7258         }
7259
7260         good = rec;
7261         /* Find the record that covers all of the duplicates. */
7262         list_for_each_entry(tmp, &rec->dups, list) {
7263                 if (good->start < tmp->start)
7264                         continue;
7265                 if (good->nr > tmp->nr)
7266                         continue;
7267
7268                 if (tmp->start + tmp->nr < good->start + good->nr) {
7269                         fprintf(stderr, "Ok we have overlapping extents that "
7270                                 "aren't completely covered by each other, this "
7271                                 "is going to require more careful thought.  "
7272                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7273                                 tmp->start, tmp->nr, good->start, good->nr);
7274                         abort();
7275                 }
7276                 good = tmp;
7277         }
7278
7279         if (good != rec)
7280                 list_add_tail(&rec->list, &delete_list);
7281
7282         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7283                 if (tmp == good)
7284                         continue;
7285                 list_move_tail(&tmp->list, &delete_list);
7286         }
7287
7288         root = root->fs_info->extent_root;
7289         trans = btrfs_start_transaction(root, 1);
7290         if (IS_ERR(trans)) {
7291                 ret = PTR_ERR(trans);
7292                 goto out;
7293         }
7294
7295         list_for_each_entry(tmp, &delete_list, list) {
7296                 if (tmp->found_rec == 0)
7297                         continue;
7298                 key.objectid = tmp->start;
7299                 key.type = BTRFS_EXTENT_ITEM_KEY;
7300                 key.offset = tmp->nr;
7301
7302                 /* Shouldn't happen but just in case */
7303                 if (tmp->metadata) {
7304                         fprintf(stderr, "Well this shouldn't happen, extent "
7305                                 "record overlaps but is metadata? "
7306                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7307                         abort();
7308                 }
7309
7310                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7311                 if (ret) {
7312                         if (ret > 0)
7313                                 ret = -EINVAL;
7314                         break;
7315                 }
7316                 ret = btrfs_del_item(trans, root, path);
7317                 if (ret)
7318                         break;
7319                 btrfs_release_path(path);
7320                 nr_del++;
7321         }
7322         err = btrfs_commit_transaction(trans, root);
7323         if (err && !ret)
7324                 ret = err;
7325 out:
7326         while (!list_empty(&delete_list)) {
7327                 tmp = to_extent_record(delete_list.next);
7328                 list_del_init(&tmp->list);
7329                 if (tmp == rec)
7330                         continue;
7331                 free(tmp);
7332         }
7333
7334         while (!list_empty(&rec->dups)) {
7335                 tmp = to_extent_record(rec->dups.next);
7336                 list_del_init(&tmp->list);
7337                 free(tmp);
7338         }
7339
7340         btrfs_free_path(path);
7341
7342         if (!ret && !nr_del)
7343                 rec->num_duplicates = 0;
7344
7345         return ret ? ret : nr_del;
7346 }
7347
7348 static int find_possible_backrefs(struct btrfs_fs_info *info,
7349                                   struct btrfs_path *path,
7350                                   struct cache_tree *extent_cache,
7351                                   struct extent_record *rec)
7352 {
7353         struct btrfs_root *root;
7354         struct extent_backref *back, *tmp;
7355         struct data_backref *dback;
7356         struct cache_extent *cache;
7357         struct btrfs_file_extent_item *fi;
7358         struct btrfs_key key;
7359         u64 bytenr, bytes;
7360         int ret;
7361
7362         rbtree_postorder_for_each_entry_safe(back, tmp,
7363                                              &rec->backref_tree, node) {
7364                 /* Don't care about full backrefs (poor unloved backrefs) */
7365                 if (back->full_backref || !back->is_data)
7366                         continue;
7367
7368                 dback = to_data_backref(back);
7369
7370                 /* We found this one, we don't need to do a lookup */
7371                 if (dback->found_ref)
7372                         continue;
7373
7374                 key.objectid = dback->root;
7375                 key.type = BTRFS_ROOT_ITEM_KEY;
7376                 key.offset = (u64)-1;
7377
7378                 root = btrfs_read_fs_root(info, &key);
7379
7380                 /* No root, definitely a bad ref, skip */
7381                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7382                         continue;
7383                 /* Other err, exit */
7384                 if (IS_ERR(root))
7385                         return PTR_ERR(root);
7386
7387                 key.objectid = dback->owner;
7388                 key.type = BTRFS_EXTENT_DATA_KEY;
7389                 key.offset = dback->offset;
7390                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7391                 if (ret) {
7392                         btrfs_release_path(path);
7393                         if (ret < 0)
7394                                 return ret;
7395                         /* Didn't find it, we can carry on */
7396                         ret = 0;
7397                         continue;
7398                 }
7399
7400                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7401                                     struct btrfs_file_extent_item);
7402                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7403                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7404                 btrfs_release_path(path);
7405                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7406                 if (cache) {
7407                         struct extent_record *tmp;
7408                         tmp = container_of(cache, struct extent_record, cache);
7409
7410                         /*
7411                          * If we found an extent record for the bytenr for this
7412                          * particular backref then we can't add it to our
7413                          * current extent record.  We only want to add backrefs
7414                          * that don't have a corresponding extent item in the
7415                          * extent tree since they likely belong to this record
7416                          * and we need to fix it if it doesn't match bytenrs.
7417                          */
7418                         if  (tmp->found_rec)
7419                                 continue;
7420                 }
7421
7422                 dback->found_ref += 1;
7423                 dback->disk_bytenr = bytenr;
7424                 dback->bytes = bytes;
7425
7426                 /*
7427                  * Set this so the verify backref code knows not to trust the
7428                  * values in this backref.
7429                  */
7430                 back->broken = 1;
7431         }
7432
7433         return 0;
7434 }
7435
7436 /*
7437  * Record orphan data ref into corresponding root.
7438  *
7439  * Return 0 if the extent item contains data ref and recorded.
7440  * Return 1 if the extent item contains no useful data ref
7441  *   On that case, it may contains only shared_dataref or metadata backref
7442  *   or the file extent exists(this should be handled by the extent bytenr
7443  *   recovery routine)
7444  * Return <0 if something goes wrong.
7445  */
7446 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7447                                       struct extent_record *rec)
7448 {
7449         struct btrfs_key key;
7450         struct btrfs_root *dest_root;
7451         struct extent_backref *back, *tmp;
7452         struct data_backref *dback;
7453         struct orphan_data_extent *orphan;
7454         struct btrfs_path *path;
7455         int recorded_data_ref = 0;
7456         int ret = 0;
7457
7458         if (rec->metadata)
7459                 return 1;
7460         path = btrfs_alloc_path();
7461         if (!path)
7462                 return -ENOMEM;
7463         rbtree_postorder_for_each_entry_safe(back, tmp,
7464                                              &rec->backref_tree, node) {
7465                 if (back->full_backref || !back->is_data ||
7466                     !back->found_extent_tree)
7467                         continue;
7468                 dback = to_data_backref(back);
7469                 if (dback->found_ref)
7470                         continue;
7471                 key.objectid = dback->root;
7472                 key.type = BTRFS_ROOT_ITEM_KEY;
7473                 key.offset = (u64)-1;
7474
7475                 dest_root = btrfs_read_fs_root(fs_info, &key);
7476
7477                 /* For non-exist root we just skip it */
7478                 if (IS_ERR(dest_root) || !dest_root)
7479                         continue;
7480
7481                 key.objectid = dback->owner;
7482                 key.type = BTRFS_EXTENT_DATA_KEY;
7483                 key.offset = dback->offset;
7484
7485                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7486                 /*
7487                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7488                  * we need to record it for inode/file extent rebuild.
7489                  * For ret > 0, we record it only for file extent rebuild.
7490                  * For ret == 0, the file extent exists but only bytenr
7491                  * mismatch, let the original bytenr fix routine to handle,
7492                  * don't record it.
7493                  */
7494                 if (ret == 0)
7495                         continue;
7496                 ret = 0;
7497                 orphan = malloc(sizeof(*orphan));
7498                 if (!orphan) {
7499                         ret = -ENOMEM;
7500                         goto out;
7501                 }
7502                 INIT_LIST_HEAD(&orphan->list);
7503                 orphan->root = dback->root;
7504                 orphan->objectid = dback->owner;
7505                 orphan->offset = dback->offset;
7506                 orphan->disk_bytenr = rec->cache.start;
7507                 orphan->disk_len = rec->cache.size;
7508                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7509                 recorded_data_ref = 1;
7510         }
7511 out:
7512         btrfs_free_path(path);
7513         if (!ret)
7514                 return !recorded_data_ref;
7515         else
7516                 return ret;
7517 }
7518
7519 /*
7520  * when an incorrect extent item is found, this will delete
7521  * all of the existing entries for it and recreate them
7522  * based on what the tree scan found.
7523  */
7524 static int fixup_extent_refs(struct btrfs_fs_info *info,
7525                              struct cache_tree *extent_cache,
7526                              struct extent_record *rec)
7527 {
7528         struct btrfs_trans_handle *trans = NULL;
7529         int ret;
7530         struct btrfs_path *path;
7531         struct cache_extent *cache;
7532         struct extent_backref *back, *tmp;
7533         int allocated = 0;
7534         u64 flags = 0;
7535
7536         if (rec->flag_block_full_backref)
7537                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7538
7539         path = btrfs_alloc_path();
7540         if (!path)
7541                 return -ENOMEM;
7542
7543         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7544                 /*
7545                  * Sometimes the backrefs themselves are so broken they don't
7546                  * get attached to any meaningful rec, so first go back and
7547                  * check any of our backrefs that we couldn't find and throw
7548                  * them into the list if we find the backref so that
7549                  * verify_backrefs can figure out what to do.
7550                  */
7551                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7552                 if (ret < 0)
7553                         goto out;
7554         }
7555
7556         /* step one, make sure all of the backrefs agree */
7557         ret = verify_backrefs(info, path, rec);
7558         if (ret < 0)
7559                 goto out;
7560
7561         trans = btrfs_start_transaction(info->extent_root, 1);
7562         if (IS_ERR(trans)) {
7563                 ret = PTR_ERR(trans);
7564                 goto out;
7565         }
7566
7567         /* step two, delete all the existing records */
7568         ret = delete_extent_records(trans, info->extent_root, path,
7569                                     rec->start, rec->max_size);
7570
7571         if (ret < 0)
7572                 goto out;
7573
7574         /* was this block corrupt?  If so, don't add references to it */
7575         cache = lookup_cache_extent(info->corrupt_blocks,
7576                                     rec->start, rec->max_size);
7577         if (cache) {
7578                 ret = 0;
7579                 goto out;
7580         }
7581
7582         /* step three, recreate all the refs we did find */
7583         rbtree_postorder_for_each_entry_safe(back, tmp,
7584                                              &rec->backref_tree, node) {
7585                 /*
7586                  * if we didn't find any references, don't create a
7587                  * new extent record
7588                  */
7589                 if (!back->found_ref)
7590                         continue;
7591
7592                 rec->bad_full_backref = 0;
7593                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7594                 allocated = 1;
7595
7596                 if (ret)
7597                         goto out;
7598         }
7599 out:
7600         if (trans) {
7601                 int err = btrfs_commit_transaction(trans, info->extent_root);
7602                 if (!ret)
7603                         ret = err;
7604         }
7605
7606         btrfs_free_path(path);
7607         return ret;
7608 }
7609
7610 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7611                               struct extent_record *rec)
7612 {
7613         struct btrfs_trans_handle *trans;
7614         struct btrfs_root *root = fs_info->extent_root;
7615         struct btrfs_path *path;
7616         struct btrfs_extent_item *ei;
7617         struct btrfs_key key;
7618         u64 flags;
7619         int ret = 0;
7620
7621         key.objectid = rec->start;
7622         if (rec->metadata) {
7623                 key.type = BTRFS_METADATA_ITEM_KEY;
7624                 key.offset = rec->info_level;
7625         } else {
7626                 key.type = BTRFS_EXTENT_ITEM_KEY;
7627                 key.offset = rec->max_size;
7628         }
7629
7630         path = btrfs_alloc_path();
7631         if (!path)
7632                 return -ENOMEM;
7633
7634         trans = btrfs_start_transaction(root, 0);
7635         if (IS_ERR(trans)) {
7636                 btrfs_free_path(path);
7637                 return PTR_ERR(trans);
7638         }
7639
7640         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7641         if (ret < 0) {
7642                 btrfs_free_path(path);
7643                 btrfs_commit_transaction(trans, root);
7644                 return ret;
7645         } else if (ret) {
7646                 fprintf(stderr, "Didn't find extent for %llu\n",
7647                         (unsigned long long)rec->start);
7648                 btrfs_free_path(path);
7649                 btrfs_commit_transaction(trans, root);
7650                 return -ENOENT;
7651         }
7652
7653         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7654                             struct btrfs_extent_item);
7655         flags = btrfs_extent_flags(path->nodes[0], ei);
7656         if (rec->flag_block_full_backref) {
7657                 fprintf(stderr, "setting full backref on %llu\n",
7658                         (unsigned long long)key.objectid);
7659                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7660         } else {
7661                 fprintf(stderr, "clearing full backref on %llu\n",
7662                         (unsigned long long)key.objectid);
7663                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7664         }
7665         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7666         btrfs_mark_buffer_dirty(path->nodes[0]);
7667         btrfs_free_path(path);
7668         return btrfs_commit_transaction(trans, root);
7669 }
7670
7671 /* right now we only prune from the extent allocation tree */
7672 static int prune_one_block(struct btrfs_trans_handle *trans,
7673                            struct btrfs_fs_info *info,
7674                            struct btrfs_corrupt_block *corrupt)
7675 {
7676         int ret;
7677         struct btrfs_path path;
7678         struct extent_buffer *eb;
7679         u64 found;
7680         int slot;
7681         int nritems;
7682         int level = corrupt->level + 1;
7683
7684         btrfs_init_path(&path);
7685 again:
7686         /* we want to stop at the parent to our busted block */
7687         path.lowest_level = level;
7688
7689         ret = btrfs_search_slot(trans, info->extent_root,
7690                                 &corrupt->key, &path, -1, 1);
7691
7692         if (ret < 0)
7693                 goto out;
7694
7695         eb = path.nodes[level];
7696         if (!eb) {
7697                 ret = -ENOENT;
7698                 goto out;
7699         }
7700
7701         /*
7702          * hopefully the search gave us the block we want to prune,
7703          * lets try that first
7704          */
7705         slot = path.slots[level];
7706         found =  btrfs_node_blockptr(eb, slot);
7707         if (found == corrupt->cache.start)
7708                 goto del_ptr;
7709
7710         nritems = btrfs_header_nritems(eb);
7711
7712         /* the search failed, lets scan this node and hope we find it */
7713         for (slot = 0; slot < nritems; slot++) {
7714                 found =  btrfs_node_blockptr(eb, slot);
7715                 if (found == corrupt->cache.start)
7716                         goto del_ptr;
7717         }
7718         /*
7719          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7720          * to this block
7721          */
7722         if (eb == info->extent_root->node) {
7723                 ret = -ENOENT;
7724                 goto out;
7725         } else {
7726                 level++;
7727                 btrfs_release_path(&path);
7728                 goto again;
7729         }
7730
7731 del_ptr:
7732         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7733         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7734
7735 out:
7736         btrfs_release_path(&path);
7737         return ret;
7738 }
7739
7740 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7741 {
7742         struct btrfs_trans_handle *trans = NULL;
7743         struct cache_extent *cache;
7744         struct btrfs_corrupt_block *corrupt;
7745
7746         while (1) {
7747                 cache = search_cache_extent(info->corrupt_blocks, 0);
7748                 if (!cache)
7749                         break;
7750                 if (!trans) {
7751                         trans = btrfs_start_transaction(info->extent_root, 1);
7752                         if (IS_ERR(trans))
7753                                 return PTR_ERR(trans);
7754                 }
7755                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7756                 prune_one_block(trans, info, corrupt);
7757                 remove_cache_extent(info->corrupt_blocks, cache);
7758         }
7759         if (trans)
7760                 return btrfs_commit_transaction(trans, info->extent_root);
7761         return 0;
7762 }
7763
7764 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7765 {
7766         struct btrfs_block_group_cache *cache;
7767         u64 start, end;
7768         int ret;
7769
7770         while (1) {
7771                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7772                                             &start, &end, EXTENT_DIRTY);
7773                 if (ret)
7774                         break;
7775                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7776                                    GFP_NOFS);
7777         }
7778
7779         start = 0;
7780         while (1) {
7781                 cache = btrfs_lookup_first_block_group(fs_info, start);
7782                 if (!cache)
7783                         break;
7784                 if (cache->cached)
7785                         cache->cached = 0;
7786                 start = cache->key.objectid + cache->key.offset;
7787         }
7788 }
7789
7790 static int check_extent_refs(struct btrfs_root *root,
7791                              struct cache_tree *extent_cache)
7792 {
7793         struct extent_record *rec;
7794         struct cache_extent *cache;
7795         int err = 0;
7796         int ret = 0;
7797         int fixed = 0;
7798         int had_dups = 0;
7799         int recorded = 0;
7800
7801         if (repair) {
7802                 /*
7803                  * if we're doing a repair, we have to make sure
7804                  * we don't allocate from the problem extents.
7805                  * In the worst case, this will be all the
7806                  * extents in the FS
7807                  */
7808                 cache = search_cache_extent(extent_cache, 0);
7809                 while(cache) {
7810                         rec = container_of(cache, struct extent_record, cache);
7811                         set_extent_dirty(root->fs_info->excluded_extents,
7812                                          rec->start,
7813                                          rec->start + rec->max_size - 1,
7814                                          GFP_NOFS);
7815                         cache = next_cache_extent(cache);
7816                 }
7817
7818                 /* pin down all the corrupted blocks too */
7819                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7820                 while(cache) {
7821                         set_extent_dirty(root->fs_info->excluded_extents,
7822                                          cache->start,
7823                                          cache->start + cache->size - 1,
7824                                          GFP_NOFS);
7825                         cache = next_cache_extent(cache);
7826                 }
7827                 prune_corrupt_blocks(root->fs_info);
7828                 reset_cached_block_groups(root->fs_info);
7829         }
7830
7831         reset_cached_block_groups(root->fs_info);
7832
7833         /*
7834          * We need to delete any duplicate entries we find first otherwise we
7835          * could mess up the extent tree when we have backrefs that actually
7836          * belong to a different extent item and not the weird duplicate one.
7837          */
7838         while (repair && !list_empty(&duplicate_extents)) {
7839                 rec = to_extent_record(duplicate_extents.next);
7840                 list_del_init(&rec->list);
7841
7842                 /* Sometimes we can find a backref before we find an actual
7843                  * extent, so we need to process it a little bit to see if there
7844                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7845                  * if this is a backref screwup.  If we need to delete stuff
7846                  * process_duplicates() will return 0, otherwise it will return
7847                  * 1 and we
7848                  */
7849                 if (process_duplicates(root, extent_cache, rec))
7850                         continue;
7851                 ret = delete_duplicate_records(root, rec);
7852                 if (ret < 0)
7853                         return ret;
7854                 /*
7855                  * delete_duplicate_records will return the number of entries
7856                  * deleted, so if it's greater than 0 then we know we actually
7857                  * did something and we need to remove.
7858                  */
7859                 if (ret)
7860                         had_dups = 1;
7861         }
7862
7863         if (had_dups)
7864                 return -EAGAIN;
7865
7866         while(1) {
7867                 int cur_err = 0;
7868
7869                 fixed = 0;
7870                 recorded = 0;
7871                 cache = search_cache_extent(extent_cache, 0);
7872                 if (!cache)
7873                         break;
7874                 rec = container_of(cache, struct extent_record, cache);
7875                 if (rec->num_duplicates) {
7876                         fprintf(stderr, "extent item %llu has multiple extent "
7877                                 "items\n", (unsigned long long)rec->start);
7878                         err = 1;
7879                         cur_err = 1;
7880                 }
7881
7882                 if (rec->refs != rec->extent_item_refs) {
7883                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7884                                 (unsigned long long)rec->start,
7885                                 (unsigned long long)rec->nr);
7886                         fprintf(stderr, "extent item %llu, found %llu\n",
7887                                 (unsigned long long)rec->extent_item_refs,
7888                                 (unsigned long long)rec->refs);
7889                         ret = record_orphan_data_extents(root->fs_info, rec);
7890                         if (ret < 0)
7891                                 goto repair_abort;
7892                         if (ret == 0) {
7893                                 recorded = 1;
7894                         } else {
7895                                 /*
7896                                  * we can't use the extent to repair file
7897                                  * extent, let the fallback method handle it.
7898                                  */
7899                                 if (!fixed && repair) {
7900                                         ret = fixup_extent_refs(
7901                                                         root->fs_info,
7902                                                         extent_cache, rec);
7903                                         if (ret)
7904                                                 goto repair_abort;
7905                                         fixed = 1;
7906                                 }
7907                         }
7908                         err = 1;
7909                         cur_err = 1;
7910                 }
7911                 if (all_backpointers_checked(rec, 1)) {
7912                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7913                                 (unsigned long long)rec->start,
7914                                 (unsigned long long)rec->nr);
7915
7916                         if (!fixed && !recorded && repair) {
7917                                 ret = fixup_extent_refs(root->fs_info,
7918                                                         extent_cache, rec);
7919                                 if (ret)
7920                                         goto repair_abort;
7921                                 fixed = 1;
7922                         }
7923                         cur_err = 1;
7924                         err = 1;
7925                 }
7926                 if (!rec->owner_ref_checked) {
7927                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7928                                 (unsigned long long)rec->start,
7929                                 (unsigned long long)rec->nr);
7930                         if (!fixed && !recorded && repair) {
7931                                 ret = fixup_extent_refs(root->fs_info,
7932                                                         extent_cache, rec);
7933                                 if (ret)
7934                                         goto repair_abort;
7935                                 fixed = 1;
7936                         }
7937                         err = 1;
7938                         cur_err = 1;
7939                 }
7940                 if (rec->bad_full_backref) {
7941                         fprintf(stderr, "bad full backref, on [%llu]\n",
7942                                 (unsigned long long)rec->start);
7943                         if (repair) {
7944                                 ret = fixup_extent_flags(root->fs_info, rec);
7945                                 if (ret)
7946                                         goto repair_abort;
7947                                 fixed = 1;
7948                         }
7949                         err = 1;
7950                         cur_err = 1;
7951                 }
7952                 /*
7953                  * Although it's not a extent ref's problem, we reuse this
7954                  * routine for error reporting.
7955                  * No repair function yet.
7956                  */
7957                 if (rec->crossing_stripes) {
7958                         fprintf(stderr,
7959                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7960                                 rec->start, rec->start + rec->max_size);
7961                         err = 1;
7962                         cur_err = 1;
7963                 }
7964
7965                 if (rec->wrong_chunk_type) {
7966                         fprintf(stderr,
7967                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7968                                 rec->start, rec->start + rec->max_size);
7969                         err = 1;
7970                         cur_err = 1;
7971                 }
7972
7973                 remove_cache_extent(extent_cache, cache);
7974                 free_all_extent_backrefs(rec);
7975                 if (!init_extent_tree && repair && (!cur_err || fixed))
7976                         clear_extent_dirty(root->fs_info->excluded_extents,
7977                                            rec->start,
7978                                            rec->start + rec->max_size - 1,
7979                                            GFP_NOFS);
7980                 free(rec);
7981         }
7982 repair_abort:
7983         if (repair) {
7984                 if (ret && ret != -EAGAIN) {
7985                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7986                         exit(1);
7987                 } else if (!ret) {
7988                         struct btrfs_trans_handle *trans;
7989
7990                         root = root->fs_info->extent_root;
7991                         trans = btrfs_start_transaction(root, 1);
7992                         if (IS_ERR(trans)) {
7993                                 ret = PTR_ERR(trans);
7994                                 goto repair_abort;
7995                         }
7996
7997                         btrfs_fix_block_accounting(trans, root);
7998                         ret = btrfs_commit_transaction(trans, root);
7999                         if (ret)
8000                                 goto repair_abort;
8001                 }
8002                 if (err)
8003                         fprintf(stderr, "repaired damaged extent references\n");
8004                 return ret;
8005         }
8006         return err;
8007 }
8008
8009 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8010 {
8011         u64 stripe_size;
8012
8013         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8014                 stripe_size = length;
8015                 stripe_size /= num_stripes;
8016         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8017                 stripe_size = length * 2;
8018                 stripe_size /= num_stripes;
8019         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8020                 stripe_size = length;
8021                 stripe_size /= (num_stripes - 1);
8022         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8023                 stripe_size = length;
8024                 stripe_size /= (num_stripes - 2);
8025         } else {
8026                 stripe_size = length;
8027         }
8028         return stripe_size;
8029 }
8030
8031 /*
8032  * Check the chunk with its block group/dev list ref:
8033  * Return 0 if all refs seems valid.
8034  * Return 1 if part of refs seems valid, need later check for rebuild ref
8035  * like missing block group and needs to search extent tree to rebuild them.
8036  * Return -1 if essential refs are missing and unable to rebuild.
8037  */
8038 static int check_chunk_refs(struct chunk_record *chunk_rec,
8039                             struct block_group_tree *block_group_cache,
8040                             struct device_extent_tree *dev_extent_cache,
8041                             int silent)
8042 {
8043         struct cache_extent *block_group_item;
8044         struct block_group_record *block_group_rec;
8045         struct cache_extent *dev_extent_item;
8046         struct device_extent_record *dev_extent_rec;
8047         u64 devid;
8048         u64 offset;
8049         u64 length;
8050         int metadump_v2 = 0;
8051         int i;
8052         int ret = 0;
8053
8054         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8055                                                chunk_rec->offset,
8056                                                chunk_rec->length);
8057         if (block_group_item) {
8058                 block_group_rec = container_of(block_group_item,
8059                                                struct block_group_record,
8060                                                cache);
8061                 if (chunk_rec->length != block_group_rec->offset ||
8062                     chunk_rec->offset != block_group_rec->objectid ||
8063                     (!metadump_v2 &&
8064                      chunk_rec->type_flags != block_group_rec->flags)) {
8065                         if (!silent)
8066                                 fprintf(stderr,
8067                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8068                                         chunk_rec->objectid,
8069                                         chunk_rec->type,
8070                                         chunk_rec->offset,
8071                                         chunk_rec->length,
8072                                         chunk_rec->offset,
8073                                         chunk_rec->type_flags,
8074                                         block_group_rec->objectid,
8075                                         block_group_rec->type,
8076                                         block_group_rec->offset,
8077                                         block_group_rec->offset,
8078                                         block_group_rec->objectid,
8079                                         block_group_rec->flags);
8080                         ret = -1;
8081                 } else {
8082                         list_del_init(&block_group_rec->list);
8083                         chunk_rec->bg_rec = block_group_rec;
8084                 }
8085         } else {
8086                 if (!silent)
8087                         fprintf(stderr,
8088                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8089                                 chunk_rec->objectid,
8090                                 chunk_rec->type,
8091                                 chunk_rec->offset,
8092                                 chunk_rec->length,
8093                                 chunk_rec->offset,
8094                                 chunk_rec->type_flags);
8095                 ret = 1;
8096         }
8097
8098         if (metadump_v2)
8099                 return ret;
8100
8101         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8102                                     chunk_rec->num_stripes);
8103         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8104                 devid = chunk_rec->stripes[i].devid;
8105                 offset = chunk_rec->stripes[i].offset;
8106                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8107                                                        devid, offset, length);
8108                 if (dev_extent_item) {
8109                         dev_extent_rec = container_of(dev_extent_item,
8110                                                 struct device_extent_record,
8111                                                 cache);
8112                         if (dev_extent_rec->objectid != devid ||
8113                             dev_extent_rec->offset != offset ||
8114                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8115                             dev_extent_rec->length != length) {
8116                                 if (!silent)
8117                                         fprintf(stderr,
8118                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8119                                                 chunk_rec->objectid,
8120                                                 chunk_rec->type,
8121                                                 chunk_rec->offset,
8122                                                 chunk_rec->stripes[i].devid,
8123                                                 chunk_rec->stripes[i].offset,
8124                                                 dev_extent_rec->objectid,
8125                                                 dev_extent_rec->offset,
8126                                                 dev_extent_rec->length);
8127                                 ret = -1;
8128                         } else {
8129                                 list_move(&dev_extent_rec->chunk_list,
8130                                           &chunk_rec->dextents);
8131                         }
8132                 } else {
8133                         if (!silent)
8134                                 fprintf(stderr,
8135                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8136                                         chunk_rec->objectid,
8137                                         chunk_rec->type,
8138                                         chunk_rec->offset,
8139                                         chunk_rec->stripes[i].devid,
8140                                         chunk_rec->stripes[i].offset);
8141                         ret = -1;
8142                 }
8143         }
8144         return ret;
8145 }
8146
8147 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8148 int check_chunks(struct cache_tree *chunk_cache,
8149                  struct block_group_tree *block_group_cache,
8150                  struct device_extent_tree *dev_extent_cache,
8151                  struct list_head *good, struct list_head *bad,
8152                  struct list_head *rebuild, int silent)
8153 {
8154         struct cache_extent *chunk_item;
8155         struct chunk_record *chunk_rec;
8156         struct block_group_record *bg_rec;
8157         struct device_extent_record *dext_rec;
8158         int err;
8159         int ret = 0;
8160
8161         chunk_item = first_cache_extent(chunk_cache);
8162         while (chunk_item) {
8163                 chunk_rec = container_of(chunk_item, struct chunk_record,
8164                                          cache);
8165                 err = check_chunk_refs(chunk_rec, block_group_cache,
8166                                        dev_extent_cache, silent);
8167                 if (err < 0)
8168                         ret = err;
8169                 if (err == 0 && good)
8170                         list_add_tail(&chunk_rec->list, good);
8171                 if (err > 0 && rebuild)
8172                         list_add_tail(&chunk_rec->list, rebuild);
8173                 if (err < 0 && bad)
8174                         list_add_tail(&chunk_rec->list, bad);
8175                 chunk_item = next_cache_extent(chunk_item);
8176         }
8177
8178         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8179                 if (!silent)
8180                         fprintf(stderr,
8181                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8182                                 bg_rec->objectid,
8183                                 bg_rec->offset,
8184                                 bg_rec->flags);
8185                 if (!ret)
8186                         ret = 1;
8187         }
8188
8189         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8190                             chunk_list) {
8191                 if (!silent)
8192                         fprintf(stderr,
8193                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8194                                 dext_rec->objectid,
8195                                 dext_rec->offset,
8196                                 dext_rec->length);
8197                 if (!ret)
8198                         ret = 1;
8199         }
8200         return ret;
8201 }
8202
8203
8204 static int check_device_used(struct device_record *dev_rec,
8205                              struct device_extent_tree *dext_cache)
8206 {
8207         struct cache_extent *cache;
8208         struct device_extent_record *dev_extent_rec;
8209         u64 total_byte = 0;
8210
8211         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8212         while (cache) {
8213                 dev_extent_rec = container_of(cache,
8214                                               struct device_extent_record,
8215                                               cache);
8216                 if (dev_extent_rec->objectid != dev_rec->devid)
8217                         break;
8218
8219                 list_del_init(&dev_extent_rec->device_list);
8220                 total_byte += dev_extent_rec->length;
8221                 cache = next_cache_extent(cache);
8222         }
8223
8224         if (total_byte != dev_rec->byte_used) {
8225                 fprintf(stderr,
8226                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8227                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8228                         dev_rec->type, dev_rec->offset);
8229                 return -1;
8230         } else {
8231                 return 0;
8232         }
8233 }
8234
8235 /* check btrfs_dev_item -> btrfs_dev_extent */
8236 static int check_devices(struct rb_root *dev_cache,
8237                          struct device_extent_tree *dev_extent_cache)
8238 {
8239         struct rb_node *dev_node;
8240         struct device_record *dev_rec;
8241         struct device_extent_record *dext_rec;
8242         int err;
8243         int ret = 0;
8244
8245         dev_node = rb_first(dev_cache);
8246         while (dev_node) {
8247                 dev_rec = container_of(dev_node, struct device_record, node);
8248                 err = check_device_used(dev_rec, dev_extent_cache);
8249                 if (err)
8250                         ret = err;
8251
8252                 dev_node = rb_next(dev_node);
8253         }
8254         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8255                             device_list) {
8256                 fprintf(stderr,
8257                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8258                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8259                 if (!ret)
8260                         ret = 1;
8261         }
8262         return ret;
8263 }
8264
8265 static int add_root_item_to_list(struct list_head *head,
8266                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8267                                   u8 level, u8 drop_level,
8268                                   int level_size, struct btrfs_key *drop_key)
8269 {
8270
8271         struct root_item_record *ri_rec;
8272         ri_rec = malloc(sizeof(*ri_rec));
8273         if (!ri_rec)
8274                 return -ENOMEM;
8275         ri_rec->bytenr = bytenr;
8276         ri_rec->objectid = objectid;
8277         ri_rec->level = level;
8278         ri_rec->level_size = level_size;
8279         ri_rec->drop_level = drop_level;
8280         ri_rec->last_snapshot = last_snapshot;
8281         if (drop_key)
8282                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8283         list_add_tail(&ri_rec->list, head);
8284
8285         return 0;
8286 }
8287
8288 static void free_root_item_list(struct list_head *list)
8289 {
8290         struct root_item_record *ri_rec;
8291
8292         while (!list_empty(list)) {
8293                 ri_rec = list_first_entry(list, struct root_item_record,
8294                                           list);
8295                 list_del_init(&ri_rec->list);
8296                 free(ri_rec);
8297         }
8298 }
8299
8300 static int deal_root_from_list(struct list_head *list,
8301                                struct btrfs_root *root,
8302                                struct block_info *bits,
8303                                int bits_nr,
8304                                struct cache_tree *pending,
8305                                struct cache_tree *seen,
8306                                struct cache_tree *reada,
8307                                struct cache_tree *nodes,
8308                                struct cache_tree *extent_cache,
8309                                struct cache_tree *chunk_cache,
8310                                struct rb_root *dev_cache,
8311                                struct block_group_tree *block_group_cache,
8312                                struct device_extent_tree *dev_extent_cache)
8313 {
8314         int ret = 0;
8315         u64 last;
8316
8317         while (!list_empty(list)) {
8318                 struct root_item_record *rec;
8319                 struct extent_buffer *buf;
8320                 rec = list_entry(list->next,
8321                                  struct root_item_record, list);
8322                 last = 0;
8323                 buf = read_tree_block(root->fs_info->tree_root,
8324                                       rec->bytenr, rec->level_size, 0);
8325                 if (!extent_buffer_uptodate(buf)) {
8326                         free_extent_buffer(buf);
8327                         ret = -EIO;
8328                         break;
8329                 }
8330                 add_root_to_pending(buf, extent_cache, pending,
8331                                     seen, nodes, rec->objectid);
8332                 /*
8333                  * To rebuild extent tree, we need deal with snapshot
8334                  * one by one, otherwise we deal with node firstly which
8335                  * can maximize readahead.
8336                  */
8337                 while (1) {
8338                         ret = run_next_block(root, bits, bits_nr, &last,
8339                                              pending, seen, reada, nodes,
8340                                              extent_cache, chunk_cache,
8341                                              dev_cache, block_group_cache,
8342                                              dev_extent_cache, rec);
8343                         if (ret != 0)
8344                                 break;
8345                 }
8346                 free_extent_buffer(buf);
8347                 list_del(&rec->list);
8348                 free(rec);
8349                 if (ret < 0)
8350                         break;
8351         }
8352         while (ret >= 0) {
8353                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8354                                      reada, nodes, extent_cache, chunk_cache,
8355                                      dev_cache, block_group_cache,
8356                                      dev_extent_cache, NULL);
8357                 if (ret != 0) {
8358                         if (ret > 0)
8359                                 ret = 0;
8360                         break;
8361                 }
8362         }
8363         return ret;
8364 }
8365
8366 static int check_chunks_and_extents(struct btrfs_root *root)
8367 {
8368         struct rb_root dev_cache;
8369         struct cache_tree chunk_cache;
8370         struct block_group_tree block_group_cache;
8371         struct device_extent_tree dev_extent_cache;
8372         struct cache_tree extent_cache;
8373         struct cache_tree seen;
8374         struct cache_tree pending;
8375         struct cache_tree reada;
8376         struct cache_tree nodes;
8377         struct extent_io_tree excluded_extents;
8378         struct cache_tree corrupt_blocks;
8379         struct btrfs_path path;
8380         struct btrfs_key key;
8381         struct btrfs_key found_key;
8382         int ret, err = 0;
8383         struct block_info *bits;
8384         int bits_nr;
8385         struct extent_buffer *leaf;
8386         int slot;
8387         struct btrfs_root_item ri;
8388         struct list_head dropping_trees;
8389         struct list_head normal_trees;
8390         struct btrfs_root *root1;
8391         u64 objectid;
8392         u32 level_size;
8393         u8 level;
8394
8395         dev_cache = RB_ROOT;
8396         cache_tree_init(&chunk_cache);
8397         block_group_tree_init(&block_group_cache);
8398         device_extent_tree_init(&dev_extent_cache);
8399
8400         cache_tree_init(&extent_cache);
8401         cache_tree_init(&seen);
8402         cache_tree_init(&pending);
8403         cache_tree_init(&nodes);
8404         cache_tree_init(&reada);
8405         cache_tree_init(&corrupt_blocks);
8406         extent_io_tree_init(&excluded_extents);
8407         INIT_LIST_HEAD(&dropping_trees);
8408         INIT_LIST_HEAD(&normal_trees);
8409
8410         if (repair) {
8411                 root->fs_info->excluded_extents = &excluded_extents;
8412                 root->fs_info->fsck_extent_cache = &extent_cache;
8413                 root->fs_info->free_extent_hook = free_extent_hook;
8414                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8415         }
8416
8417         bits_nr = 1024;
8418         bits = malloc(bits_nr * sizeof(struct block_info));
8419         if (!bits) {
8420                 perror("malloc");
8421                 exit(1);
8422         }
8423
8424         if (ctx.progress_enabled) {
8425                 ctx.tp = TASK_EXTENTS;
8426                 task_start(ctx.info);
8427         }
8428
8429 again:
8430         root1 = root->fs_info->tree_root;
8431         level = btrfs_header_level(root1->node);
8432         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8433                                     root1->node->start, 0, level, 0,
8434                                     root1->nodesize, NULL);
8435         if (ret < 0)
8436                 goto out;
8437         root1 = root->fs_info->chunk_root;
8438         level = btrfs_header_level(root1->node);
8439         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8440                                     root1->node->start, 0, level, 0,
8441                                     root1->nodesize, NULL);
8442         if (ret < 0)
8443                 goto out;
8444         btrfs_init_path(&path);
8445         key.offset = 0;
8446         key.objectid = 0;
8447         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8448         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8449                                         &key, &path, 0, 0);
8450         if (ret < 0)
8451                 goto out;
8452         while(1) {
8453                 leaf = path.nodes[0];
8454                 slot = path.slots[0];
8455                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8456                         ret = btrfs_next_leaf(root, &path);
8457                         if (ret != 0)
8458                                 break;
8459                         leaf = path.nodes[0];
8460                         slot = path.slots[0];
8461                 }
8462                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8463                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8464                         unsigned long offset;
8465                         u64 last_snapshot;
8466
8467                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8468                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8469                         last_snapshot = btrfs_root_last_snapshot(&ri);
8470                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8471                                 level = btrfs_root_level(&ri);
8472                                 level_size = root->nodesize;
8473                                 ret = add_root_item_to_list(&normal_trees,
8474                                                 found_key.objectid,
8475                                                 btrfs_root_bytenr(&ri),
8476                                                 last_snapshot, level,
8477                                                 0, level_size, NULL);
8478                                 if (ret < 0)
8479                                         goto out;
8480                         } else {
8481                                 level = btrfs_root_level(&ri);
8482                                 level_size = root->nodesize;
8483                                 objectid = found_key.objectid;
8484                                 btrfs_disk_key_to_cpu(&found_key,
8485                                                       &ri.drop_progress);
8486                                 ret = add_root_item_to_list(&dropping_trees,
8487                                                 objectid,
8488                                                 btrfs_root_bytenr(&ri),
8489                                                 last_snapshot, level,
8490                                                 ri.drop_level,
8491                                                 level_size, &found_key);
8492                                 if (ret < 0)
8493                                         goto out;
8494                         }
8495                 }
8496                 path.slots[0]++;
8497         }
8498         btrfs_release_path(&path);
8499
8500         /*
8501          * check_block can return -EAGAIN if it fixes something, please keep
8502          * this in mind when dealing with return values from these functions, if
8503          * we get -EAGAIN we want to fall through and restart the loop.
8504          */
8505         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8506                                   &seen, &reada, &nodes, &extent_cache,
8507                                   &chunk_cache, &dev_cache, &block_group_cache,
8508                                   &dev_extent_cache);
8509         if (ret < 0) {
8510                 if (ret == -EAGAIN)
8511                         goto loop;
8512                 goto out;
8513         }
8514         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8515                                   &pending, &seen, &reada, &nodes,
8516                                   &extent_cache, &chunk_cache, &dev_cache,
8517                                   &block_group_cache, &dev_extent_cache);
8518         if (ret < 0) {
8519                 if (ret == -EAGAIN)
8520                         goto loop;
8521                 goto out;
8522         }
8523
8524         ret = check_chunks(&chunk_cache, &block_group_cache,
8525                            &dev_extent_cache, NULL, NULL, NULL, 0);
8526         if (ret) {
8527                 if (ret == -EAGAIN)
8528                         goto loop;
8529                 err = ret;
8530         }
8531
8532         ret = check_extent_refs(root, &extent_cache);
8533         if (ret < 0) {
8534                 if (ret == -EAGAIN)
8535                         goto loop;
8536                 goto out;
8537         }
8538
8539         ret = check_devices(&dev_cache, &dev_extent_cache);
8540         if (ret && err)
8541                 ret = err;
8542
8543 out:
8544         task_stop(ctx.info);
8545         if (repair) {
8546                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8547                 extent_io_tree_cleanup(&excluded_extents);
8548                 root->fs_info->fsck_extent_cache = NULL;
8549                 root->fs_info->free_extent_hook = NULL;
8550                 root->fs_info->corrupt_blocks = NULL;
8551                 root->fs_info->excluded_extents = NULL;
8552         }
8553         free(bits);
8554         free_chunk_cache_tree(&chunk_cache);
8555         free_device_cache_tree(&dev_cache);
8556         free_block_group_tree(&block_group_cache);
8557         free_device_extent_tree(&dev_extent_cache);
8558         free_extent_cache_tree(&seen);
8559         free_extent_cache_tree(&pending);
8560         free_extent_cache_tree(&reada);
8561         free_extent_cache_tree(&nodes);
8562         return ret;
8563 loop:
8564         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8565         free_extent_cache_tree(&seen);
8566         free_extent_cache_tree(&pending);
8567         free_extent_cache_tree(&reada);
8568         free_extent_cache_tree(&nodes);
8569         free_chunk_cache_tree(&chunk_cache);
8570         free_block_group_tree(&block_group_cache);
8571         free_device_cache_tree(&dev_cache);
8572         free_device_extent_tree(&dev_extent_cache);
8573         free_extent_record_cache(root->fs_info, &extent_cache);
8574         free_root_item_list(&normal_trees);
8575         free_root_item_list(&dropping_trees);
8576         extent_io_tree_cleanup(&excluded_extents);
8577         goto again;
8578 }
8579
8580 /*
8581  * Check backrefs of a tree block given by @bytenr or @eb.
8582  *
8583  * @root:       the root containing the @bytenr or @eb
8584  * @eb:         tree block extent buffer, can be NULL
8585  * @bytenr:     bytenr of the tree block to search
8586  * @level:      tree level of the tree block
8587  * @owner:      owner of the tree block
8588  *
8589  * Return >0 for any error found and output error message
8590  * Return 0 for no error found
8591  */
8592 static int check_tree_block_ref(struct btrfs_root *root,
8593                                 struct extent_buffer *eb, u64 bytenr,
8594                                 int level, u64 owner)
8595 {
8596         struct btrfs_key key;
8597         struct btrfs_root *extent_root = root->fs_info->extent_root;
8598         struct btrfs_path path;
8599         struct btrfs_extent_item *ei;
8600         struct btrfs_extent_inline_ref *iref;
8601         struct extent_buffer *leaf;
8602         unsigned long end;
8603         unsigned long ptr;
8604         int slot;
8605         int skinny_level;
8606         int type;
8607         u32 nodesize = root->nodesize;
8608         u32 item_size;
8609         u64 offset;
8610         int found_ref = 0;
8611         int err = 0;
8612         int ret;
8613
8614         btrfs_init_path(&path);
8615         key.objectid = bytenr;
8616         if (btrfs_fs_incompat(root->fs_info,
8617                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8618                 key.type = BTRFS_METADATA_ITEM_KEY;
8619         else
8620                 key.type = BTRFS_EXTENT_ITEM_KEY;
8621         key.offset = (u64)-1;
8622
8623         /* Search for the backref in extent tree */
8624         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8625         if (ret < 0) {
8626                 err |= BACKREF_MISSING;
8627                 goto out;
8628         }
8629         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8630         if (ret) {
8631                 err |= BACKREF_MISSING;
8632                 goto out;
8633         }
8634
8635         leaf = path.nodes[0];
8636         slot = path.slots[0];
8637         btrfs_item_key_to_cpu(leaf, &key, slot);
8638
8639         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8640
8641         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8642                 skinny_level = (int)key.offset;
8643                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8644         } else {
8645                 struct btrfs_tree_block_info *info;
8646
8647                 info = (struct btrfs_tree_block_info *)(ei + 1);
8648                 skinny_level = btrfs_tree_block_level(leaf, info);
8649                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8650         }
8651
8652         if (eb) {
8653                 u64 header_gen;
8654                 u64 extent_gen;
8655
8656                 if (!(btrfs_extent_flags(leaf, ei) &
8657                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8658                         error(
8659                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8660                                 key.objectid, nodesize,
8661                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8662                         err = BACKREF_MISMATCH;
8663                 }
8664                 header_gen = btrfs_header_generation(eb);
8665                 extent_gen = btrfs_extent_generation(leaf, ei);
8666                 if (header_gen != extent_gen) {
8667                         error(
8668         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8669                                 key.objectid, nodesize, header_gen,
8670                                 extent_gen);
8671                         err = BACKREF_MISMATCH;
8672                 }
8673                 if (level != skinny_level) {
8674                         error(
8675                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8676                                 key.objectid, nodesize, level, skinny_level);
8677                         err = BACKREF_MISMATCH;
8678                 }
8679                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8680                         error(
8681                         "extent[%llu %u] is referred by other roots than %llu",
8682                                 key.objectid, nodesize, root->objectid);
8683                         err = BACKREF_MISMATCH;
8684                 }
8685         }
8686
8687         /*
8688          * Iterate the extent/metadata item to find the exact backref
8689          */
8690         item_size = btrfs_item_size_nr(leaf, slot);
8691         ptr = (unsigned long)iref;
8692         end = (unsigned long)ei + item_size;
8693         while (ptr < end) {
8694                 iref = (struct btrfs_extent_inline_ref *)ptr;
8695                 type = btrfs_extent_inline_ref_type(leaf, iref);
8696                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8697
8698                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8699                         (offset == root->objectid || offset == owner)) {
8700                         found_ref = 1;
8701                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8702                         /* Check if the backref points to valid referencer */
8703                         found_ref = !check_tree_block_ref(root, NULL, offset,
8704                                                           level + 1, owner);
8705                 }
8706
8707                 if (found_ref)
8708                         break;
8709                 ptr += btrfs_extent_inline_ref_size(type);
8710         }
8711
8712         /*
8713          * Inlined extent item doesn't have what we need, check
8714          * TREE_BLOCK_REF_KEY
8715          */
8716         if (!found_ref) {
8717                 btrfs_release_path(&path);
8718                 key.objectid = bytenr;
8719                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8720                 key.offset = root->objectid;
8721
8722                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8723                 if (!ret)
8724                         found_ref = 1;
8725         }
8726         if (!found_ref)
8727                 err |= BACKREF_MISSING;
8728 out:
8729         btrfs_release_path(&path);
8730         if (eb && (err & BACKREF_MISSING))
8731                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8732                         bytenr, nodesize, owner, level);
8733         return err;
8734 }
8735
8736 /*
8737  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8738  *
8739  * Return >0 any error found and output error message
8740  * Return 0 for no error found
8741  */
8742 static int check_extent_data_item(struct btrfs_root *root,
8743                                   struct extent_buffer *eb, int slot)
8744 {
8745         struct btrfs_file_extent_item *fi;
8746         struct btrfs_path path;
8747         struct btrfs_root *extent_root = root->fs_info->extent_root;
8748         struct btrfs_key fi_key;
8749         struct btrfs_key dbref_key;
8750         struct extent_buffer *leaf;
8751         struct btrfs_extent_item *ei;
8752         struct btrfs_extent_inline_ref *iref;
8753         struct btrfs_extent_data_ref *dref;
8754         u64 owner;
8755         u64 file_extent_gen;
8756         u64 disk_bytenr;
8757         u64 disk_num_bytes;
8758         u64 extent_num_bytes;
8759         u64 extent_flags;
8760         u64 extent_gen;
8761         u32 item_size;
8762         unsigned long end;
8763         unsigned long ptr;
8764         int type;
8765         u64 ref_root;
8766         int found_dbackref = 0;
8767         int err = 0;
8768         int ret;
8769
8770         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8771         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8772         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8773
8774         /* Nothing to check for hole and inline data extents */
8775         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8776             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8777                 return 0;
8778
8779         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8780         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8781         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8782
8783         /* Check unaligned disk_num_bytes and num_bytes */
8784         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8785                 error(
8786 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8787                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8788                         root->sectorsize);
8789                 err |= BYTES_UNALIGNED;
8790         } else {
8791                 data_bytes_allocated += disk_num_bytes;
8792         }
8793         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8794                 error(
8795 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8796                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8797                         root->sectorsize);
8798                 err |= BYTES_UNALIGNED;
8799         } else {
8800                 data_bytes_referenced += extent_num_bytes;
8801         }
8802         owner = btrfs_header_owner(eb);
8803
8804         /* Check the extent item of the file extent in extent tree */
8805         btrfs_init_path(&path);
8806         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8807         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8808         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8809
8810         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8811         if (ret) {
8812                 err |= BACKREF_MISSING;
8813                 goto error;
8814         }
8815
8816         leaf = path.nodes[0];
8817         slot = path.slots[0];
8818         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8819
8820         extent_flags = btrfs_extent_flags(leaf, ei);
8821         extent_gen = btrfs_extent_generation(leaf, ei);
8822
8823         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8824                 error(
8825                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8826                     disk_bytenr, disk_num_bytes,
8827                     BTRFS_EXTENT_FLAG_DATA);
8828                 err |= BACKREF_MISMATCH;
8829         }
8830
8831         if (file_extent_gen < extent_gen) {
8832                 error(
8833 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8834                         disk_bytenr, disk_num_bytes, file_extent_gen,
8835                         extent_gen);
8836                 err |= BACKREF_MISMATCH;
8837         }
8838
8839         /* Check data backref inside that extent item */
8840         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8841         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8842         ptr = (unsigned long)iref;
8843         end = (unsigned long)ei + item_size;
8844         while (ptr < end) {
8845                 iref = (struct btrfs_extent_inline_ref *)ptr;
8846                 type = btrfs_extent_inline_ref_type(leaf, iref);
8847                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8848
8849                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8850                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8851                         if (ref_root == owner || ref_root == root->objectid)
8852                                 found_dbackref = 1;
8853                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8854                         found_dbackref = !check_tree_block_ref(root, NULL,
8855                                 btrfs_extent_inline_ref_offset(leaf, iref),
8856                                 0, owner);
8857                 }
8858
8859                 if (found_dbackref)
8860                         break;
8861                 ptr += btrfs_extent_inline_ref_size(type);
8862         }
8863
8864         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8865         if (!found_dbackref) {
8866                 btrfs_release_path(&path);
8867
8868                 btrfs_init_path(&path);
8869                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8870                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8871                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8872                                 fi_key.objectid, fi_key.offset);
8873
8874                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8875                                         &dbref_key, &path, 0, 0);
8876                 if (!ret)
8877                         found_dbackref = 1;
8878         }
8879
8880         if (!found_dbackref)
8881                 err |= BACKREF_MISSING;
8882 error:
8883         btrfs_release_path(&path);
8884         if (err & BACKREF_MISSING) {
8885                 error("data extent[%llu %llu] backref lost",
8886                       disk_bytenr, disk_num_bytes);
8887         }
8888         return err;
8889 }
8890
8891 /*
8892  * Get real tree block level for the case like shared block
8893  * Return >= 0 as tree level
8894  * Return <0 for error
8895  */
8896 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8897 {
8898         struct extent_buffer *eb;
8899         struct btrfs_path path;
8900         struct btrfs_key key;
8901         struct btrfs_extent_item *ei;
8902         u64 flags;
8903         u64 transid;
8904         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8905         u8 backref_level;
8906         u8 header_level;
8907         int ret;
8908
8909         /* Search extent tree for extent generation and level */
8910         key.objectid = bytenr;
8911         key.type = BTRFS_METADATA_ITEM_KEY;
8912         key.offset = (u64)-1;
8913
8914         btrfs_init_path(&path);
8915         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8916         if (ret < 0)
8917                 goto release_out;
8918         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8919         if (ret < 0)
8920                 goto release_out;
8921         if (ret > 0) {
8922                 ret = -ENOENT;
8923                 goto release_out;
8924         }
8925
8926         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8927         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8928                             struct btrfs_extent_item);
8929         flags = btrfs_extent_flags(path.nodes[0], ei);
8930         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8931                 ret = -ENOENT;
8932                 goto release_out;
8933         }
8934
8935         /* Get transid for later read_tree_block() check */
8936         transid = btrfs_extent_generation(path.nodes[0], ei);
8937
8938         /* Get backref level as one source */
8939         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8940                 backref_level = key.offset;
8941         } else {
8942                 struct btrfs_tree_block_info *info;
8943
8944                 info = (struct btrfs_tree_block_info *)(ei + 1);
8945                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8946         }
8947         btrfs_release_path(&path);
8948
8949         /* Get level from tree block as an alternative source */
8950         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8951         if (!extent_buffer_uptodate(eb)) {
8952                 free_extent_buffer(eb);
8953                 return -EIO;
8954         }
8955         header_level = btrfs_header_level(eb);
8956         free_extent_buffer(eb);
8957
8958         if (header_level != backref_level)
8959                 return -EIO;
8960         return header_level;
8961
8962 release_out:
8963         btrfs_release_path(&path);
8964         return ret;
8965 }
8966
8967 /*
8968  * Check if a tree block backref is valid (points to a valid tree block)
8969  * if level == -1, level will be resolved
8970  * Return >0 for any error found and print error message
8971  */
8972 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8973                                     u64 bytenr, int level)
8974 {
8975         struct btrfs_root *root;
8976         struct btrfs_key key;
8977         struct btrfs_path path;
8978         struct extent_buffer *eb;
8979         struct extent_buffer *node;
8980         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8981         int err = 0;
8982         int ret;
8983
8984         /* Query level for level == -1 special case */
8985         if (level == -1)
8986                 level = query_tree_block_level(fs_info, bytenr);
8987         if (level < 0) {
8988                 err |= REFERENCER_MISSING;
8989                 goto out;
8990         }
8991
8992         key.objectid = root_id;
8993         key.type = BTRFS_ROOT_ITEM_KEY;
8994         key.offset = (u64)-1;
8995
8996         root = btrfs_read_fs_root(fs_info, &key);
8997         if (IS_ERR(root)) {
8998                 err |= REFERENCER_MISSING;
8999                 goto out;
9000         }
9001
9002         /* Read out the tree block to get item/node key */
9003         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9004         if (!extent_buffer_uptodate(eb)) {
9005                 err |= REFERENCER_MISSING;
9006                 free_extent_buffer(eb);
9007                 goto out;
9008         }
9009
9010         /* Empty tree, no need to check key */
9011         if (!btrfs_header_nritems(eb) && !level) {
9012                 free_extent_buffer(eb);
9013                 goto out;
9014         }
9015
9016         if (level)
9017                 btrfs_node_key_to_cpu(eb, &key, 0);
9018         else
9019                 btrfs_item_key_to_cpu(eb, &key, 0);
9020
9021         free_extent_buffer(eb);
9022
9023         btrfs_init_path(&path);
9024         /* Search with the first key, to ensure we can reach it */
9025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9026         if (ret) {
9027                 err |= REFERENCER_MISSING;
9028                 goto release_out;
9029         }
9030
9031         node = path.nodes[level];
9032         if (btrfs_header_bytenr(node) != bytenr) {
9033                 error(
9034         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9035                         bytenr, nodesize, bytenr,
9036                         btrfs_header_bytenr(node));
9037                 err |= REFERENCER_MISMATCH;
9038         }
9039         if (btrfs_header_level(node) != level) {
9040                 error(
9041         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9042                         bytenr, nodesize, level,
9043                         btrfs_header_level(node));
9044                 err |= REFERENCER_MISMATCH;
9045         }
9046
9047 release_out:
9048         btrfs_release_path(&path);
9049 out:
9050         if (err & REFERENCER_MISSING) {
9051                 if (level < 0)
9052                         error("extent [%llu %d] lost referencer (owner: %llu)",
9053                                 bytenr, nodesize, root_id);
9054                 else
9055                         error(
9056                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9057                                 bytenr, nodesize, root_id, level);
9058         }
9059
9060         return err;
9061 }
9062
9063 /*
9064  * Check referencer for shared block backref
9065  * If level == -1, this function will resolve the level.
9066  */
9067 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9068                                      u64 parent, u64 bytenr, int level)
9069 {
9070         struct extent_buffer *eb;
9071         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9072         u32 nr;
9073         int found_parent = 0;
9074         int i;
9075
9076         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9077         if (!extent_buffer_uptodate(eb))
9078                 goto out;
9079
9080         if (level == -1)
9081                 level = query_tree_block_level(fs_info, bytenr);
9082         if (level < 0)
9083                 goto out;
9084
9085         if (level + 1 != btrfs_header_level(eb))
9086                 goto out;
9087
9088         nr = btrfs_header_nritems(eb);
9089         for (i = 0; i < nr; i++) {
9090                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9091                         found_parent = 1;
9092                         break;
9093                 }
9094         }
9095 out:
9096         free_extent_buffer(eb);
9097         if (!found_parent) {
9098                 error(
9099         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9100                         bytenr, nodesize, parent, level);
9101                 return REFERENCER_MISSING;
9102         }
9103         return 0;
9104 }
9105
9106 /*
9107  * Check referencer for normal (inlined) data ref
9108  * If len == 0, it will be resolved by searching in extent tree
9109  */
9110 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9111                                      u64 root_id, u64 objectid, u64 offset,
9112                                      u64 bytenr, u64 len, u32 count)
9113 {
9114         struct btrfs_root *root;
9115         struct btrfs_root *extent_root = fs_info->extent_root;
9116         struct btrfs_key key;
9117         struct btrfs_path path;
9118         struct extent_buffer *leaf;
9119         struct btrfs_file_extent_item *fi;
9120         u32 found_count = 0;
9121         int slot;
9122         int ret = 0;
9123
9124         if (!len) {
9125                 key.objectid = bytenr;
9126                 key.type = BTRFS_EXTENT_ITEM_KEY;
9127                 key.offset = (u64)-1;
9128
9129                 btrfs_init_path(&path);
9130                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9131                 if (ret < 0)
9132                         goto out;
9133                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9134                 if (ret)
9135                         goto out;
9136                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9137                 if (key.objectid != bytenr ||
9138                     key.type != BTRFS_EXTENT_ITEM_KEY)
9139                         goto out;
9140                 len = key.offset;
9141                 btrfs_release_path(&path);
9142         }
9143         key.objectid = root_id;
9144         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9145         key.offset = (u64)-1;
9146         btrfs_init_path(&path);
9147
9148         root = btrfs_read_fs_root(fs_info, &key);
9149         if (IS_ERR(root))
9150                 goto out;
9151
9152         key.objectid = objectid;
9153         key.type = BTRFS_EXTENT_DATA_KEY;
9154         /*
9155          * It can be nasty as data backref offset is
9156          * file offset - file extent offset, which is smaller or
9157          * equal to original backref offset.  The only special case is
9158          * overflow.  So we need to special check and do further search.
9159          */
9160         key.offset = offset & (1ULL << 63) ? 0 : offset;
9161
9162         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9163         if (ret < 0)
9164                 goto out;
9165
9166         /*
9167          * Search afterwards to get correct one
9168          * NOTE: As we must do a comprehensive check on the data backref to
9169          * make sure the dref count also matches, we must iterate all file
9170          * extents for that inode.
9171          */
9172         while (1) {
9173                 leaf = path.nodes[0];
9174                 slot = path.slots[0];
9175
9176                 btrfs_item_key_to_cpu(leaf, &key, slot);
9177                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9178                         break;
9179                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9180                 /*
9181                  * Except normal disk bytenr and disk num bytes, we still
9182                  * need to do extra check on dbackref offset as
9183                  * dbackref offset = file_offset - file_extent_offset
9184                  */
9185                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9186                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9187                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9188                     offset)
9189                         found_count++;
9190
9191                 ret = btrfs_next_item(root, &path);
9192                 if (ret)
9193                         break;
9194         }
9195 out:
9196         btrfs_release_path(&path);
9197         if (found_count != count) {
9198                 error(
9199 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9200                         bytenr, len, root_id, objectid, offset, count, found_count);
9201                 return REFERENCER_MISSING;
9202         }
9203         return 0;
9204 }
9205
9206 /*
9207  * Check if the referencer of a shared data backref exists
9208  */
9209 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9210                                      u64 parent, u64 bytenr)
9211 {
9212         struct extent_buffer *eb;
9213         struct btrfs_key key;
9214         struct btrfs_file_extent_item *fi;
9215         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9216         u32 nr;
9217         int found_parent = 0;
9218         int i;
9219
9220         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9221         if (!extent_buffer_uptodate(eb))
9222                 goto out;
9223
9224         nr = btrfs_header_nritems(eb);
9225         for (i = 0; i < nr; i++) {
9226                 btrfs_item_key_to_cpu(eb, &key, i);
9227                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9228                         continue;
9229
9230                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9231                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9232                         continue;
9233
9234                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9235                         found_parent = 1;
9236                         break;
9237                 }
9238         }
9239
9240 out:
9241         free_extent_buffer(eb);
9242         if (!found_parent) {
9243                 error("shared extent %llu referencer lost (parent: %llu)",
9244                         bytenr, parent);
9245                 return REFERENCER_MISSING;
9246         }
9247         return 0;
9248 }
9249
9250 /*
9251  * This function will check a given extent item, including its backref and
9252  * itself (like crossing stripe boundary and type)
9253  *
9254  * Since we don't use extent_record anymore, introduce new error bit
9255  */
9256 static int check_extent_item(struct btrfs_fs_info *fs_info,
9257                              struct extent_buffer *eb, int slot)
9258 {
9259         struct btrfs_extent_item *ei;
9260         struct btrfs_extent_inline_ref *iref;
9261         struct btrfs_extent_data_ref *dref;
9262         unsigned long end;
9263         unsigned long ptr;
9264         int type;
9265         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9266         u32 item_size = btrfs_item_size_nr(eb, slot);
9267         u64 flags;
9268         u64 offset;
9269         int metadata = 0;
9270         int level;
9271         struct btrfs_key key;
9272         int ret;
9273         int err = 0;
9274
9275         btrfs_item_key_to_cpu(eb, &key, slot);
9276         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9277                 bytes_used += key.offset;
9278         else
9279                 bytes_used += nodesize;
9280
9281         if (item_size < sizeof(*ei)) {
9282                 /*
9283                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9284                  * old thing when on disk format is still un-determined.
9285                  * No need to care about it anymore
9286                  */
9287                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9288                 return -ENOTTY;
9289         }
9290
9291         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9292         flags = btrfs_extent_flags(eb, ei);
9293
9294         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9295                 metadata = 1;
9296         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9297                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9298                       key.objectid, key.objectid + nodesize);
9299                 err |= CROSSING_STRIPE_BOUNDARY;
9300         }
9301
9302         ptr = (unsigned long)(ei + 1);
9303
9304         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9305                 /* Old EXTENT_ITEM metadata */
9306                 struct btrfs_tree_block_info *info;
9307
9308                 info = (struct btrfs_tree_block_info *)ptr;
9309                 level = btrfs_tree_block_level(eb, info);
9310                 ptr += sizeof(struct btrfs_tree_block_info);
9311         } else {
9312                 /* New METADATA_ITEM */
9313                 level = key.offset;
9314         }
9315         end = (unsigned long)ei + item_size;
9316
9317         if (ptr >= end) {
9318                 err |= ITEM_SIZE_MISMATCH;
9319                 goto out;
9320         }
9321
9322         /* Now check every backref in this extent item */
9323 next:
9324         iref = (struct btrfs_extent_inline_ref *)ptr;
9325         type = btrfs_extent_inline_ref_type(eb, iref);
9326         offset = btrfs_extent_inline_ref_offset(eb, iref);
9327         switch (type) {
9328         case BTRFS_TREE_BLOCK_REF_KEY:
9329                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9330                                                level);
9331                 err |= ret;
9332                 break;
9333         case BTRFS_SHARED_BLOCK_REF_KEY:
9334                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9335                                                  level);
9336                 err |= ret;
9337                 break;
9338         case BTRFS_EXTENT_DATA_REF_KEY:
9339                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9340                 ret = check_extent_data_backref(fs_info,
9341                                 btrfs_extent_data_ref_root(eb, dref),
9342                                 btrfs_extent_data_ref_objectid(eb, dref),
9343                                 btrfs_extent_data_ref_offset(eb, dref),
9344                                 key.objectid, key.offset,
9345                                 btrfs_extent_data_ref_count(eb, dref));
9346                 err |= ret;
9347                 break;
9348         case BTRFS_SHARED_DATA_REF_KEY:
9349                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9350                 err |= ret;
9351                 break;
9352         default:
9353                 error("extent[%llu %d %llu] has unknown ref type: %d",
9354                         key.objectid, key.type, key.offset, type);
9355                 err |= UNKNOWN_TYPE;
9356                 goto out;
9357         }
9358
9359         ptr += btrfs_extent_inline_ref_size(type);
9360         if (ptr < end)
9361                 goto next;
9362
9363 out:
9364         return err;
9365 }
9366
9367 /*
9368  * Check if a dev extent item is referred correctly by its chunk
9369  */
9370 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9371                                  struct extent_buffer *eb, int slot)
9372 {
9373         struct btrfs_root *chunk_root = fs_info->chunk_root;
9374         struct btrfs_dev_extent *ptr;
9375         struct btrfs_path path;
9376         struct btrfs_key chunk_key;
9377         struct btrfs_key devext_key;
9378         struct btrfs_chunk *chunk;
9379         struct extent_buffer *l;
9380         int num_stripes;
9381         u64 length;
9382         int i;
9383         int found_chunk = 0;
9384         int ret;
9385
9386         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9387         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9388         length = btrfs_dev_extent_length(eb, ptr);
9389
9390         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9391         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9392         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9393
9394         btrfs_init_path(&path);
9395         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9396         if (ret)
9397                 goto out;
9398
9399         l = path.nodes[0];
9400         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9401         if (btrfs_chunk_length(l, chunk) != length)
9402                 goto out;
9403
9404         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9405         for (i = 0; i < num_stripes; i++) {
9406                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9407                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9408
9409                 if (devid == devext_key.objectid &&
9410                     offset == devext_key.offset) {
9411                         found_chunk = 1;
9412                         break;
9413                 }
9414         }
9415 out:
9416         btrfs_release_path(&path);
9417         if (!found_chunk) {
9418                 error(
9419                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9420                         devext_key.objectid, devext_key.offset, length);
9421                 return REFERENCER_MISSING;
9422         }
9423         return 0;
9424 }
9425
9426 /*
9427  * Check if the used space is correct with the dev item
9428  */
9429 static int check_dev_item(struct btrfs_fs_info *fs_info,
9430                           struct extent_buffer *eb, int slot)
9431 {
9432         struct btrfs_root *dev_root = fs_info->dev_root;
9433         struct btrfs_dev_item *dev_item;
9434         struct btrfs_path path;
9435         struct btrfs_key key;
9436         struct btrfs_dev_extent *ptr;
9437         u64 dev_id;
9438         u64 used;
9439         u64 total = 0;
9440         int ret;
9441
9442         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9443         dev_id = btrfs_device_id(eb, dev_item);
9444         used = btrfs_device_bytes_used(eb, dev_item);
9445
9446         key.objectid = dev_id;
9447         key.type = BTRFS_DEV_EXTENT_KEY;
9448         key.offset = 0;
9449
9450         btrfs_init_path(&path);
9451         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9452         if (ret < 0) {
9453                 btrfs_item_key_to_cpu(eb, &key, slot);
9454                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9455                         key.objectid, key.type, key.offset);
9456                 btrfs_release_path(&path);
9457                 return REFERENCER_MISSING;
9458         }
9459
9460         /* Iterate dev_extents to calculate the used space of a device */
9461         while (1) {
9462                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9463
9464                 if (key.objectid > dev_id)
9465                         break;
9466                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9467                         goto next;
9468
9469                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9470                                      struct btrfs_dev_extent);
9471                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9472 next:
9473                 ret = btrfs_next_item(dev_root, &path);
9474                 if (ret)
9475                         break;
9476         }
9477         btrfs_release_path(&path);
9478
9479         if (used != total) {
9480                 btrfs_item_key_to_cpu(eb, &key, slot);
9481                 error(
9482 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9483                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9484                         BTRFS_DEV_EXTENT_KEY, dev_id);
9485                 return ACCOUNTING_MISMATCH;
9486         }
9487         return 0;
9488 }
9489
9490 /*
9491  * Check a block group item with its referener (chunk) and its used space
9492  * with extent/metadata item
9493  */
9494 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9495                                   struct extent_buffer *eb, int slot)
9496 {
9497         struct btrfs_root *extent_root = fs_info->extent_root;
9498         struct btrfs_root *chunk_root = fs_info->chunk_root;
9499         struct btrfs_block_group_item *bi;
9500         struct btrfs_block_group_item bg_item;
9501         struct btrfs_path path;
9502         struct btrfs_key bg_key;
9503         struct btrfs_key chunk_key;
9504         struct btrfs_key extent_key;
9505         struct btrfs_chunk *chunk;
9506         struct extent_buffer *leaf;
9507         struct btrfs_extent_item *ei;
9508         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9509         u64 flags;
9510         u64 bg_flags;
9511         u64 used;
9512         u64 total = 0;
9513         int ret;
9514         int err = 0;
9515
9516         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9517         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9518         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9519         used = btrfs_block_group_used(&bg_item);
9520         bg_flags = btrfs_block_group_flags(&bg_item);
9521
9522         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9523         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9524         chunk_key.offset = bg_key.objectid;
9525
9526         btrfs_init_path(&path);
9527         /* Search for the referencer chunk */
9528         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9529         if (ret) {
9530                 error(
9531                 "block group[%llu %llu] did not find the related chunk item",
9532                         bg_key.objectid, bg_key.offset);
9533                 err |= REFERENCER_MISSING;
9534         } else {
9535                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9536                                         struct btrfs_chunk);
9537                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9538                                                 bg_key.offset) {
9539                         error(
9540         "block group[%llu %llu] related chunk item length does not match",
9541                                 bg_key.objectid, bg_key.offset);
9542                         err |= REFERENCER_MISMATCH;
9543                 }
9544         }
9545         btrfs_release_path(&path);
9546
9547         /* Search from the block group bytenr */
9548         extent_key.objectid = bg_key.objectid;
9549         extent_key.type = 0;
9550         extent_key.offset = 0;
9551
9552         btrfs_init_path(&path);
9553         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9554         if (ret < 0)
9555                 goto out;
9556
9557         /* Iterate extent tree to account used space */
9558         while (1) {
9559                 leaf = path.nodes[0];
9560                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9561                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9562                         break;
9563
9564                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9565                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9566                         goto next;
9567                 if (extent_key.objectid < bg_key.objectid)
9568                         goto next;
9569
9570                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9571                         total += nodesize;
9572                 else
9573                         total += extent_key.offset;
9574
9575                 ei = btrfs_item_ptr(leaf, path.slots[0],
9576                                     struct btrfs_extent_item);
9577                 flags = btrfs_extent_flags(leaf, ei);
9578                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9579                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9580                                 error(
9581                         "bad extent[%llu, %llu) type mismatch with chunk",
9582                                         extent_key.objectid,
9583                                         extent_key.objectid + extent_key.offset);
9584                                 err |= CHUNK_TYPE_MISMATCH;
9585                         }
9586                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9587                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9588                                     BTRFS_BLOCK_GROUP_METADATA))) {
9589                                 error(
9590                         "bad extent[%llu, %llu) type mismatch with chunk",
9591                                         extent_key.objectid,
9592                                         extent_key.objectid + nodesize);
9593                                 err |= CHUNK_TYPE_MISMATCH;
9594                         }
9595                 }
9596 next:
9597                 ret = btrfs_next_item(extent_root, &path);
9598                 if (ret)
9599                         break;
9600         }
9601
9602 out:
9603         btrfs_release_path(&path);
9604
9605         if (total != used) {
9606                 error(
9607                 "block group[%llu %llu] used %llu but extent items used %llu",
9608                         bg_key.objectid, bg_key.offset, used, total);
9609                 err |= ACCOUNTING_MISMATCH;
9610         }
9611         return err;
9612 }
9613
9614 /*
9615  * Check a chunk item.
9616  * Including checking all referred dev_extents and block group
9617  */
9618 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9619                             struct extent_buffer *eb, int slot)
9620 {
9621         struct btrfs_root *extent_root = fs_info->extent_root;
9622         struct btrfs_root *dev_root = fs_info->dev_root;
9623         struct btrfs_path path;
9624         struct btrfs_key chunk_key;
9625         struct btrfs_key bg_key;
9626         struct btrfs_key devext_key;
9627         struct btrfs_chunk *chunk;
9628         struct extent_buffer *leaf;
9629         struct btrfs_block_group_item *bi;
9630         struct btrfs_block_group_item bg_item;
9631         struct btrfs_dev_extent *ptr;
9632         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9633         u64 length;
9634         u64 chunk_end;
9635         u64 type;
9636         u64 profile;
9637         int num_stripes;
9638         u64 offset;
9639         u64 objectid;
9640         int i;
9641         int ret;
9642         int err = 0;
9643
9644         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9645         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9646         length = btrfs_chunk_length(eb, chunk);
9647         chunk_end = chunk_key.offset + length;
9648         if (!IS_ALIGNED(length, sectorsize)) {
9649                 error("chunk[%llu %llu) not aligned to %u",
9650                         chunk_key.offset, chunk_end, sectorsize);
9651                 err |= BYTES_UNALIGNED;
9652                 goto out;
9653         }
9654
9655         type = btrfs_chunk_type(eb, chunk);
9656         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9657         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9658                 error("chunk[%llu %llu) has no chunk type",
9659                         chunk_key.offset, chunk_end);
9660                 err |= UNKNOWN_TYPE;
9661         }
9662         if (profile && (profile & (profile - 1))) {
9663                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9664                         chunk_key.offset, chunk_end, profile);
9665                 err |= UNKNOWN_TYPE;
9666         }
9667
9668         bg_key.objectid = chunk_key.offset;
9669         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9670         bg_key.offset = length;
9671
9672         btrfs_init_path(&path);
9673         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9674         if (ret) {
9675                 error(
9676                 "chunk[%llu %llu) did not find the related block group item",
9677                         chunk_key.offset, chunk_end);
9678                 err |= REFERENCER_MISSING;
9679         } else{
9680                 leaf = path.nodes[0];
9681                 bi = btrfs_item_ptr(leaf, path.slots[0],
9682                                     struct btrfs_block_group_item);
9683                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9684                                    sizeof(bg_item));
9685                 if (btrfs_block_group_flags(&bg_item) != type) {
9686                         error(
9687 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9688                                 chunk_key.offset, chunk_end, type,
9689                                 btrfs_block_group_flags(&bg_item));
9690                         err |= REFERENCER_MISSING;
9691                 }
9692         }
9693
9694         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9695         for (i = 0; i < num_stripes; i++) {
9696                 btrfs_release_path(&path);
9697                 btrfs_init_path(&path);
9698                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9699                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9700                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9701
9702                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9703                                         0, 0);
9704                 if (ret)
9705                         goto not_match_dev;
9706
9707                 leaf = path.nodes[0];
9708                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9709                                      struct btrfs_dev_extent);
9710                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9711                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9712                 if (objectid != chunk_key.objectid ||
9713                     offset != chunk_key.offset ||
9714                     btrfs_dev_extent_length(leaf, ptr) != length)
9715                         goto not_match_dev;
9716                 continue;
9717 not_match_dev:
9718                 err |= BACKREF_MISSING;
9719                 error(
9720                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9721                         chunk_key.objectid, chunk_end, i);
9722                 continue;
9723         }
9724         btrfs_release_path(&path);
9725 out:
9726         return err;
9727 }
9728
9729 /*
9730  * Main entry function to check known items and update related accounting info
9731  */
9732 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9733 {
9734         struct btrfs_fs_info *fs_info = root->fs_info;
9735         struct btrfs_key key;
9736         int slot = 0;
9737         int type;
9738         struct btrfs_extent_data_ref *dref;
9739         int ret;
9740         int err = 0;
9741
9742 next:
9743         btrfs_item_key_to_cpu(eb, &key, slot);
9744         type = btrfs_key_type(&key);
9745
9746         switch (type) {
9747         case BTRFS_EXTENT_DATA_KEY:
9748                 ret = check_extent_data_item(root, eb, slot);
9749                 err |= ret;
9750                 break;
9751         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9752                 ret = check_block_group_item(fs_info, eb, slot);
9753                 err |= ret;
9754                 break;
9755         case BTRFS_DEV_ITEM_KEY:
9756                 ret = check_dev_item(fs_info, eb, slot);
9757                 err |= ret;
9758                 break;
9759         case BTRFS_CHUNK_ITEM_KEY:
9760                 ret = check_chunk_item(fs_info, eb, slot);
9761                 err |= ret;
9762                 break;
9763         case BTRFS_DEV_EXTENT_KEY:
9764                 ret = check_dev_extent_item(fs_info, eb, slot);
9765                 err |= ret;
9766                 break;
9767         case BTRFS_EXTENT_ITEM_KEY:
9768         case BTRFS_METADATA_ITEM_KEY:
9769                 ret = check_extent_item(fs_info, eb, slot);
9770                 err |= ret;
9771                 break;
9772         case BTRFS_EXTENT_CSUM_KEY:
9773                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9774                 break;
9775         case BTRFS_TREE_BLOCK_REF_KEY:
9776                 ret = check_tree_block_backref(fs_info, key.offset,
9777                                                key.objectid, -1);
9778                 err |= ret;
9779                 break;
9780         case BTRFS_EXTENT_DATA_REF_KEY:
9781                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9782                 ret = check_extent_data_backref(fs_info,
9783                                 btrfs_extent_data_ref_root(eb, dref),
9784                                 btrfs_extent_data_ref_objectid(eb, dref),
9785                                 btrfs_extent_data_ref_offset(eb, dref),
9786                                 key.objectid, 0,
9787                                 btrfs_extent_data_ref_count(eb, dref));
9788                 err |= ret;
9789                 break;
9790         case BTRFS_SHARED_BLOCK_REF_KEY:
9791                 ret = check_shared_block_backref(fs_info, key.offset,
9792                                                  key.objectid, -1);
9793                 err |= ret;
9794                 break;
9795         case BTRFS_SHARED_DATA_REF_KEY:
9796                 ret = check_shared_data_backref(fs_info, key.offset,
9797                                                 key.objectid);
9798                 err |= ret;
9799                 break;
9800         default:
9801                 break;
9802         }
9803
9804         if (++slot < btrfs_header_nritems(eb))
9805                 goto next;
9806
9807         return err;
9808 }
9809
9810 /*
9811  * Helper function for later fs/subvol tree check.  To determine if a tree
9812  * block should be checked.
9813  * This function will ensure only the direct referencer with lowest rootid to
9814  * check a fs/subvolume tree block.
9815  *
9816  * Backref check at extent tree would detect errors like missing subvolume
9817  * tree, so we can do aggressive check to reduce duplicated checks.
9818  */
9819 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9820 {
9821         struct btrfs_root *extent_root = root->fs_info->extent_root;
9822         struct btrfs_key key;
9823         struct btrfs_path path;
9824         struct extent_buffer *leaf;
9825         int slot;
9826         struct btrfs_extent_item *ei;
9827         unsigned long ptr;
9828         unsigned long end;
9829         int type;
9830         u32 item_size;
9831         u64 offset;
9832         struct btrfs_extent_inline_ref *iref;
9833         int ret;
9834
9835         btrfs_init_path(&path);
9836         key.objectid = btrfs_header_bytenr(eb);
9837         key.type = BTRFS_METADATA_ITEM_KEY;
9838         key.offset = (u64)-1;
9839
9840         /*
9841          * Any failure in backref resolving means we can't determine
9842          * whom the tree block belongs to.
9843          * So in that case, we need to check that tree block
9844          */
9845         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9846         if (ret < 0)
9847                 goto need_check;
9848
9849         ret = btrfs_previous_extent_item(extent_root, &path,
9850                                          btrfs_header_bytenr(eb));
9851         if (ret)
9852                 goto need_check;
9853
9854         leaf = path.nodes[0];
9855         slot = path.slots[0];
9856         btrfs_item_key_to_cpu(leaf, &key, slot);
9857         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9858
9859         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9860                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9861         } else {
9862                 struct btrfs_tree_block_info *info;
9863
9864                 info = (struct btrfs_tree_block_info *)(ei + 1);
9865                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9866         }
9867
9868         item_size = btrfs_item_size_nr(leaf, slot);
9869         ptr = (unsigned long)iref;
9870         end = (unsigned long)ei + item_size;
9871         while (ptr < end) {
9872                 iref = (struct btrfs_extent_inline_ref *)ptr;
9873                 type = btrfs_extent_inline_ref_type(leaf, iref);
9874                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9875
9876                 /*
9877                  * We only check the tree block if current root is
9878                  * the lowest referencer of it.
9879                  */
9880                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9881                     offset < root->objectid) {
9882                         btrfs_release_path(&path);
9883                         return 0;
9884                 }
9885
9886                 ptr += btrfs_extent_inline_ref_size(type);
9887         }
9888         /*
9889          * Normally we should also check keyed tree block ref, but that may be
9890          * very time consuming.  Inlined ref should already make us skip a lot
9891          * of refs now.  So skip search keyed tree block ref.
9892          */
9893
9894 need_check:
9895         btrfs_release_path(&path);
9896         return 1;
9897 }
9898
9899 /*
9900  * Traversal function for tree block. We will do:
9901  * 1) Skip shared fs/subvolume tree blocks
9902  * 2) Update related bytes accounting
9903  * 3) Pre-order traversal
9904  */
9905 static int traverse_tree_block(struct btrfs_root *root,
9906                                 struct extent_buffer *node)
9907 {
9908         struct extent_buffer *eb;
9909         int level;
9910         u64 nr;
9911         int i;
9912         int err = 0;
9913         int ret;
9914
9915         /*
9916          * Skip shared fs/subvolume tree block, in that case they will
9917          * be checked by referencer with lowest rootid
9918          */
9919         if (is_fstree(root->objectid) && !should_check(root, node))
9920                 return 0;
9921
9922         /* Update bytes accounting */
9923         total_btree_bytes += node->len;
9924         if (fs_root_objectid(btrfs_header_owner(node)))
9925                 total_fs_tree_bytes += node->len;
9926         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9927                 total_extent_tree_bytes += node->len;
9928         if (!found_old_backref &&
9929             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9930             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9931             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9932                 found_old_backref = 1;
9933
9934         /* pre-order tranversal, check itself first */
9935         level = btrfs_header_level(node);
9936         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9937                                    btrfs_header_level(node),
9938                                    btrfs_header_owner(node));
9939         err |= ret;
9940         if (err)
9941                 error(
9942         "check %s failed root %llu bytenr %llu level %d, force continue check",
9943                         level ? "node":"leaf", root->objectid,
9944                         btrfs_header_bytenr(node), btrfs_header_level(node));
9945
9946         if (!level) {
9947                 btree_space_waste += btrfs_leaf_free_space(root, node);
9948                 ret = check_leaf_items(root, node);
9949                 err |= ret;
9950                 return err;
9951         }
9952
9953         nr = btrfs_header_nritems(node);
9954         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
9955                 sizeof(struct btrfs_key_ptr);
9956
9957         /* Then check all its children */
9958         for (i = 0; i < nr; i++) {
9959                 u64 blocknr = btrfs_node_blockptr(node, i);
9960
9961                 /*
9962                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
9963                  * to call the function itself.
9964                  */
9965                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
9966                 if (extent_buffer_uptodate(eb)) {
9967                         ret = traverse_tree_block(root, eb);
9968                         err |= ret;
9969                 }
9970                 free_extent_buffer(eb);
9971         }
9972
9973         return err;
9974 }
9975
9976 /*
9977  * Low memory usage version check_chunks_and_extents.
9978  */
9979 static int check_chunks_and_extents_v2(struct btrfs_root *root)
9980 {
9981         struct btrfs_path path;
9982         struct btrfs_key key;
9983         struct btrfs_root *root1;
9984         struct btrfs_root *cur_root;
9985         int err = 0;
9986         int ret;
9987
9988         root1 = root->fs_info->chunk_root;
9989         ret = traverse_tree_block(root1, root1->node);
9990         err |= ret;
9991
9992         root1 = root->fs_info->tree_root;
9993         ret = traverse_tree_block(root1, root1->node);
9994         err |= ret;
9995
9996         btrfs_init_path(&path);
9997         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
9998         key.offset = 0;
9999         key.type = BTRFS_ROOT_ITEM_KEY;
10000
10001         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10002         if (ret) {
10003                 error("cannot find extent treet in tree_root");
10004                 goto out;
10005         }
10006
10007         while (1) {
10008                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10009                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10010                         goto next;
10011                 key.offset = (u64)-1;
10012
10013                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10014                 if (IS_ERR(cur_root) || !cur_root) {
10015                         error("failed to read tree: %lld", key.objectid);
10016                         goto next;
10017                 }
10018
10019                 ret = traverse_tree_block(cur_root, cur_root->node);
10020                 err |= ret;
10021
10022 next:
10023                 ret = btrfs_next_item(root1, &path);
10024                 if (ret)
10025                         goto out;
10026         }
10027
10028 out:
10029         btrfs_release_path(&path);
10030         return err;
10031 }
10032
10033 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10034                            struct btrfs_root *root, int overwrite)
10035 {
10036         struct extent_buffer *c;
10037         struct extent_buffer *old = root->node;
10038         int level;
10039         int ret;
10040         struct btrfs_disk_key disk_key = {0,0,0};
10041
10042         level = 0;
10043
10044         if (overwrite) {
10045                 c = old;
10046                 extent_buffer_get(c);
10047                 goto init;
10048         }
10049         c = btrfs_alloc_free_block(trans, root,
10050                                    root->nodesize,
10051                                    root->root_key.objectid,
10052                                    &disk_key, level, 0, 0);
10053         if (IS_ERR(c)) {
10054                 c = old;
10055                 extent_buffer_get(c);
10056                 overwrite = 1;
10057         }
10058 init:
10059         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10060         btrfs_set_header_level(c, level);
10061         btrfs_set_header_bytenr(c, c->start);
10062         btrfs_set_header_generation(c, trans->transid);
10063         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10064         btrfs_set_header_owner(c, root->root_key.objectid);
10065
10066         write_extent_buffer(c, root->fs_info->fsid,
10067                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10068
10069         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10070                             btrfs_header_chunk_tree_uuid(c),
10071                             BTRFS_UUID_SIZE);
10072
10073         btrfs_mark_buffer_dirty(c);
10074         /*
10075          * this case can happen in the following case:
10076          *
10077          * 1.overwrite previous root.
10078          *
10079          * 2.reinit reloc data root, this is because we skip pin
10080          * down reloc data tree before which means we can allocate
10081          * same block bytenr here.
10082          */
10083         if (old->start == c->start) {
10084                 btrfs_set_root_generation(&root->root_item,
10085                                           trans->transid);
10086                 root->root_item.level = btrfs_header_level(root->node);
10087                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10088                                         &root->root_key, &root->root_item);
10089                 if (ret) {
10090                         free_extent_buffer(c);
10091                         return ret;
10092                 }
10093         }
10094         free_extent_buffer(old);
10095         root->node = c;
10096         add_root_to_dirty_list(root);
10097         return 0;
10098 }
10099
10100 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10101                                 struct extent_buffer *eb, int tree_root)
10102 {
10103         struct extent_buffer *tmp;
10104         struct btrfs_root_item *ri;
10105         struct btrfs_key key;
10106         u64 bytenr;
10107         u32 nodesize;
10108         int level = btrfs_header_level(eb);
10109         int nritems;
10110         int ret;
10111         int i;
10112
10113         /*
10114          * If we have pinned this block before, don't pin it again.
10115          * This can not only avoid forever loop with broken filesystem
10116          * but also give us some speedups.
10117          */
10118         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10119                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10120                 return 0;
10121
10122         btrfs_pin_extent(fs_info, eb->start, eb->len);
10123
10124         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10125         nritems = btrfs_header_nritems(eb);
10126         for (i = 0; i < nritems; i++) {
10127                 if (level == 0) {
10128                         btrfs_item_key_to_cpu(eb, &key, i);
10129                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10130                                 continue;
10131                         /* Skip the extent root and reloc roots */
10132                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10133                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10134                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10135                                 continue;
10136                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10137                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10138
10139                         /*
10140                          * If at any point we start needing the real root we
10141                          * will have to build a stump root for the root we are
10142                          * in, but for now this doesn't actually use the root so
10143                          * just pass in extent_root.
10144                          */
10145                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10146                                               nodesize, 0);
10147                         if (!extent_buffer_uptodate(tmp)) {
10148                                 fprintf(stderr, "Error reading root block\n");
10149                                 return -EIO;
10150                         }
10151                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10152                         free_extent_buffer(tmp);
10153                         if (ret)
10154                                 return ret;
10155                 } else {
10156                         bytenr = btrfs_node_blockptr(eb, i);
10157
10158                         /* If we aren't the tree root don't read the block */
10159                         if (level == 1 && !tree_root) {
10160                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10161                                 continue;
10162                         }
10163
10164                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10165                                               nodesize, 0);
10166                         if (!extent_buffer_uptodate(tmp)) {
10167                                 fprintf(stderr, "Error reading tree block\n");
10168                                 return -EIO;
10169                         }
10170                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10171                         free_extent_buffer(tmp);
10172                         if (ret)
10173                                 return ret;
10174                 }
10175         }
10176
10177         return 0;
10178 }
10179
10180 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10181 {
10182         int ret;
10183
10184         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10185         if (ret)
10186                 return ret;
10187
10188         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10189 }
10190
10191 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10192 {
10193         struct btrfs_block_group_cache *cache;
10194         struct btrfs_path *path;
10195         struct extent_buffer *leaf;
10196         struct btrfs_chunk *chunk;
10197         struct btrfs_key key;
10198         int ret;
10199         u64 start;
10200
10201         path = btrfs_alloc_path();
10202         if (!path)
10203                 return -ENOMEM;
10204
10205         key.objectid = 0;
10206         key.type = BTRFS_CHUNK_ITEM_KEY;
10207         key.offset = 0;
10208
10209         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10210         if (ret < 0) {
10211                 btrfs_free_path(path);
10212                 return ret;
10213         }
10214
10215         /*
10216          * We do this in case the block groups were screwed up and had alloc
10217          * bits that aren't actually set on the chunks.  This happens with
10218          * restored images every time and could happen in real life I guess.
10219          */
10220         fs_info->avail_data_alloc_bits = 0;
10221         fs_info->avail_metadata_alloc_bits = 0;
10222         fs_info->avail_system_alloc_bits = 0;
10223
10224         /* First we need to create the in-memory block groups */
10225         while (1) {
10226                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10227                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10228                         if (ret < 0) {
10229                                 btrfs_free_path(path);
10230                                 return ret;
10231                         }
10232                         if (ret) {
10233                                 ret = 0;
10234                                 break;
10235                         }
10236                 }
10237                 leaf = path->nodes[0];
10238                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10239                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10240                         path->slots[0]++;
10241                         continue;
10242                 }
10243
10244                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10245                                        struct btrfs_chunk);
10246                 btrfs_add_block_group(fs_info, 0,
10247                                       btrfs_chunk_type(leaf, chunk),
10248                                       key.objectid, key.offset,
10249                                       btrfs_chunk_length(leaf, chunk));
10250                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10251                                  key.offset + btrfs_chunk_length(leaf, chunk),
10252                                  GFP_NOFS);
10253                 path->slots[0]++;
10254         }
10255         start = 0;
10256         while (1) {
10257                 cache = btrfs_lookup_first_block_group(fs_info, start);
10258                 if (!cache)
10259                         break;
10260                 cache->cached = 1;
10261                 start = cache->key.objectid + cache->key.offset;
10262         }
10263
10264         btrfs_free_path(path);
10265         return 0;
10266 }
10267
10268 static int reset_balance(struct btrfs_trans_handle *trans,
10269                          struct btrfs_fs_info *fs_info)
10270 {
10271         struct btrfs_root *root = fs_info->tree_root;
10272         struct btrfs_path *path;
10273         struct extent_buffer *leaf;
10274         struct btrfs_key key;
10275         int del_slot, del_nr = 0;
10276         int ret;
10277         int found = 0;
10278
10279         path = btrfs_alloc_path();
10280         if (!path)
10281                 return -ENOMEM;
10282
10283         key.objectid = BTRFS_BALANCE_OBJECTID;
10284         key.type = BTRFS_BALANCE_ITEM_KEY;
10285         key.offset = 0;
10286
10287         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10288         if (ret) {
10289                 if (ret > 0)
10290                         ret = 0;
10291                 if (!ret)
10292                         goto reinit_data_reloc;
10293                 else
10294                         goto out;
10295         }
10296
10297         ret = btrfs_del_item(trans, root, path);
10298         if (ret)
10299                 goto out;
10300         btrfs_release_path(path);
10301
10302         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10303         key.type = BTRFS_ROOT_ITEM_KEY;
10304         key.offset = 0;
10305
10306         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10307         if (ret < 0)
10308                 goto out;
10309         while (1) {
10310                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10311                         if (!found)
10312                                 break;
10313
10314                         if (del_nr) {
10315                                 ret = btrfs_del_items(trans, root, path,
10316                                                       del_slot, del_nr);
10317                                 del_nr = 0;
10318                                 if (ret)
10319                                         goto out;
10320                         }
10321                         key.offset++;
10322                         btrfs_release_path(path);
10323
10324                         found = 0;
10325                         ret = btrfs_search_slot(trans, root, &key, path,
10326                                                 -1, 1);
10327                         if (ret < 0)
10328                                 goto out;
10329                         continue;
10330                 }
10331                 found = 1;
10332                 leaf = path->nodes[0];
10333                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10334                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10335                         break;
10336                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10337                         path->slots[0]++;
10338                         continue;
10339                 }
10340                 if (!del_nr) {
10341                         del_slot = path->slots[0];
10342                         del_nr = 1;
10343                 } else {
10344                         del_nr++;
10345                 }
10346                 path->slots[0]++;
10347         }
10348
10349         if (del_nr) {
10350                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10351                 if (ret)
10352                         goto out;
10353         }
10354         btrfs_release_path(path);
10355
10356 reinit_data_reloc:
10357         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10358         key.type = BTRFS_ROOT_ITEM_KEY;
10359         key.offset = (u64)-1;
10360         root = btrfs_read_fs_root(fs_info, &key);
10361         if (IS_ERR(root)) {
10362                 fprintf(stderr, "Error reading data reloc tree\n");
10363                 ret = PTR_ERR(root);
10364                 goto out;
10365         }
10366         record_root_in_trans(trans, root);
10367         ret = btrfs_fsck_reinit_root(trans, root, 0);
10368         if (ret)
10369                 goto out;
10370         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10371 out:
10372         btrfs_free_path(path);
10373         return ret;
10374 }
10375
10376 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10377                               struct btrfs_fs_info *fs_info)
10378 {
10379         u64 start = 0;
10380         int ret;
10381
10382         /*
10383          * The only reason we don't do this is because right now we're just
10384          * walking the trees we find and pinning down their bytes, we don't look
10385          * at any of the leaves.  In order to do mixed groups we'd have to check
10386          * the leaves of any fs roots and pin down the bytes for any file
10387          * extents we find.  Not hard but why do it if we don't have to?
10388          */
10389         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10390                 fprintf(stderr, "We don't support re-initing the extent tree "
10391                         "for mixed block groups yet, please notify a btrfs "
10392                         "developer you want to do this so they can add this "
10393                         "functionality.\n");
10394                 return -EINVAL;
10395         }
10396
10397         /*
10398          * first we need to walk all of the trees except the extent tree and pin
10399          * down the bytes that are in use so we don't overwrite any existing
10400          * metadata.
10401          */
10402         ret = pin_metadata_blocks(fs_info);
10403         if (ret) {
10404                 fprintf(stderr, "error pinning down used bytes\n");
10405                 return ret;
10406         }
10407
10408         /*
10409          * Need to drop all the block groups since we're going to recreate all
10410          * of them again.
10411          */
10412         btrfs_free_block_groups(fs_info);
10413         ret = reset_block_groups(fs_info);
10414         if (ret) {
10415                 fprintf(stderr, "error resetting the block groups\n");
10416                 return ret;
10417         }
10418
10419         /* Ok we can allocate now, reinit the extent root */
10420         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10421         if (ret) {
10422                 fprintf(stderr, "extent root initialization failed\n");
10423                 /*
10424                  * When the transaction code is updated we should end the
10425                  * transaction, but for now progs only knows about commit so
10426                  * just return an error.
10427                  */
10428                 return ret;
10429         }
10430
10431         /*
10432          * Now we have all the in-memory block groups setup so we can make
10433          * allocations properly, and the metadata we care about is safe since we
10434          * pinned all of it above.
10435          */
10436         while (1) {
10437                 struct btrfs_block_group_cache *cache;
10438
10439                 cache = btrfs_lookup_first_block_group(fs_info, start);
10440                 if (!cache)
10441                         break;
10442                 start = cache->key.objectid + cache->key.offset;
10443                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10444                                         &cache->key, &cache->item,
10445                                         sizeof(cache->item));
10446                 if (ret) {
10447                         fprintf(stderr, "Error adding block group\n");
10448                         return ret;
10449                 }
10450                 btrfs_extent_post_op(trans, fs_info->extent_root);
10451         }
10452
10453         ret = reset_balance(trans, fs_info);
10454         if (ret)
10455                 fprintf(stderr, "error resetting the pending balance\n");
10456
10457         return ret;
10458 }
10459
10460 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10461 {
10462         struct btrfs_path *path;
10463         struct btrfs_trans_handle *trans;
10464         struct btrfs_key key;
10465         int ret;
10466
10467         printf("Recowing metadata block %llu\n", eb->start);
10468         key.objectid = btrfs_header_owner(eb);
10469         key.type = BTRFS_ROOT_ITEM_KEY;
10470         key.offset = (u64)-1;
10471
10472         root = btrfs_read_fs_root(root->fs_info, &key);
10473         if (IS_ERR(root)) {
10474                 fprintf(stderr, "Couldn't find owner root %llu\n",
10475                         key.objectid);
10476                 return PTR_ERR(root);
10477         }
10478
10479         path = btrfs_alloc_path();
10480         if (!path)
10481                 return -ENOMEM;
10482
10483         trans = btrfs_start_transaction(root, 1);
10484         if (IS_ERR(trans)) {
10485                 btrfs_free_path(path);
10486                 return PTR_ERR(trans);
10487         }
10488
10489         path->lowest_level = btrfs_header_level(eb);
10490         if (path->lowest_level)
10491                 btrfs_node_key_to_cpu(eb, &key, 0);
10492         else
10493                 btrfs_item_key_to_cpu(eb, &key, 0);
10494
10495         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10496         btrfs_commit_transaction(trans, root);
10497         btrfs_free_path(path);
10498         return ret;
10499 }
10500
10501 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10502 {
10503         struct btrfs_path *path;
10504         struct btrfs_trans_handle *trans;
10505         struct btrfs_key key;
10506         int ret;
10507
10508         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10509                bad->key.type, bad->key.offset);
10510         key.objectid = bad->root_id;
10511         key.type = BTRFS_ROOT_ITEM_KEY;
10512         key.offset = (u64)-1;
10513
10514         root = btrfs_read_fs_root(root->fs_info, &key);
10515         if (IS_ERR(root)) {
10516                 fprintf(stderr, "Couldn't find owner root %llu\n",
10517                         key.objectid);
10518                 return PTR_ERR(root);
10519         }
10520
10521         path = btrfs_alloc_path();
10522         if (!path)
10523                 return -ENOMEM;
10524
10525         trans = btrfs_start_transaction(root, 1);
10526         if (IS_ERR(trans)) {
10527                 btrfs_free_path(path);
10528                 return PTR_ERR(trans);
10529         }
10530
10531         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10532         if (ret) {
10533                 if (ret > 0)
10534                         ret = 0;
10535                 goto out;
10536         }
10537         ret = btrfs_del_item(trans, root, path);
10538 out:
10539         btrfs_commit_transaction(trans, root);
10540         btrfs_free_path(path);
10541         return ret;
10542 }
10543
10544 static int zero_log_tree(struct btrfs_root *root)
10545 {
10546         struct btrfs_trans_handle *trans;
10547         int ret;
10548
10549         trans = btrfs_start_transaction(root, 1);
10550         if (IS_ERR(trans)) {
10551                 ret = PTR_ERR(trans);
10552                 return ret;
10553         }
10554         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10555         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10556         ret = btrfs_commit_transaction(trans, root);
10557         return ret;
10558 }
10559
10560 static int populate_csum(struct btrfs_trans_handle *trans,
10561                          struct btrfs_root *csum_root, char *buf, u64 start,
10562                          u64 len)
10563 {
10564         u64 offset = 0;
10565         u64 sectorsize;
10566         int ret = 0;
10567
10568         while (offset < len) {
10569                 sectorsize = csum_root->sectorsize;
10570                 ret = read_extent_data(csum_root, buf, start + offset,
10571                                        &sectorsize, 0);
10572                 if (ret)
10573                         break;
10574                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10575                                             start + offset, buf, sectorsize);
10576                 if (ret)
10577                         break;
10578                 offset += sectorsize;
10579         }
10580         return ret;
10581 }
10582
10583 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10584                                       struct btrfs_root *csum_root,
10585                                       struct btrfs_root *cur_root)
10586 {
10587         struct btrfs_path *path;
10588         struct btrfs_key key;
10589         struct extent_buffer *node;
10590         struct btrfs_file_extent_item *fi;
10591         char *buf = NULL;
10592         u64 start = 0;
10593         u64 len = 0;
10594         int slot = 0;
10595         int ret = 0;
10596
10597         path = btrfs_alloc_path();
10598         if (!path)
10599                 return -ENOMEM;
10600         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10601         if (!buf) {
10602                 ret = -ENOMEM;
10603                 goto out;
10604         }
10605
10606         key.objectid = 0;
10607         key.offset = 0;
10608         key.type = 0;
10609
10610         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10611         if (ret < 0)
10612                 goto out;
10613         /* Iterate all regular file extents and fill its csum */
10614         while (1) {
10615                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10616
10617                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10618                         goto next;
10619                 node = path->nodes[0];
10620                 slot = path->slots[0];
10621                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10622                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10623                         goto next;
10624                 start = btrfs_file_extent_disk_bytenr(node, fi);
10625                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10626
10627                 ret = populate_csum(trans, csum_root, buf, start, len);
10628                 if (ret == -EEXIST)
10629                         ret = 0;
10630                 if (ret < 0)
10631                         goto out;
10632 next:
10633                 /*
10634                  * TODO: if next leaf is corrupted, jump to nearest next valid
10635                  * leaf.
10636                  */
10637                 ret = btrfs_next_item(cur_root, path);
10638                 if (ret < 0)
10639                         goto out;
10640                 if (ret > 0) {
10641                         ret = 0;
10642                         goto out;
10643                 }
10644         }
10645
10646 out:
10647         btrfs_free_path(path);
10648         free(buf);
10649         return ret;
10650 }
10651
10652 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10653                                   struct btrfs_root *csum_root)
10654 {
10655         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10656         struct btrfs_path *path;
10657         struct btrfs_root *tree_root = fs_info->tree_root;
10658         struct btrfs_root *cur_root;
10659         struct extent_buffer *node;
10660         struct btrfs_key key;
10661         int slot = 0;
10662         int ret = 0;
10663
10664         path = btrfs_alloc_path();
10665         if (!path)
10666                 return -ENOMEM;
10667
10668         key.objectid = BTRFS_FS_TREE_OBJECTID;
10669         key.offset = 0;
10670         key.type = BTRFS_ROOT_ITEM_KEY;
10671
10672         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10673         if (ret < 0)
10674                 goto out;
10675         if (ret > 0) {
10676                 ret = -ENOENT;
10677                 goto out;
10678         }
10679
10680         while (1) {
10681                 node = path->nodes[0];
10682                 slot = path->slots[0];
10683                 btrfs_item_key_to_cpu(node, &key, slot);
10684                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10685                         goto out;
10686                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10687                         goto next;
10688                 if (!is_fstree(key.objectid))
10689                         goto next;
10690                 key.offset = (u64)-1;
10691
10692                 cur_root = btrfs_read_fs_root(fs_info, &key);
10693                 if (IS_ERR(cur_root) || !cur_root) {
10694                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10695                                 key.objectid);
10696                         goto out;
10697                 }
10698                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10699                                 cur_root);
10700                 if (ret < 0)
10701                         goto out;
10702 next:
10703                 ret = btrfs_next_item(tree_root, path);
10704                 if (ret > 0) {
10705                         ret = 0;
10706                         goto out;
10707                 }
10708                 if (ret < 0)
10709                         goto out;
10710         }
10711
10712 out:
10713         btrfs_free_path(path);
10714         return ret;
10715 }
10716
10717 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10718                                       struct btrfs_root *csum_root)
10719 {
10720         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10721         struct btrfs_path *path;
10722         struct btrfs_extent_item *ei;
10723         struct extent_buffer *leaf;
10724         char *buf;
10725         struct btrfs_key key;
10726         int ret;
10727
10728         path = btrfs_alloc_path();
10729         if (!path)
10730                 return -ENOMEM;
10731
10732         key.objectid = 0;
10733         key.type = BTRFS_EXTENT_ITEM_KEY;
10734         key.offset = 0;
10735
10736         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10737         if (ret < 0) {
10738                 btrfs_free_path(path);
10739                 return ret;
10740         }
10741
10742         buf = malloc(csum_root->sectorsize);
10743         if (!buf) {
10744                 btrfs_free_path(path);
10745                 return -ENOMEM;
10746         }
10747
10748         while (1) {
10749                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10750                         ret = btrfs_next_leaf(extent_root, path);
10751                         if (ret < 0)
10752                                 break;
10753                         if (ret) {
10754                                 ret = 0;
10755                                 break;
10756                         }
10757                 }
10758                 leaf = path->nodes[0];
10759
10760                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10761                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10762                         path->slots[0]++;
10763                         continue;
10764                 }
10765
10766                 ei = btrfs_item_ptr(leaf, path->slots[0],
10767                                     struct btrfs_extent_item);
10768                 if (!(btrfs_extent_flags(leaf, ei) &
10769                       BTRFS_EXTENT_FLAG_DATA)) {
10770                         path->slots[0]++;
10771                         continue;
10772                 }
10773
10774                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10775                                     key.offset);
10776                 if (ret)
10777                         break;
10778                 path->slots[0]++;
10779         }
10780
10781         btrfs_free_path(path);
10782         free(buf);
10783         return ret;
10784 }
10785
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * cannot be used as the source and the fs/subvolume trees have to be walked
 * instead.  A non-zero @search_fs_tree selects the fs/subvolume tree walk,
 * zero selects the extent tree walk.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
10802
10803 static void free_roots_info_cache(void)
10804 {
10805         if (!roots_info_cache)
10806                 return;
10807
10808         while (!cache_tree_empty(roots_info_cache)) {
10809                 struct cache_extent *entry;
10810                 struct root_item_info *rii;
10811
10812                 entry = first_cache_extent(roots_info_cache);
10813                 if (!entry)
10814                         break;
10815                 remove_cache_extent(roots_info_cache, entry);
10816                 rii = container_of(entry, struct root_item_info, cache_extent);
10817                 free(rii);
10818         }
10819
10820         free(roots_info_cache);
10821         roots_info_cache = NULL;
10822 }
10823
10824 static int build_roots_info_cache(struct btrfs_fs_info *info)
10825 {
10826         int ret = 0;
10827         struct btrfs_key key;
10828         struct extent_buffer *leaf;
10829         struct btrfs_path *path;
10830
10831         if (!roots_info_cache) {
10832                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10833                 if (!roots_info_cache)
10834                         return -ENOMEM;
10835                 cache_tree_init(roots_info_cache);
10836         }
10837
10838         path = btrfs_alloc_path();
10839         if (!path)
10840                 return -ENOMEM;
10841
10842         key.objectid = 0;
10843         key.type = BTRFS_EXTENT_ITEM_KEY;
10844         key.offset = 0;
10845
10846         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10847         if (ret < 0)
10848                 goto out;
10849         leaf = path->nodes[0];
10850
10851         while (1) {
10852                 struct btrfs_key found_key;
10853                 struct btrfs_extent_item *ei;
10854                 struct btrfs_extent_inline_ref *iref;
10855                 int slot = path->slots[0];
10856                 int type;
10857                 u64 flags;
10858                 u64 root_id;
10859                 u8 level;
10860                 struct cache_extent *entry;
10861                 struct root_item_info *rii;
10862
10863                 if (slot >= btrfs_header_nritems(leaf)) {
10864                         ret = btrfs_next_leaf(info->extent_root, path);
10865                         if (ret < 0) {
10866                                 break;
10867                         } else if (ret) {
10868                                 ret = 0;
10869                                 break;
10870                         }
10871                         leaf = path->nodes[0];
10872                         slot = path->slots[0];
10873                 }
10874
10875                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10876
10877                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10878                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10879                         goto next;
10880
10881                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10882                 flags = btrfs_extent_flags(leaf, ei);
10883
10884                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10885                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10886                         goto next;
10887
10888                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10889                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10890                         level = found_key.offset;
10891                 } else {
10892                         struct btrfs_tree_block_info *binfo;
10893
10894                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10895                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10896                         level = btrfs_tree_block_level(leaf, binfo);
10897                 }
10898
10899                 /*
10900                  * For a root extent, it must be of the following type and the
10901                  * first (and only one) iref in the item.
10902                  */
10903                 type = btrfs_extent_inline_ref_type(leaf, iref);
10904                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10905                         goto next;
10906
10907                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10908                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10909                 if (!entry) {
10910                         rii = malloc(sizeof(struct root_item_info));
10911                         if (!rii) {
10912                                 ret = -ENOMEM;
10913                                 goto out;
10914                         }
10915                         rii->cache_extent.start = root_id;
10916                         rii->cache_extent.size = 1;
10917                         rii->level = (u8)-1;
10918                         entry = &rii->cache_extent;
10919                         ret = insert_cache_extent(roots_info_cache, entry);
10920                         ASSERT(ret == 0);
10921                 } else {
10922                         rii = container_of(entry, struct root_item_info,
10923                                            cache_extent);
10924                 }
10925
10926                 ASSERT(rii->cache_extent.start == root_id);
10927                 ASSERT(rii->cache_extent.size == 1);
10928
10929                 if (level > rii->level || rii->level == (u8)-1) {
10930                         rii->level = level;
10931                         rii->bytenr = found_key.objectid;
10932                         rii->gen = btrfs_extent_generation(leaf, ei);
10933                         rii->node_count = 1;
10934                 } else if (level == rii->level) {
10935                         rii->node_count++;
10936                 }
10937 next:
10938                 path->slots[0]++;
10939         }
10940
10941 out:
10942         btrfs_free_path(path);
10943
10944         return ret;
10945 }
10946
10947 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10948                                   struct btrfs_path *path,
10949                                   const struct btrfs_key *root_key,
10950                                   const int read_only_mode)
10951 {
10952         const u64 root_id = root_key->objectid;
10953         struct cache_extent *entry;
10954         struct root_item_info *rii;
10955         struct btrfs_root_item ri;
10956         unsigned long offset;
10957
10958         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10959         if (!entry) {
10960                 fprintf(stderr,
10961                         "Error: could not find extent items for root %llu\n",
10962                         root_key->objectid);
10963                 return -ENOENT;
10964         }
10965
10966         rii = container_of(entry, struct root_item_info, cache_extent);
10967         ASSERT(rii->cache_extent.start == root_id);
10968         ASSERT(rii->cache_extent.size == 1);
10969
10970         if (rii->node_count != 1) {
10971                 fprintf(stderr,
10972                         "Error: could not find btree root extent for root %llu\n",
10973                         root_id);
10974                 return -ENOENT;
10975         }
10976
10977         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10978         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10979
10980         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10981             btrfs_root_level(&ri) != rii->level ||
10982             btrfs_root_generation(&ri) != rii->gen) {
10983
10984                 /*
10985                  * If we're in repair mode but our caller told us to not update
10986                  * the root item, i.e. just check if it needs to be updated, don't
10987                  * print this message, since the caller will call us again shortly
10988                  * for the same root item without read only mode (the caller will
10989                  * open a transaction first).
10990                  */
10991                 if (!(read_only_mode && repair))
10992                         fprintf(stderr,
10993                                 "%sroot item for root %llu,"
10994                                 " current bytenr %llu, current gen %llu, current level %u,"
10995                                 " new bytenr %llu, new gen %llu, new level %u\n",
10996                                 (read_only_mode ? "" : "fixing "),
10997                                 root_id,
10998                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10999                                 btrfs_root_level(&ri),
11000                                 rii->bytenr, rii->gen, rii->level);
11001
11002                 if (btrfs_root_generation(&ri) > rii->gen) {
11003                         fprintf(stderr,
11004                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11005                                 root_id, btrfs_root_generation(&ri), rii->gen);
11006                         return -EINVAL;
11007                 }
11008
11009                 if (!read_only_mode) {
11010                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11011                         btrfs_set_root_level(&ri, rii->level);
11012                         btrfs_set_root_generation(&ri, rii->gen);
11013                         write_extent_buffer(path->nodes[0], &ri,
11014                                             offset, sizeof(ri));
11015                 }
11016
11017                 return 1;
11018         }
11019
11020         return 0;
11021 }
11022
11023 /*
11024  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11025  * caused read-only snapshots to be corrupted if they were created at a moment
11026  * when the source subvolume/snapshot had orphan items. The issue was that the
11027  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11028  * node instead of the post orphan cleanup root node.
11029  * So this function, and its callees, just detects and fixes those cases. Even
11030  * though the regression was for read-only snapshots, this function applies to
11031  * any snapshot/subvolume root.
11032  * This must be run before any other repair code - not doing it so, makes other
11033  * repair code delete or modify backrefs in the extent tree for example, which
11034  * will result in an inconsistent fs after repairing the root items.
11035  */
11036 static int repair_root_items(struct btrfs_fs_info *info)
11037 {
11038         struct btrfs_path *path = NULL;
11039         struct btrfs_key key;
11040         struct extent_buffer *leaf;
11041         struct btrfs_trans_handle *trans = NULL;
11042         int ret = 0;
11043         int bad_roots = 0;
11044         int need_trans = 0;
11045
11046         ret = build_roots_info_cache(info);
11047         if (ret)
11048                 goto out;
11049
11050         path = btrfs_alloc_path();
11051         if (!path) {
11052                 ret = -ENOMEM;
11053                 goto out;
11054         }
11055
11056         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11057         key.type = BTRFS_ROOT_ITEM_KEY;
11058         key.offset = 0;
11059
11060 again:
11061         /*
11062          * Avoid opening and committing transactions if a leaf doesn't have
11063          * any root items that need to be fixed, so that we avoid rotating
11064          * backup roots unnecessarily.
11065          */
11066         if (need_trans) {
11067                 trans = btrfs_start_transaction(info->tree_root, 1);
11068                 if (IS_ERR(trans)) {
11069                         ret = PTR_ERR(trans);
11070                         goto out;
11071                 }
11072         }
11073
11074         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11075                                 0, trans ? 1 : 0);
11076         if (ret < 0)
11077                 goto out;
11078         leaf = path->nodes[0];
11079
11080         while (1) {
11081                 struct btrfs_key found_key;
11082
11083                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11084                         int no_more_keys = find_next_key(path, &key);
11085
11086                         btrfs_release_path(path);
11087                         if (trans) {
11088                                 ret = btrfs_commit_transaction(trans,
11089                                                                info->tree_root);
11090                                 trans = NULL;
11091                                 if (ret < 0)
11092                                         goto out;
11093                         }
11094                         need_trans = 0;
11095                         if (no_more_keys)
11096                                 break;
11097                         goto again;
11098                 }
11099
11100                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11101
11102                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11103                         goto next;
11104                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11105                         goto next;
11106
11107                 ret = maybe_repair_root_item(info, path, &found_key,
11108                                              trans ? 0 : 1);
11109                 if (ret < 0)
11110                         goto out;
11111                 if (ret) {
11112                         if (!trans && repair) {
11113                                 need_trans = 1;
11114                                 key = found_key;
11115                                 btrfs_release_path(path);
11116                                 goto again;
11117                         }
11118                         bad_roots++;
11119                 }
11120 next:
11121                 path->slots[0]++;
11122         }
11123         ret = 0;
11124 out:
11125         free_roots_info_cache();
11126         btrfs_free_path(path);
11127         if (trans)
11128                 btrfs_commit_transaction(trans, info->tree_root);
11129         if (ret < 0)
11130                 return ret;
11131
11132         return bad_roots;
11133 }
11134
/*
 * Help text of 'btrfs check', a NULL-terminated array of lines that
 * cmd_check() passes to usage().
 *
 * NOTE(review): the layout (synopsis, one-line summary, long description,
 * empty line, option list) presumably follows what usage() expects - confirm
 * before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
11164
11165 int cmd_check(int argc, char **argv)
11166 {
11167         struct cache_tree root_cache;
11168         struct btrfs_root *root;
11169         struct btrfs_fs_info *info;
11170         u64 bytenr = 0;
11171         u64 subvolid = 0;
11172         u64 tree_root_bytenr = 0;
11173         u64 chunk_root_bytenr = 0;
11174         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11175         int ret;
11176         u64 num;
11177         int init_csum_tree = 0;
11178         int readonly = 0;
11179         int qgroup_report = 0;
11180         int qgroups_repaired = 0;
11181         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
11182
11183         while(1) {
11184                 int c;
11185                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11186                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11187                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11188                         GETOPT_VAL_MODE };
11189                 static const struct option long_options[] = {
11190                         { "super", required_argument, NULL, 's' },
11191                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11192                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11193                         { "init-csum-tree", no_argument, NULL,
11194                                 GETOPT_VAL_INIT_CSUM },
11195                         { "init-extent-tree", no_argument, NULL,
11196                                 GETOPT_VAL_INIT_EXTENT },
11197                         { "check-data-csum", no_argument, NULL,
11198                                 GETOPT_VAL_CHECK_CSUM },
11199                         { "backup", no_argument, NULL, 'b' },
11200                         { "subvol-extents", required_argument, NULL, 'E' },
11201                         { "qgroup-report", no_argument, NULL, 'Q' },
11202                         { "tree-root", required_argument, NULL, 'r' },
11203                         { "chunk-root", required_argument, NULL,
11204                                 GETOPT_VAL_CHUNK_TREE },
11205                         { "progress", no_argument, NULL, 'p' },
11206                         { "mode", required_argument, NULL,
11207                                 GETOPT_VAL_MODE },
11208                         { NULL, 0, NULL, 0}
11209                 };
11210
11211                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11212                 if (c < 0)
11213                         break;
11214                 switch(c) {
11215                         case 'a': /* ignored */ break;
11216                         case 'b':
11217                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11218                                 break;
11219                         case 's':
11220                                 num = arg_strtou64(optarg);
11221                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11222                                         fprintf(stderr,
11223                                                 "ERROR: super mirror should be less than: %d\n",
11224                                                 BTRFS_SUPER_MIRROR_MAX);
11225                                         exit(1);
11226                                 }
11227                                 bytenr = btrfs_sb_offset(((int)num));
11228                                 printf("using SB copy %llu, bytenr %llu\n", num,
11229                                        (unsigned long long)bytenr);
11230                                 break;
11231                         case 'Q':
11232                                 qgroup_report = 1;
11233                                 break;
11234                         case 'E':
11235                                 subvolid = arg_strtou64(optarg);
11236                                 break;
11237                         case 'r':
11238                                 tree_root_bytenr = arg_strtou64(optarg);
11239                                 break;
11240                         case GETOPT_VAL_CHUNK_TREE:
11241                                 chunk_root_bytenr = arg_strtou64(optarg);
11242                                 break;
11243                         case 'p':
11244                                 ctx.progress_enabled = true;
11245                                 break;
11246                         case '?':
11247                         case 'h':
11248                                 usage(cmd_check_usage);
11249                         case GETOPT_VAL_REPAIR:
11250                                 printf("enabling repair mode\n");
11251                                 repair = 1;
11252                                 ctree_flags |= OPEN_CTREE_WRITES;
11253                                 break;
11254                         case GETOPT_VAL_READONLY:
11255                                 readonly = 1;
11256                                 break;
11257                         case GETOPT_VAL_INIT_CSUM:
11258                                 printf("Creating a new CRC tree\n");
11259                                 init_csum_tree = 1;
11260                                 repair = 1;
11261                                 ctree_flags |= OPEN_CTREE_WRITES;
11262                                 break;
11263                         case GETOPT_VAL_INIT_EXTENT:
11264                                 init_extent_tree = 1;
11265                                 ctree_flags |= (OPEN_CTREE_WRITES |
11266                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11267                                 repair = 1;
11268                                 break;
11269                         case GETOPT_VAL_CHECK_CSUM:
11270                                 check_data_csum = 1;
11271                                 break;
11272                         case GETOPT_VAL_MODE:
11273                                 check_mode = parse_check_mode(optarg);
11274                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11275                                         error("unknown mode: %s", optarg);
11276                                         exit(1);
11277                                 }
11278                                 break;
11279                 }
11280         }
11281
11282         if (check_argc_exact(argc - optind, 1))
11283                 usage(cmd_check_usage);
11284
11285         if (ctx.progress_enabled) {
11286                 ctx.tp = TASK_NOTHING;
11287                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11288         }
11289
11290         /* This check is the only reason for --readonly to exist */
11291         if (readonly && repair) {
11292                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11293                 exit(1);
11294         }
11295
11296         /*
11297          * Not supported yet
11298          */
11299         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11300                 error("Low memory mode doesn't support repair yet");
11301                 exit(1);
11302         }
11303
11304         radix_tree_init();
11305         cache_tree_init(&root_cache);
11306
11307         if((ret = check_mounted(argv[optind])) < 0) {
11308                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11309                 goto err_out;
11310         } else if(ret) {
11311                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11312                 ret = -EBUSY;
11313                 goto err_out;
11314         }
11315
11316         /* only allow partial opening under repair mode */
11317         if (repair)
11318                 ctree_flags |= OPEN_CTREE_PARTIAL;
11319
11320         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11321                                   chunk_root_bytenr, ctree_flags);
11322         if (!info) {
11323                 fprintf(stderr, "Couldn't open file system\n");
11324                 ret = -EIO;
11325                 goto err_out;
11326         }
11327
11328         global_info = info;
11329         root = info->fs_root;
11330
11331         /*
11332          * repair mode will force us to commit transaction which
11333          * will make us fail to load log tree when mounting.
11334          */
11335         if (repair && btrfs_super_log_root(info->super_copy)) {
11336                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11337                 if (!ret) {
11338                         ret = 1;
11339                         goto close_out;
11340                 }
11341                 ret = zero_log_tree(root);
11342                 if (ret) {
11343                         fprintf(stderr, "fail to zero log tree\n");
11344                         goto close_out;
11345                 }
11346         }
11347
11348         uuid_unparse(info->super_copy->fsid, uuidbuf);
11349         if (qgroup_report) {
11350                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11351                        uuidbuf);
11352                 ret = qgroup_verify_all(info);
11353                 if (ret == 0)
11354                         report_qgroups(1);
11355                 goto close_out;
11356         }
11357         if (subvolid) {
11358                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11359                        subvolid, argv[optind], uuidbuf);
11360                 ret = print_extent_state(info, subvolid);
11361                 goto close_out;
11362         }
11363         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11364
11365         if (!extent_buffer_uptodate(info->tree_root->node) ||
11366             !extent_buffer_uptodate(info->dev_root->node) ||
11367             !extent_buffer_uptodate(info->chunk_root->node)) {
11368                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11369                 ret = -EIO;
11370                 goto close_out;
11371         }
11372
11373         if (init_extent_tree || init_csum_tree) {
11374                 struct btrfs_trans_handle *trans;
11375
11376                 trans = btrfs_start_transaction(info->extent_root, 0);
11377                 if (IS_ERR(trans)) {
11378                         fprintf(stderr, "Error starting transaction\n");
11379                         ret = PTR_ERR(trans);
11380                         goto close_out;
11381                 }
11382
11383                 if (init_extent_tree) {
11384                         printf("Creating a new extent tree\n");
11385                         ret = reinit_extent_tree(trans, info);
11386                         if (ret)
11387                                 goto close_out;
11388                 }
11389
11390                 if (init_csum_tree) {
11391                         fprintf(stderr, "Reinit crc root\n");
11392                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11393                         if (ret) {
11394                                 fprintf(stderr, "crc root initialization failed\n");
11395                                 ret = -EIO;
11396                                 goto close_out;
11397                         }
11398
11399                         ret = fill_csum_tree(trans, info->csum_root,
11400                                              init_extent_tree);
11401                         if (ret) {
11402                                 fprintf(stderr, "crc refilling failed\n");
11403                                 return -EIO;
11404                         }
11405                 }
11406                 /*
11407                  * Ok now we commit and run the normal fsck, which will add
11408                  * extent entries for all of the items it finds.
11409                  */
11410                 ret = btrfs_commit_transaction(trans, info->extent_root);
11411                 if (ret)
11412                         goto close_out;
11413         }
11414         if (!extent_buffer_uptodate(info->extent_root->node)) {
11415                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11416                 ret = -EIO;
11417                 goto close_out;
11418         }
11419         if (!extent_buffer_uptodate(info->csum_root->node)) {
11420                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11421                 ret = -EIO;
11422                 goto close_out;
11423         }
11424
11425         if (!ctx.progress_enabled)
11426                 fprintf(stderr, "checking extents\n");
11427         if (check_mode == CHECK_MODE_LOWMEM)
11428                 ret = check_chunks_and_extents_v2(root);
11429         else
11430                 ret = check_chunks_and_extents(root);
11431         if (ret)
11432                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11433
11434         ret = repair_root_items(info);
11435         if (ret < 0)
11436                 goto close_out;
11437         if (repair) {
11438                 fprintf(stderr, "Fixed %d roots.\n", ret);
11439                 ret = 0;
11440         } else if (ret > 0) {
11441                 fprintf(stderr,
11442                        "Found %d roots with an outdated root item.\n",
11443                        ret);
11444                 fprintf(stderr,
11445                         "Please run a filesystem check with the option --repair to fix them.\n");
11446                 ret = 1;
11447                 goto close_out;
11448         }
11449
11450         if (!ctx.progress_enabled) {
11451                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11452                         fprintf(stderr, "checking free space tree\n");
11453                 else
11454                         fprintf(stderr, "checking free space cache\n");
11455         }
11456         ret = check_space_cache(root);
11457         if (ret)
11458                 goto out;
11459
11460         /*
11461          * We used to have to have these hole extents in between our real
11462          * extents so if we don't have this flag set we need to make sure there
11463          * are no gaps in the file extents for inodes, otherwise we can just
11464          * ignore it when this happens.
11465          */
11466         no_holes = btrfs_fs_incompat(root->fs_info,
11467                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11468         if (!ctx.progress_enabled)
11469                 fprintf(stderr, "checking fs roots\n");
11470         ret = check_fs_roots(root, &root_cache);
11471         if (ret)
11472                 goto out;
11473
11474         fprintf(stderr, "checking csums\n");
11475         ret = check_csums(root);
11476         if (ret)
11477                 goto out;
11478
11479         fprintf(stderr, "checking root refs\n");
11480         ret = check_root_refs(root, &root_cache);
11481         if (ret)
11482                 goto out;
11483
11484         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11485                 struct extent_buffer *eb;
11486
11487                 eb = list_first_entry(&root->fs_info->recow_ebs,
11488                                       struct extent_buffer, recow);
11489                 list_del_init(&eb->recow);
11490                 ret = recow_extent_buffer(root, eb);
11491                 if (ret)
11492                         break;
11493         }
11494
11495         while (!list_empty(&delete_items)) {
11496                 struct bad_item *bad;
11497
11498                 bad = list_first_entry(&delete_items, struct bad_item, list);
11499                 list_del_init(&bad->list);
11500                 if (repair)
11501                         ret = delete_bad_item(root, bad);
11502                 free(bad);
11503         }
11504
11505         if (info->quota_enabled) {
11506                 int err;
11507                 fprintf(stderr, "checking quota groups\n");
11508                 err = qgroup_verify_all(info);
11509                 if (err)
11510                         goto out;
11511                 report_qgroups(0);
11512                 err = repair_qgroups(info, &qgroups_repaired);
11513                 if (err)
11514                         goto out;
11515         }
11516
11517         if (!list_empty(&root->fs_info->recow_ebs)) {
11518                 fprintf(stderr, "Transid errors in file system\n");
11519                 ret = 1;
11520         }
11521 out:
11522         /* Don't override original ret */
11523         if (!ret && qgroups_repaired)
11524                 ret = qgroups_repaired;
11525
11526         if (found_old_backref) { /*
11527                  * there was a disk format change when mixed
11528                  * backref was in testing tree. The old format
11529                  * existed about one week.
11530                  */
11531                 printf("\n * Found old mixed backref format. "
11532                        "The old format is not supported! *"
11533                        "\n * Please mount the FS in readonly mode, "
11534                        "backup data and re-format the FS. *\n\n");
11535                 ret = 1;
11536         }
11537         printf("found %llu bytes used err is %d\n",
11538                (unsigned long long)bytes_used, ret);
11539         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11540         printf("total tree bytes: %llu\n",
11541                (unsigned long long)total_btree_bytes);
11542         printf("total fs tree bytes: %llu\n",
11543                (unsigned long long)total_fs_tree_bytes);
11544         printf("total extent tree bytes: %llu\n",
11545                (unsigned long long)total_extent_tree_bytes);
11546         printf("btree space waste bytes: %llu\n",
11547                (unsigned long long)btree_space_waste);
11548         printf("file data blocks allocated: %llu\n referenced %llu\n",
11549                 (unsigned long long)data_bytes_allocated,
11550                 (unsigned long long)data_bytes_referenced);
11551
11552         free_qgroup_counts();
11553         free_root_recs_tree(&root_cache);
11554 close_out:
11555         close_ctree(root);
11556 err_out:
11557         if (ctx.progress_enabled)
11558                 task_deinit(ctx.info);
11559
11560         return ret;
11561 }