btrfs-progs: check: introduce main entry function for checking leaf items
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase currently reported by the progress indicator.
 *
 * The first three values are used as indexes into the message table in
 * print_status_check(); TASK_NOTHING has no message and makes that
 * function return without printing anything.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3, /* have to be the last element */
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
77 struct extent_backref {
78         struct rb_node node;
79         unsigned int is_data:1;
80         unsigned int found_extent_tree:1;
81         unsigned int full_backref:1;
82         unsigned int found_ref:1;
83         unsigned int broken:1;
84 };
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
91 struct data_backref {
92         struct extent_backref node;
93         union {
94                 u64 parent;
95                 u64 root;
96         };
97         u64 owner;
98         u64 offset;
99         u64 disk_bytenr;
100         u64 bytes;
101         u64 ram_bytes;
102         u32 num_refs;
103         u32 found_ref;
104 };
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
161 /*
162  * Much like data_backref, just removed the undetermined members
163  * and change it to use list_head.
164  * During extent scan, it is stored in root->orphan_data_extent.
165  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
166  */
167 struct orphan_data_extent {
168         struct list_head list;
169         u64 root;
170         u64 objectid;
171         u64 offset;
172         u64 disk_bytenr;
173         u64 disk_len;
174 };
175
176 struct tree_backref {
177         struct extent_backref node;
178         union {
179                 u64 parent;
180                 u64 root;
181         };
182 };
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is two bits wide so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
232
233 struct extent_record {
234         struct list_head backrefs;
235         struct list_head dups;
236         struct rb_root backref_tree;
237         struct list_head list;
238         struct cache_extent cache;
239         struct btrfs_disk_key parent_key;
240         u64 start;
241         u64 max_size;
242         u64 nr;
243         u64 refs;
244         u64 extent_item_refs;
245         u64 generation;
246         u64 parent_generation;
247         u64 info_objectid;
248         u32 num_duplicates;
249         u8 info_level;
250         unsigned int flag_block_full_backref:2;
251         unsigned int found_rec:1;
252         unsigned int content_checked:1;
253         unsigned int owner_ref_checked:1;
254         unsigned int is_root:1;
255         unsigned int metadata:1;
256         unsigned int bad_full_backref:1;
257         unsigned int crossing_stripes:1;
258         unsigned int wrong_chunk_type:1;
259 };
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
266 struct inode_backref {
267         struct list_head list;
268         unsigned int found_dir_item:1;
269         unsigned int found_dir_index:1;
270         unsigned int found_inode_ref:1;
271         unsigned int filetype:8;
272         int errors;
273         unsigned int ref_type;
274         u64 dir;
275         u64 index;
276         u16 namelen;
277         char name[0];
278 };
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
285 struct root_item_record {
286         struct list_head list;
287         u64 objectid;
288         u64 bytenr;
289         u64 last_snapshot;
290         u8 level;
291         u8 drop_level;
292         int level_size;
293         struct btrfs_key drop_key;
294 };
295
/*
 * Error bits for back references, one bit each; print_ref_error() turns
 * a mask of them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
309
310 struct file_extent_hole {
311         struct rb_node node;
312         u64 start;
313         u64 len;
314 };
315
316 struct inode_record {
317         struct list_head backrefs;
318         unsigned int checked:1;
319         unsigned int merging:1;
320         unsigned int found_inode_item:1;
321         unsigned int found_dir_item:1;
322         unsigned int found_file_extent:1;
323         unsigned int found_csum_item:1;
324         unsigned int some_csum_missing:1;
325         unsigned int nodatasum:1;
326         int errors;
327
328         u64 ino;
329         u32 nlink;
330         u32 imode;
331         u64 isize;
332         u64 nbytes;
333
334         u32 found_link;
335         u64 found_size;
336         u64 extent_start;
337         u64 extent_end;
338         struct rb_root holes;
339         struct list_head orphan_extents;
340
341         u32 refs;
342 };
343
/*
 * Error bits stored in inode_record::errors, one bit each;
 * print_inode_error() turns a mask of them into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
359
360 struct root_backref {
361         struct list_head list;
362         unsigned int found_dir_item:1;
363         unsigned int found_dir_index:1;
364         unsigned int found_back_ref:1;
365         unsigned int found_forward_ref:1;
366         unsigned int reachable:1;
367         int errors;
368         u64 ref_root;
369         u64 dir;
370         u64 index;
371         u16 namelen;
372         char name[0];
373 };
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
380 struct root_record {
381         struct list_head backrefs;
382         struct cache_extent cache;
383         unsigned int found_root_item:1;
384         u64 objectid;
385         u32 found_ref;
386 };
387
388 struct ptr_node {
389         struct cache_extent cache;
390         void *data;
391 };
392
393 struct shared_node {
394         struct cache_extent cache;
395         struct cache_tree root_cache;
396         struct cache_tree inode_cache;
397         struct inode_record *current;
398         u32 refs;
399 };
400
401 struct block_info {
402         u64 start;
403         u32 size;
404 };
405
406 struct walk_control {
407         struct cache_tree shared;
408         struct shared_node *nodes[BTRFS_MAX_LEVEL];
409         int active_node;
410         int root_level;
411 };
412
413 struct bad_item {
414         struct btrfs_key key;
415         u64 root_id;
416         struct list_head list;
417 };
418
419 struct extent_entry {
420         u64 bytenr;
421         u64 bytes;
422         int count;
423         int broken;
424         struct list_head list;
425 };
426
427 struct root_item_info {
428         /* level of the root */
429         u8 level;
430         /* number of nodes at this level, must be 1 for a root */
431         int node_count;
432         u64 bytenr;
433         u64 gen;
434         struct cache_extent cache_extent;
435 };
436
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit, so flags OR-ed into one value can still
 * be told apart.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH and now takes the first free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
453
454 static void *print_status_check(void *p)
455 {
456         struct task_ctx *priv = p;
457         const char work_indicator[] = { '.', 'o', 'O', 'o' };
458         uint32_t count = 0;
459         static char *task_position_string[] = {
460                 "checking extents",
461                 "checking free space cache",
462                 "checking fs roots",
463         };
464
465         task_period_start(priv->info, 1000 /* 1s */);
466
467         if (priv->tp == TASK_NOTHING)
468                 return NULL;
469
470         while (1) {
471                 printf("%s [%c]\r", task_position_string[priv->tp],
472                                 work_indicator[count % 4]);
473                 count++;
474                 fflush(stdout);
475                 task_period_wait(priv->info);
476         }
477         return NULL;
478 }
479
/*
 * Emit a newline on stdout and flush it.  @p is unused.
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);
	return 0;
}
487
488 /* Compatible function to allow reuse of old codes */
489 static u64 first_extent_gap(struct rb_root *holes)
490 {
491         struct file_extent_hole *hole;
492
493         if (RB_EMPTY_ROOT(holes))
494                 return (u64)-1;
495
496         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
497         return hole->start;
498 }
499
500 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
501 {
502         struct file_extent_hole *hole1;
503         struct file_extent_hole *hole2;
504
505         hole1 = rb_entry(node1, struct file_extent_hole, node);
506         hole2 = rb_entry(node2, struct file_extent_hole, node);
507
508         if (hole1->start > hole2->start)
509                 return -1;
510         if (hole1->start < hole2->start)
511                 return 1;
512         /* Now hole1->start == hole2->start */
513         if (hole1->len >= hole2->len)
514                 /*
515                  * Hole 1 will be merge center
516                  * Same hole will be merged later
517                  */
518                 return -1;
519         /* Hole 2 will be merge center */
520         return 1;
521 }
522
523 /*
524  * Add a hole to the record
525  *
526  * This will do hole merge for copy_file_extent_holes(),
527  * which will ensure there won't be continuous holes.
528  */
529 static int add_file_extent_hole(struct rb_root *holes,
530                                 u64 start, u64 len)
531 {
532         struct file_extent_hole *hole;
533         struct file_extent_hole *prev = NULL;
534         struct file_extent_hole *next = NULL;
535
536         hole = malloc(sizeof(*hole));
537         if (!hole)
538                 return -ENOMEM;
539         hole->start = start;
540         hole->len = len;
541         /* Since compare will not return 0, no -EEXIST will happen */
542         rb_insert(holes, &hole->node, compare_hole);
543
544         /* simple merge with previous hole */
545         if (rb_prev(&hole->node))
546                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
547                                 node);
548         if (prev && prev->start + prev->len >= hole->start) {
549                 hole->len = hole->start + hole->len - prev->start;
550                 hole->start = prev->start;
551                 rb_erase(&prev->node, holes);
552                 free(prev);
553                 prev = NULL;
554         }
555
556         /* iterate merge with next holes */
557         while (1) {
558                 if (!rb_next(&hole->node))
559                         break;
560                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
561                                         node);
562                 if (hole->start + hole->len >= next->start) {
563                         if (hole->start + hole->len <= next->start + next->len)
564                                 hole->len = next->start + next->len -
565                                             hole->start;
566                         rb_erase(&next->node, holes);
567                         free(next);
568                         next = NULL;
569                 } else
570                         break;
571         }
572         return 0;
573 }
574
575 static int compare_hole_range(struct rb_node *node, void *data)
576 {
577         struct file_extent_hole *hole;
578         u64 start;
579
580         hole = (struct file_extent_hole *)data;
581         start = hole->start;
582
583         hole = rb_entry(node, struct file_extent_hole, node);
584         if (start < hole->start)
585                 return -1;
586         if (start >= hole->start && start < hole->start + hole->len)
587                 return 0;
588         return 1;
589 }
590
591 /*
592  * Delete a hole in the record
593  *
594  * This will do the hole split and is much restrict than add.
595  */
596 static int del_file_extent_hole(struct rb_root *holes,
597                                 u64 start, u64 len)
598 {
599         struct file_extent_hole *hole;
600         struct file_extent_hole tmp;
601         u64 prev_start = 0;
602         u64 prev_len = 0;
603         u64 next_start = 0;
604         u64 next_len = 0;
605         struct rb_node *node;
606         int have_prev = 0;
607         int have_next = 0;
608         int ret = 0;
609
610         tmp.start = start;
611         tmp.len = len;
612         node = rb_search(holes, &tmp, compare_hole_range, NULL);
613         if (!node)
614                 return -EEXIST;
615         hole = rb_entry(node, struct file_extent_hole, node);
616         if (start + len > hole->start + hole->len)
617                 return -EEXIST;
618
619         /*
620          * Now there will be no overlap, delete the hole and re-add the
621          * split(s) if they exists.
622          */
623         if (start > hole->start) {
624                 prev_start = hole->start;
625                 prev_len = start - hole->start;
626                 have_prev = 1;
627         }
628         if (hole->start + hole->len > start + len) {
629                 next_start = start + len;
630                 next_len = hole->start + hole->len - start - len;
631                 have_next = 1;
632         }
633         rb_erase(node, holes);
634         free(hole);
635         if (have_prev) {
636                 ret = add_file_extent_hole(holes, prev_start, prev_len);
637                 if (ret < 0)
638                         return ret;
639         }
640         if (have_next) {
641                 ret = add_file_extent_hole(holes, next_start, next_len);
642                 if (ret < 0)
643                         return ret;
644         }
645         return 0;
646 }
647
648 static int copy_file_extent_holes(struct rb_root *dst,
649                                   struct rb_root *src)
650 {
651         struct file_extent_hole *hole;
652         struct rb_node *node;
653         int ret = 0;
654
655         node = rb_first(src);
656         while (node) {
657                 hole = rb_entry(node, struct file_extent_hole, node);
658                 ret = add_file_extent_hole(dst, hole->start, hole->len);
659                 if (ret)
660                         break;
661                 node = rb_next(node);
662         }
663         return ret;
664 }
665
666 static void free_file_extent_holes(struct rb_root *holes)
667 {
668         struct rb_node *node;
669         struct file_extent_hole *hole;
670
671         node = rb_first(holes);
672         while (node) {
673                 hole = rb_entry(node, struct file_extent_hole, node);
674                 rb_erase(node, holes);
675                 free(hole);
676                 node = rb_first(holes);
677         }
678 }
679
680 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
681
682 static void record_root_in_trans(struct btrfs_trans_handle *trans,
683                                  struct btrfs_root *root)
684 {
685         if (root->last_trans != trans->transid) {
686                 root->track_dirty = 1;
687                 root->last_trans = trans->transid;
688                 root->commit_root = root->node;
689                 extent_buffer_get(root->node);
690         }
691 }
692
693 static u8 imode_to_type(u32 imode)
694 {
695 #define S_SHIFT 12
696         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
697                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
698                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
699                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
700                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
701                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
702                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
703                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
704         };
705
706         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
707 #undef S_SHIFT
708 }
709
710 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
711 {
712         struct device_record *rec1;
713         struct device_record *rec2;
714
715         rec1 = rb_entry(node1, struct device_record, node);
716         rec2 = rb_entry(node2, struct device_record, node);
717         if (rec1->devid > rec2->devid)
718                 return -1;
719         else if (rec1->devid < rec2->devid)
720                 return 1;
721         else
722                 return 0;
723 }
724
725 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
726 {
727         struct inode_record *rec;
728         struct inode_backref *backref;
729         struct inode_backref *orig;
730         struct inode_backref *tmp;
731         struct orphan_data_extent *src_orphan;
732         struct orphan_data_extent *dst_orphan;
733         size_t size;
734         int ret;
735
736         rec = malloc(sizeof(*rec));
737         if (!rec)
738                 return ERR_PTR(-ENOMEM);
739         memcpy(rec, orig_rec, sizeof(*rec));
740         rec->refs = 1;
741         INIT_LIST_HEAD(&rec->backrefs);
742         INIT_LIST_HEAD(&rec->orphan_extents);
743         rec->holes = RB_ROOT;
744
745         list_for_each_entry(orig, &orig_rec->backrefs, list) {
746                 size = sizeof(*orig) + orig->namelen + 1;
747                 backref = malloc(size);
748                 if (!backref) {
749                         ret = -ENOMEM;
750                         goto cleanup;
751                 }
752                 memcpy(backref, orig, size);
753                 list_add_tail(&backref->list, &rec->backrefs);
754         }
755         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
756                 dst_orphan = malloc(sizeof(*dst_orphan));
757                 if (!dst_orphan) {
758                         ret = -ENOMEM;
759                         goto cleanup;
760                 }
761                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
762                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
763         }
764         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
765         BUG_ON(ret < 0);
766
767         return rec;
768
769 cleanup:
770         if (!list_empty(&rec->backrefs))
771                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
772                         list_del(&orig->list);
773                         free(orig);
774                 }
775
776         if (!list_empty(&rec->orphan_extents))
777                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
778                         list_del(&orig->list);
779                         free(orig);
780                 }
781
782         free(rec);
783
784         return ERR_PTR(ret);
785 }
786
787 static void print_orphan_data_extents(struct list_head *orphan_extents,
788                                       u64 objectid)
789 {
790         struct orphan_data_extent *orphan;
791
792         if (list_empty(orphan_extents))
793                 return;
794         printf("The following data extent is lost in tree %llu:\n",
795                objectid);
796         list_for_each_entry(orphan, orphan_extents, list) {
797                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
798                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
799                        orphan->disk_len);
800         }
801 }
802
803 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
804 {
805         u64 root_objectid = root->root_key.objectid;
806         int errors = rec->errors;
807
808         if (!errors)
809                 return;
810         /* reloc root errors, we print its corresponding fs root objectid*/
811         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
812                 root_objectid = root->root_key.offset;
813                 fprintf(stderr, "reloc");
814         }
815         fprintf(stderr, "root %llu inode %llu errors %x",
816                 (unsigned long long) root_objectid,
817                 (unsigned long long) rec->ino, rec->errors);
818
819         if (errors & I_ERR_NO_INODE_ITEM)
820                 fprintf(stderr, ", no inode item");
821         if (errors & I_ERR_NO_ORPHAN_ITEM)
822                 fprintf(stderr, ", no orphan item");
823         if (errors & I_ERR_DUP_INODE_ITEM)
824                 fprintf(stderr, ", dup inode item");
825         if (errors & I_ERR_DUP_DIR_INDEX)
826                 fprintf(stderr, ", dup dir index");
827         if (errors & I_ERR_ODD_DIR_ITEM)
828                 fprintf(stderr, ", odd dir item");
829         if (errors & I_ERR_ODD_FILE_EXTENT)
830                 fprintf(stderr, ", odd file extent");
831         if (errors & I_ERR_BAD_FILE_EXTENT)
832                 fprintf(stderr, ", bad file extent");
833         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
834                 fprintf(stderr, ", file extent overlap");
835         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
836                 fprintf(stderr, ", file extent discount");
837         if (errors & I_ERR_DIR_ISIZE_WRONG)
838                 fprintf(stderr, ", dir isize wrong");
839         if (errors & I_ERR_FILE_NBYTES_WRONG)
840                 fprintf(stderr, ", nbytes wrong");
841         if (errors & I_ERR_ODD_CSUM_ITEM)
842                 fprintf(stderr, ", odd csum item");
843         if (errors & I_ERR_SOME_CSUM_MISSING)
844                 fprintf(stderr, ", some csum missing");
845         if (errors & I_ERR_LINK_COUNT_WRONG)
846                 fprintf(stderr, ", link count wrong");
847         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
848                 fprintf(stderr, ", orphan file extent");
849         fprintf(stderr, "\n");
850         /* Print the orphan extents if needed */
851         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
852                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
853
854         /* Print the holes if needed */
855         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
856                 struct file_extent_hole *hole;
857                 struct rb_node *node;
858                 int found = 0;
859
860                 node = rb_first(&rec->holes);
861                 fprintf(stderr, "Found file extent holes:\n");
862                 while (node) {
863                         found = 1;
864                         hole = rb_entry(node, struct file_extent_hole, node);
865                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
866                                 hole->start, hole->len);
867                         node = rb_next(node);
868                 }
869                 if (!found)
870                         fprintf(stderr, "\tstart: 0, len: %llu\n",
871                                 round_up(rec->isize, root->sectorsize));
872         }
873 }
874
875 static void print_ref_error(int errors)
876 {
877         if (errors & REF_ERR_NO_DIR_ITEM)
878                 fprintf(stderr, ", no dir item");
879         if (errors & REF_ERR_NO_DIR_INDEX)
880                 fprintf(stderr, ", no dir index");
881         if (errors & REF_ERR_NO_INODE_REF)
882                 fprintf(stderr, ", no inode ref");
883         if (errors & REF_ERR_DUP_DIR_ITEM)
884                 fprintf(stderr, ", dup dir item");
885         if (errors & REF_ERR_DUP_DIR_INDEX)
886                 fprintf(stderr, ", dup dir index");
887         if (errors & REF_ERR_DUP_INODE_REF)
888                 fprintf(stderr, ", dup inode ref");
889         if (errors & REF_ERR_INDEX_UNMATCH)
890                 fprintf(stderr, ", index mismatch");
891         if (errors & REF_ERR_FILETYPE_UNMATCH)
892                 fprintf(stderr, ", filetype mismatch");
893         if (errors & REF_ERR_NAME_TOO_LONG)
894                 fprintf(stderr, ", name too long");
895         if (errors & REF_ERR_NO_ROOT_REF)
896                 fprintf(stderr, ", no root ref");
897         if (errors & REF_ERR_NO_ROOT_BACKREF)
898                 fprintf(stderr, ", no root backref");
899         if (errors & REF_ERR_DUP_ROOT_REF)
900                 fprintf(stderr, ", dup root ref");
901         if (errors & REF_ERR_DUP_ROOT_BACKREF)
902                 fprintf(stderr, ", dup root backref");
903         fprintf(stderr, "\n");
904 }
905
906 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
907                                           u64 ino, int mod)
908 {
909         struct ptr_node *node;
910         struct cache_extent *cache;
911         struct inode_record *rec = NULL;
912         int ret;
913
914         cache = lookup_cache_extent(inode_cache, ino, 1);
915         if (cache) {
916                 node = container_of(cache, struct ptr_node, cache);
917                 rec = node->data;
918                 if (mod && rec->refs > 1) {
919                         node->data = clone_inode_rec(rec);
920                         if (IS_ERR(node->data))
921                                 return node->data;
922                         rec->refs--;
923                         rec = node->data;
924                 }
925         } else if (mod) {
926                 rec = calloc(1, sizeof(*rec));
927                 if (!rec)
928                         return ERR_PTR(-ENOMEM);
929                 rec->ino = ino;
930                 rec->extent_start = (u64)-1;
931                 rec->refs = 1;
932                 INIT_LIST_HEAD(&rec->backrefs);
933                 INIT_LIST_HEAD(&rec->orphan_extents);
934                 rec->holes = RB_ROOT;
935
936                 node = malloc(sizeof(*node));
937                 if (!node) {
938                         free(rec);
939                         return ERR_PTR(-ENOMEM);
940                 }
941                 node->cache.start = ino;
942                 node->cache.size = 1;
943                 node->data = rec;
944
945                 if (ino == BTRFS_FREE_INO_OBJECTID)
946                         rec->found_link = 1;
947
948                 ret = insert_cache_extent(inode_cache, &node->cache);
949                 if (ret)
950                         return ERR_PTR(-EEXIST);
951         }
952         return rec;
953 }
954
955 static void free_orphan_data_extents(struct list_head *orphan_extents)
956 {
957         struct orphan_data_extent *orphan;
958
959         while (!list_empty(orphan_extents)) {
960                 orphan = list_entry(orphan_extents->next,
961                                     struct orphan_data_extent, list);
962                 list_del(&orphan->list);
963                 free(orphan);
964         }
965 }
966
967 static void free_inode_rec(struct inode_record *rec)
968 {
969         struct inode_backref *backref;
970
971         if (--rec->refs > 0)
972                 return;
973
974         while (!list_empty(&rec->backrefs)) {
975                 backref = to_inode_backref(rec->backrefs.next);
976                 list_del(&backref->list);
977                 free(backref);
978         }
979         free_orphan_data_extents(&rec->orphan_extents);
980         free_file_extent_holes(&rec->holes);
981         free(rec);
982 }
983
984 static int can_free_inode_rec(struct inode_record *rec)
985 {
986         if (!rec->errors && rec->checked && rec->found_inode_item &&
987             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
988                 return 1;
989         return 0;
990 }
991
992 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
993                                  struct inode_record *rec)
994 {
995         struct cache_extent *cache;
996         struct inode_backref *tmp, *backref;
997         struct ptr_node *node;
998         unsigned char filetype;
999
1000         if (!rec->found_inode_item)
1001                 return;
1002
1003         filetype = imode_to_type(rec->imode);
1004         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1005                 if (backref->found_dir_item && backref->found_dir_index) {
1006                         if (backref->filetype != filetype)
1007                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1008                         if (!backref->errors && backref->found_inode_ref &&
1009                             rec->nlink == rec->found_link) {
1010                                 list_del(&backref->list);
1011                                 free(backref);
1012                         }
1013                 }
1014         }
1015
1016         if (!rec->checked || rec->merging)
1017                 return;
1018
1019         if (S_ISDIR(rec->imode)) {
1020                 if (rec->found_size != rec->isize)
1021                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1022                 if (rec->found_file_extent)
1023                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1024         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1025                 if (rec->found_dir_item)
1026                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1027                 if (rec->found_size != rec->nbytes)
1028                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1029                 if (rec->nlink > 0 && !no_holes &&
1030                     (rec->extent_end < rec->isize ||
1031                      first_extent_gap(&rec->holes) < rec->isize))
1032                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1033         }
1034
1035         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1036                 if (rec->found_csum_item && rec->nodatasum)
1037                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1038                 if (rec->some_csum_missing && !rec->nodatasum)
1039                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1040         }
1041
1042         BUG_ON(rec->refs != 1);
1043         if (can_free_inode_rec(rec)) {
1044                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1045                 node = container_of(cache, struct ptr_node, cache);
1046                 BUG_ON(node->data != rec);
1047                 remove_cache_extent(inode_cache, &node->cache);
1048                 free(node);
1049                 free_inode_rec(rec);
1050         }
1051 }
1052
1053 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1054 {
1055         struct btrfs_path path;
1056         struct btrfs_key key;
1057         int ret;
1058
1059         key.objectid = BTRFS_ORPHAN_OBJECTID;
1060         key.type = BTRFS_ORPHAN_ITEM_KEY;
1061         key.offset = ino;
1062
1063         btrfs_init_path(&path);
1064         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1065         btrfs_release_path(&path);
1066         if (ret > 0)
1067                 ret = -ENOENT;
1068         return ret;
1069 }
1070
1071 static int process_inode_item(struct extent_buffer *eb,
1072                               int slot, struct btrfs_key *key,
1073                               struct shared_node *active_node)
1074 {
1075         struct inode_record *rec;
1076         struct btrfs_inode_item *item;
1077
1078         rec = active_node->current;
1079         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1080         if (rec->found_inode_item) {
1081                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1082                 return 1;
1083         }
1084         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1085         rec->nlink = btrfs_inode_nlink(eb, item);
1086         rec->isize = btrfs_inode_size(eb, item);
1087         rec->nbytes = btrfs_inode_nbytes(eb, item);
1088         rec->imode = btrfs_inode_mode(eb, item);
1089         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1090                 rec->nodatasum = 1;
1091         rec->found_inode_item = 1;
1092         if (rec->nlink == 0)
1093                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1094         maybe_free_inode_rec(&active_node->inode_cache, rec);
1095         return 0;
1096 }
1097
1098 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1099                                                 const char *name,
1100                                                 int namelen, u64 dir)
1101 {
1102         struct inode_backref *backref;
1103
1104         list_for_each_entry(backref, &rec->backrefs, list) {
1105                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1106                         break;
1107                 if (backref->dir != dir || backref->namelen != namelen)
1108                         continue;
1109                 if (memcmp(name, backref->name, namelen))
1110                         continue;
1111                 return backref;
1112         }
1113
1114         backref = malloc(sizeof(*backref) + namelen + 1);
1115         if (!backref)
1116                 return NULL;
1117         memset(backref, 0, sizeof(*backref));
1118         backref->dir = dir;
1119         backref->namelen = namelen;
1120         memcpy(backref->name, name, namelen);
1121         backref->name[namelen] = '\0';
1122         list_add_tail(&backref->list, &rec->backrefs);
1123         return backref;
1124 }
1125
1126 static int add_inode_backref(struct cache_tree *inode_cache,
1127                              u64 ino, u64 dir, u64 index,
1128                              const char *name, int namelen,
1129                              int filetype, int itemtype, int errors)
1130 {
1131         struct inode_record *rec;
1132         struct inode_backref *backref;
1133
1134         rec = get_inode_rec(inode_cache, ino, 1);
1135         BUG_ON(IS_ERR(rec));
1136         backref = get_inode_backref(rec, name, namelen, dir);
1137         BUG_ON(!backref);
1138         if (errors)
1139                 backref->errors |= errors;
1140         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1141                 if (backref->found_dir_index)
1142                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1143                 if (backref->found_inode_ref && backref->index != index)
1144                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1145                 if (backref->found_dir_item && backref->filetype != filetype)
1146                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1147
1148                 backref->index = index;
1149                 backref->filetype = filetype;
1150                 backref->found_dir_index = 1;
1151         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1152                 rec->found_link++;
1153                 if (backref->found_dir_item)
1154                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1155                 if (backref->found_dir_index && backref->filetype != filetype)
1156                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1157
1158                 backref->filetype = filetype;
1159                 backref->found_dir_item = 1;
1160         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1161                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1162                 if (backref->found_inode_ref)
1163                         backref->errors |= REF_ERR_DUP_INODE_REF;
1164                 if (backref->found_dir_index && backref->index != index)
1165                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1166                 else
1167                         backref->index = index;
1168
1169                 backref->ref_type = itemtype;
1170                 backref->found_inode_ref = 1;
1171         } else {
1172                 BUG_ON(1);
1173         }
1174
1175         maybe_free_inode_rec(inode_cache, rec);
1176         return 0;
1177 }
1178
1179 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1180                             struct cache_tree *dst_cache)
1181 {
1182         struct inode_backref *backref;
1183         u32 dir_count = 0;
1184         int ret = 0;
1185
1186         dst->merging = 1;
1187         list_for_each_entry(backref, &src->backrefs, list) {
1188                 if (backref->found_dir_index) {
1189                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1190                                         backref->index, backref->name,
1191                                         backref->namelen, backref->filetype,
1192                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1193                 }
1194                 if (backref->found_dir_item) {
1195                         dir_count++;
1196                         add_inode_backref(dst_cache, dst->ino,
1197                                         backref->dir, 0, backref->name,
1198                                         backref->namelen, backref->filetype,
1199                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1200                 }
1201                 if (backref->found_inode_ref) {
1202                         add_inode_backref(dst_cache, dst->ino,
1203                                         backref->dir, backref->index,
1204                                         backref->name, backref->namelen, 0,
1205                                         backref->ref_type, backref->errors);
1206                 }
1207         }
1208
1209         if (src->found_dir_item)
1210                 dst->found_dir_item = 1;
1211         if (src->found_file_extent)
1212                 dst->found_file_extent = 1;
1213         if (src->found_csum_item)
1214                 dst->found_csum_item = 1;
1215         if (src->some_csum_missing)
1216                 dst->some_csum_missing = 1;
1217         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1218                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1219                 if (ret < 0)
1220                         return ret;
1221         }
1222
1223         BUG_ON(src->found_link < dir_count);
1224         dst->found_link += src->found_link - dir_count;
1225         dst->found_size += src->found_size;
1226         if (src->extent_start != (u64)-1) {
1227                 if (dst->extent_start == (u64)-1) {
1228                         dst->extent_start = src->extent_start;
1229                         dst->extent_end = src->extent_end;
1230                 } else {
1231                         if (dst->extent_end > src->extent_start)
1232                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1233                         else if (dst->extent_end < src->extent_start) {
1234                                 ret = add_file_extent_hole(&dst->holes,
1235                                         dst->extent_end,
1236                                         src->extent_start - dst->extent_end);
1237                         }
1238                         if (dst->extent_end < src->extent_end)
1239                                 dst->extent_end = src->extent_end;
1240                 }
1241         }
1242
1243         dst->errors |= src->errors;
1244         if (src->found_inode_item) {
1245                 if (!dst->found_inode_item) {
1246                         dst->nlink = src->nlink;
1247                         dst->isize = src->isize;
1248                         dst->nbytes = src->nbytes;
1249                         dst->imode = src->imode;
1250                         dst->nodatasum = src->nodatasum;
1251                         dst->found_inode_item = 1;
1252                 } else {
1253                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1254                 }
1255         }
1256         dst->merging = 0;
1257
1258         return 0;
1259 }
1260
1261 static int splice_shared_node(struct shared_node *src_node,
1262                               struct shared_node *dst_node)
1263 {
1264         struct cache_extent *cache;
1265         struct ptr_node *node, *ins;
1266         struct cache_tree *src, *dst;
1267         struct inode_record *rec, *conflict;
1268         u64 current_ino = 0;
1269         int splice = 0;
1270         int ret;
1271
1272         if (--src_node->refs == 0)
1273                 splice = 1;
1274         if (src_node->current)
1275                 current_ino = src_node->current->ino;
1276
1277         src = &src_node->root_cache;
1278         dst = &dst_node->root_cache;
1279 again:
1280         cache = search_cache_extent(src, 0);
1281         while (cache) {
1282                 node = container_of(cache, struct ptr_node, cache);
1283                 rec = node->data;
1284                 cache = next_cache_extent(cache);
1285
1286                 if (splice) {
1287                         remove_cache_extent(src, &node->cache);
1288                         ins = node;
1289                 } else {
1290                         ins = malloc(sizeof(*ins));
1291                         BUG_ON(!ins);
1292                         ins->cache.start = node->cache.start;
1293                         ins->cache.size = node->cache.size;
1294                         ins->data = rec;
1295                         rec->refs++;
1296                 }
1297                 ret = insert_cache_extent(dst, &ins->cache);
1298                 if (ret == -EEXIST) {
1299                         conflict = get_inode_rec(dst, rec->ino, 1);
1300                         BUG_ON(IS_ERR(conflict));
1301                         merge_inode_recs(rec, conflict, dst);
1302                         if (rec->checked) {
1303                                 conflict->checked = 1;
1304                                 if (dst_node->current == conflict)
1305                                         dst_node->current = NULL;
1306                         }
1307                         maybe_free_inode_rec(dst, conflict);
1308                         free_inode_rec(rec);
1309                         free(ins);
1310                 } else {
1311                         BUG_ON(ret);
1312                 }
1313         }
1314
1315         if (src == &src_node->root_cache) {
1316                 src = &src_node->inode_cache;
1317                 dst = &dst_node->inode_cache;
1318                 goto again;
1319         }
1320
1321         if (current_ino > 0 && (!dst_node->current ||
1322             current_ino > dst_node->current->ino)) {
1323                 if (dst_node->current) {
1324                         dst_node->current->checked = 1;
1325                         maybe_free_inode_rec(dst, dst_node->current);
1326                 }
1327                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1328                 BUG_ON(IS_ERR(dst_node->current));
1329         }
1330         return 0;
1331 }
1332
1333 static void free_inode_ptr(struct cache_extent *cache)
1334 {
1335         struct ptr_node *node;
1336         struct inode_record *rec;
1337
1338         node = container_of(cache, struct ptr_node, cache);
1339         rec = node->data;
1340         free_inode_rec(rec);
1341         free(node);
1342 }
1343
1344 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1345
1346 static struct shared_node *find_shared_node(struct cache_tree *shared,
1347                                             u64 bytenr)
1348 {
1349         struct cache_extent *cache;
1350         struct shared_node *node;
1351
1352         cache = lookup_cache_extent(shared, bytenr, 1);
1353         if (cache) {
1354                 node = container_of(cache, struct shared_node, cache);
1355                 return node;
1356         }
1357         return NULL;
1358 }
1359
1360 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1361 {
1362         int ret;
1363         struct shared_node *node;
1364
1365         node = calloc(1, sizeof(*node));
1366         if (!node)
1367                 return -ENOMEM;
1368         node->cache.start = bytenr;
1369         node->cache.size = 1;
1370         cache_tree_init(&node->root_cache);
1371         cache_tree_init(&node->inode_cache);
1372         node->refs = refs;
1373
1374         ret = insert_cache_extent(shared, &node->cache);
1375
1376         return ret;
1377 }
1378
1379 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1380                              struct walk_control *wc, int level)
1381 {
1382         struct shared_node *node;
1383         struct shared_node *dest;
1384         int ret;
1385
1386         if (level == wc->active_node)
1387                 return 0;
1388
1389         BUG_ON(wc->active_node <= level);
1390         node = find_shared_node(&wc->shared, bytenr);
1391         if (!node) {
1392                 ret = add_shared_node(&wc->shared, bytenr, refs);
1393                 BUG_ON(ret);
1394                 node = find_shared_node(&wc->shared, bytenr);
1395                 wc->nodes[level] = node;
1396                 wc->active_node = level;
1397                 return 0;
1398         }
1399
1400         if (wc->root_level == wc->active_node &&
1401             btrfs_root_refs(&root->root_item) == 0) {
1402                 if (--node->refs == 0) {
1403                         free_inode_recs_tree(&node->root_cache);
1404                         free_inode_recs_tree(&node->inode_cache);
1405                         remove_cache_extent(&wc->shared, &node->cache);
1406                         free(node);
1407                 }
1408                 return 1;
1409         }
1410
1411         dest = wc->nodes[wc->active_node];
1412         splice_shared_node(node, dest);
1413         if (node->refs == 0) {
1414                 remove_cache_extent(&wc->shared, &node->cache);
1415                 free(node);
1416         }
1417         return 1;
1418 }
1419
1420 static int leave_shared_node(struct btrfs_root *root,
1421                              struct walk_control *wc, int level)
1422 {
1423         struct shared_node *node;
1424         struct shared_node *dest;
1425         int i;
1426
1427         if (level == wc->root_level)
1428                 return 0;
1429
1430         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1431                 if (wc->nodes[i])
1432                         break;
1433         }
1434         BUG_ON(i >= BTRFS_MAX_LEVEL);
1435
1436         node = wc->nodes[wc->active_node];
1437         wc->nodes[wc->active_node] = NULL;
1438         wc->active_node = i;
1439
1440         dest = wc->nodes[wc->active_node];
1441         if (wc->active_node < wc->root_level ||
1442             btrfs_root_refs(&root->root_item) > 0) {
1443                 BUG_ON(node->refs <= 1);
1444                 splice_shared_node(node, dest);
1445         } else {
1446                 BUG_ON(node->refs < 2);
1447                 node->refs--;
1448         }
1449         return 0;
1450 }
1451
1452 /*
1453  * Returns:
1454  * < 0 - on error
1455  * 1   - if the root with id child_root_id is a child of root parent_root_id
1456  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1457  *       has other root(s) as parent(s)
1458  * 2   - if the root child_root_id doesn't have any parent roots
1459  */
1460 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1461                          u64 child_root_id)
1462 {
1463         struct btrfs_path path;
1464         struct btrfs_key key;
1465         struct extent_buffer *leaf;
1466         int has_parent = 0;
1467         int ret;
1468
1469         btrfs_init_path(&path);
1470
1471         key.objectid = parent_root_id;
1472         key.type = BTRFS_ROOT_REF_KEY;
1473         key.offset = child_root_id;
1474         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1475                                 0, 0);
1476         if (ret < 0)
1477                 return ret;
1478         btrfs_release_path(&path);
1479         if (!ret)
1480                 return 1;
1481
1482         key.objectid = child_root_id;
1483         key.type = BTRFS_ROOT_BACKREF_KEY;
1484         key.offset = 0;
1485         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1486                                 0, 0);
1487         if (ret < 0)
1488                 goto out;
1489
1490         while (1) {
1491                 leaf = path.nodes[0];
1492                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1493                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1494                         if (ret)
1495                                 break;
1496                         leaf = path.nodes[0];
1497                 }
1498
1499                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1500                 if (key.objectid != child_root_id ||
1501                     key.type != BTRFS_ROOT_BACKREF_KEY)
1502                         break;
1503
1504                 has_parent = 1;
1505
1506                 if (key.offset == parent_root_id) {
1507                         btrfs_release_path(&path);
1508                         return 1;
1509                 }
1510
1511                 path.slots[0]++;
1512         }
1513 out:
1514         btrfs_release_path(&path);
1515         if (ret < 0)
1516                 return ret;
1517         return has_parent ? 0 : 2;
1518 }
1519
1520 static int process_dir_item(struct btrfs_root *root,
1521                             struct extent_buffer *eb,
1522                             int slot, struct btrfs_key *key,
1523                             struct shared_node *active_node)
1524 {
1525         u32 total;
1526         u32 cur = 0;
1527         u32 len;
1528         u32 name_len;
1529         u32 data_len;
1530         int error;
1531         int nritems = 0;
1532         int filetype;
1533         struct btrfs_dir_item *di;
1534         struct inode_record *rec;
1535         struct cache_tree *root_cache;
1536         struct cache_tree *inode_cache;
1537         struct btrfs_key location;
1538         char namebuf[BTRFS_NAME_LEN];
1539
1540         root_cache = &active_node->root_cache;
1541         inode_cache = &active_node->inode_cache;
1542         rec = active_node->current;
1543         rec->found_dir_item = 1;
1544
1545         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1546         total = btrfs_item_size_nr(eb, slot);
1547         while (cur < total) {
1548                 nritems++;
1549                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1550                 name_len = btrfs_dir_name_len(eb, di);
1551                 data_len = btrfs_dir_data_len(eb, di);
1552                 filetype = btrfs_dir_type(eb, di);
1553
1554                 rec->found_size += name_len;
1555                 if (name_len <= BTRFS_NAME_LEN) {
1556                         len = name_len;
1557                         error = 0;
1558                 } else {
1559                         len = BTRFS_NAME_LEN;
1560                         error = REF_ERR_NAME_TOO_LONG;
1561                 }
1562                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1563
1564                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1565                         add_inode_backref(inode_cache, location.objectid,
1566                                           key->objectid, key->offset, namebuf,
1567                                           len, filetype, key->type, error);
1568                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1569                         add_inode_backref(root_cache, location.objectid,
1570                                           key->objectid, key->offset,
1571                                           namebuf, len, filetype,
1572                                           key->type, error);
1573                 } else {
1574                         fprintf(stderr, "invalid location in dir item %u\n",
1575                                 location.type);
1576                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1577                                           key->objectid, key->offset, namebuf,
1578                                           len, filetype, key->type, error);
1579                 }
1580
1581                 len = sizeof(*di) + name_len + data_len;
1582                 di = (struct btrfs_dir_item *)((char *)di + len);
1583                 cur += len;
1584         }
1585         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1586                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1587
1588         return 0;
1589 }
1590
1591 static int process_inode_ref(struct extent_buffer *eb,
1592                              int slot, struct btrfs_key *key,
1593                              struct shared_node *active_node)
1594 {
1595         u32 total;
1596         u32 cur = 0;
1597         u32 len;
1598         u32 name_len;
1599         u64 index;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_ref *ref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_ref_name_len(eb, ref);
1611                 index = btrfs_inode_ref_index(eb, ref);
1612                 if (name_len <= BTRFS_NAME_LEN) {
1613                         len = name_len;
1614                         error = 0;
1615                 } else {
1616                         len = BTRFS_NAME_LEN;
1617                         error = REF_ERR_NAME_TOO_LONG;
1618                 }
1619                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1620                 add_inode_backref(inode_cache, key->objectid, key->offset,
1621                                   index, namebuf, len, 0, key->type, error);
1622
1623                 len = sizeof(*ref) + name_len;
1624                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1625                 cur += len;
1626         }
1627         return 0;
1628 }
1629
1630 static int process_inode_extref(struct extent_buffer *eb,
1631                                 int slot, struct btrfs_key *key,
1632                                 struct shared_node *active_node)
1633 {
1634         u32 total;
1635         u32 cur = 0;
1636         u32 len;
1637         u32 name_len;
1638         u64 index;
1639         u64 parent;
1640         int error;
1641         struct cache_tree *inode_cache;
1642         struct btrfs_inode_extref *extref;
1643         char namebuf[BTRFS_NAME_LEN];
1644
1645         inode_cache = &active_node->inode_cache;
1646
1647         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1648         total = btrfs_item_size_nr(eb, slot);
1649         while (cur < total) {
1650                 name_len = btrfs_inode_extref_name_len(eb, extref);
1651                 index = btrfs_inode_extref_index(eb, extref);
1652                 parent = btrfs_inode_extref_parent(eb, extref);
1653                 if (name_len <= BTRFS_NAME_LEN) {
1654                         len = name_len;
1655                         error = 0;
1656                 } else {
1657                         len = BTRFS_NAME_LEN;
1658                         error = REF_ERR_NAME_TOO_LONG;
1659                 }
1660                 read_extent_buffer(eb, namebuf,
1661                                    (unsigned long)(extref + 1), len);
1662                 add_inode_backref(inode_cache, key->objectid, parent,
1663                                   index, namebuf, len, 0, key->type, error);
1664
1665                 len = sizeof(*extref) + name_len;
1666                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1667                 cur += len;
1668         }
1669         return 0;
1670
1671 }
1672
1673 static int count_csum_range(struct btrfs_root *root, u64 start,
1674                             u64 len, u64 *found)
1675 {
1676         struct btrfs_key key;
1677         struct btrfs_path path;
1678         struct extent_buffer *leaf;
1679         int ret;
1680         size_t size;
1681         *found = 0;
1682         u64 csum_end;
1683         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1684
1685         btrfs_init_path(&path);
1686
1687         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1688         key.offset = start;
1689         key.type = BTRFS_EXTENT_CSUM_KEY;
1690
1691         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1692                                 &key, &path, 0, 0);
1693         if (ret < 0)
1694                 goto out;
1695         if (ret > 0 && path.slots[0] > 0) {
1696                 leaf = path.nodes[0];
1697                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1698                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1699                     key.type == BTRFS_EXTENT_CSUM_KEY)
1700                         path.slots[0]--;
1701         }
1702
1703         while (len > 0) {
1704                 leaf = path.nodes[0];
1705                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1706                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1707                         if (ret > 0)
1708                                 break;
1709                         else if (ret < 0)
1710                                 goto out;
1711                         leaf = path.nodes[0];
1712                 }
1713
1714                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1715                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1716                     key.type != BTRFS_EXTENT_CSUM_KEY)
1717                         break;
1718
1719                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1720                 if (key.offset >= start + len)
1721                         break;
1722
1723                 if (key.offset > start)
1724                         start = key.offset;
1725
1726                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1727                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1728                 if (csum_end > start) {
1729                         size = min(csum_end - start, len);
1730                         len -= size;
1731                         start += size;
1732                         *found += size;
1733                 }
1734
1735                 path.slots[0]++;
1736         }
1737 out:
1738         btrfs_release_path(&path);
1739         if (ret < 0)
1740                 return ret;
1741         return 0;
1742 }
1743
1744 static int process_file_extent(struct btrfs_root *root,
1745                                 struct extent_buffer *eb,
1746                                 int slot, struct btrfs_key *key,
1747                                 struct shared_node *active_node)
1748 {
1749         struct inode_record *rec;
1750         struct btrfs_file_extent_item *fi;
1751         u64 num_bytes = 0;
1752         u64 disk_bytenr = 0;
1753         u64 extent_offset = 0;
1754         u64 mask = root->sectorsize - 1;
1755         int extent_type;
1756         int ret;
1757
1758         rec = active_node->current;
1759         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1760         rec->found_file_extent = 1;
1761
1762         if (rec->extent_start == (u64)-1) {
1763                 rec->extent_start = key->offset;
1764                 rec->extent_end = key->offset;
1765         }
1766
1767         if (rec->extent_end > key->offset)
1768                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1769         else if (rec->extent_end < key->offset) {
1770                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1771                                            key->offset - rec->extent_end);
1772                 if (ret < 0)
1773                         return ret;
1774         }
1775
1776         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1777         extent_type = btrfs_file_extent_type(eb, fi);
1778
1779         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1780                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1781                 if (num_bytes == 0)
1782                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783                 rec->found_size += num_bytes;
1784                 num_bytes = (num_bytes + mask) & ~mask;
1785         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1786                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1787                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1788                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1789                 extent_offset = btrfs_file_extent_offset(eb, fi);
1790                 if (num_bytes == 0 || (num_bytes & mask))
1791                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1792                 if (num_bytes + extent_offset >
1793                     btrfs_file_extent_ram_bytes(eb, fi))
1794                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1795                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1796                     (btrfs_file_extent_compression(eb, fi) ||
1797                      btrfs_file_extent_encryption(eb, fi) ||
1798                      btrfs_file_extent_other_encoding(eb, fi)))
1799                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1800                 if (disk_bytenr > 0)
1801                         rec->found_size += num_bytes;
1802         } else {
1803                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804         }
1805         rec->extent_end = key->offset + num_bytes;
1806
1807         /*
1808          * The data reloc tree will copy full extents into its inode and then
1809          * copy the corresponding csums.  Because the extent it copied could be
1810          * a preallocated extent that hasn't been written to yet there may be no
1811          * csums to copy, ergo we won't have csums for our file extent.  This is
1812          * ok so just don't bother checking csums if the inode belongs to the
1813          * data reloc tree.
1814          */
1815         if (disk_bytenr > 0 &&
1816             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1817                 u64 found;
1818                 if (btrfs_file_extent_compression(eb, fi))
1819                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1820                 else
1821                         disk_bytenr += extent_offset;
1822
1823                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1824                 if (ret < 0)
1825                         return ret;
1826                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1827                         if (found > 0)
1828                                 rec->found_csum_item = 1;
1829                         if (found < num_bytes)
1830                                 rec->some_csum_missing = 1;
1831                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1832                         if (found > 0)
1833                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1834                 }
1835         }
1836         return 0;
1837 }
1838
1839 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1840                             struct walk_control *wc)
1841 {
1842         struct btrfs_key key;
1843         u32 nritems;
1844         int i;
1845         int ret = 0;
1846         struct cache_tree *inode_cache;
1847         struct shared_node *active_node;
1848
1849         if (wc->root_level == wc->active_node &&
1850             btrfs_root_refs(&root->root_item) == 0)
1851                 return 0;
1852
1853         active_node = wc->nodes[wc->active_node];
1854         inode_cache = &active_node->inode_cache;
1855         nritems = btrfs_header_nritems(eb);
1856         for (i = 0; i < nritems; i++) {
1857                 btrfs_item_key_to_cpu(eb, &key, i);
1858
1859                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1860                         continue;
1861                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1862                         continue;
1863
1864                 if (active_node->current == NULL ||
1865                     active_node->current->ino < key.objectid) {
1866                         if (active_node->current) {
1867                                 active_node->current->checked = 1;
1868                                 maybe_free_inode_rec(inode_cache,
1869                                                      active_node->current);
1870                         }
1871                         active_node->current = get_inode_rec(inode_cache,
1872                                                              key.objectid, 1);
1873                         BUG_ON(IS_ERR(active_node->current));
1874                 }
1875                 switch (key.type) {
1876                 case BTRFS_DIR_ITEM_KEY:
1877                 case BTRFS_DIR_INDEX_KEY:
1878                         ret = process_dir_item(root, eb, i, &key, active_node);
1879                         break;
1880                 case BTRFS_INODE_REF_KEY:
1881                         ret = process_inode_ref(eb, i, &key, active_node);
1882                         break;
1883                 case BTRFS_INODE_EXTREF_KEY:
1884                         ret = process_inode_extref(eb, i, &key, active_node);
1885                         break;
1886                 case BTRFS_INODE_ITEM_KEY:
1887                         ret = process_inode_item(eb, i, &key, active_node);
1888                         break;
1889                 case BTRFS_EXTENT_DATA_KEY:
1890                         ret = process_file_extent(root, eb, i, &key,
1891                                                   active_node);
1892                         break;
1893                 default:
1894                         break;
1895                 };
1896         }
1897         return ret;
1898 }
1899
1900 static void reada_walk_down(struct btrfs_root *root,
1901                             struct extent_buffer *node, int slot)
1902 {
1903         u64 bytenr;
1904         u64 ptr_gen;
1905         u32 nritems;
1906         u32 blocksize;
1907         int i;
1908         int level;
1909
1910         level = btrfs_header_level(node);
1911         if (level != 1)
1912                 return;
1913
1914         nritems = btrfs_header_nritems(node);
1915         blocksize = root->nodesize;
1916         for (i = slot; i < nritems; i++) {
1917                 bytenr = btrfs_node_blockptr(node, i);
1918                 ptr_gen = btrfs_node_ptr_generation(node, i);
1919                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1920         }
1921 }
1922
1923 /*
1924  * Check the child node/leaf by the following condition:
1925  * 1. the first item key of the node/leaf should be the same with the one
1926  *    in parent.
1927  * 2. block in parent node should match the child node/leaf.
1928  * 3. generation of parent node and child's header should be consistent.
1929  *
1930  * Or the child node/leaf pointed by the key in parent is not valid.
1931  *
1932  * We hope to check leaf owner too, but since subvol may share leaves,
1933  * which makes leaf owner check not so strong, key check should be
1934  * sufficient enough for that case.
1935  */
1936 static int check_child_node(struct btrfs_root *root,
1937                             struct extent_buffer *parent, int slot,
1938                             struct extent_buffer *child)
1939 {
1940         struct btrfs_key parent_key;
1941         struct btrfs_key child_key;
1942         int ret = 0;
1943
1944         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1945         if (btrfs_header_level(child) == 0)
1946                 btrfs_item_key_to_cpu(child, &child_key, 0);
1947         else
1948                 btrfs_node_key_to_cpu(child, &child_key, 0);
1949
1950         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1951                 ret = -EINVAL;
1952                 fprintf(stderr,
1953                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1954                         parent_key.objectid, parent_key.type, parent_key.offset,
1955                         child_key.objectid, child_key.type, child_key.offset);
1956         }
1957         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1958                 ret = -EINVAL;
1959                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1960                         btrfs_node_blockptr(parent, slot),
1961                         btrfs_header_bytenr(child));
1962         }
1963         if (btrfs_node_ptr_generation(parent, slot) !=
1964             btrfs_header_generation(child)) {
1965                 ret = -EINVAL;
1966                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1967                         btrfs_header_generation(child),
1968                         btrfs_node_ptr_generation(parent, slot));
1969         }
1970         return ret;
1971 }
1972
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];
1975         u64 refs[BTRFS_MAX_LEVEL];
1976 };
1977
1978 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1979                           struct walk_control *wc, int *level,
1980                           struct node_refs *nrefs)
1981 {
1982         enum btrfs_tree_block_status status;
1983         u64 bytenr;
1984         u64 ptr_gen;
1985         struct extent_buffer *next;
1986         struct extent_buffer *cur;
1987         u32 blocksize;
1988         int ret, err = 0;
1989         u64 refs;
1990
1991         WARN_ON(*level < 0);
1992         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1993
1994         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1995                 refs = nrefs->refs[*level];
1996                 ret = 0;
1997         } else {
1998                 ret = btrfs_lookup_extent_info(NULL, root,
1999                                        path->nodes[*level]->start,
2000                                        *level, 1, &refs, NULL);
2001                 if (ret < 0) {
2002                         err = ret;
2003                         goto out;
2004                 }
2005                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2006                 nrefs->refs[*level] = refs;
2007         }
2008
2009         if (refs > 1) {
2010                 ret = enter_shared_node(root, path->nodes[*level]->start,
2011                                         refs, wc, *level);
2012                 if (ret > 0) {
2013                         err = ret;
2014                         goto out;
2015                 }
2016         }
2017
2018         while (*level >= 0) {
2019                 WARN_ON(*level < 0);
2020                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2021                 cur = path->nodes[*level];
2022
2023                 if (btrfs_header_level(cur) != *level)
2024                         WARN_ON(1);
2025
2026                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2027                         break;
2028                 if (*level == 0) {
2029                         ret = process_one_leaf(root, cur, wc);
2030                         if (ret < 0)
2031                                 err = ret;
2032                         break;
2033                 }
2034                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2035                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2036                 blocksize = root->nodesize;
2037
2038                 if (bytenr == nrefs->bytenr[*level - 1]) {
2039                         refs = nrefs->refs[*level - 1];
2040                 } else {
2041                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2042                                         *level - 1, 1, &refs, NULL);
2043                         if (ret < 0) {
2044                                 refs = 0;
2045                         } else {
2046                                 nrefs->bytenr[*level - 1] = bytenr;
2047                                 nrefs->refs[*level - 1] = refs;
2048                         }
2049                 }
2050
2051                 if (refs > 1) {
2052                         ret = enter_shared_node(root, bytenr, refs,
2053                                                 wc, *level - 1);
2054                         if (ret > 0) {
2055                                 path->slots[*level]++;
2056                                 continue;
2057                         }
2058                 }
2059
2060                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2061                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2062                         free_extent_buffer(next);
2063                         reada_walk_down(root, cur, path->slots[*level]);
2064                         next = read_tree_block(root, bytenr, blocksize,
2065                                                ptr_gen);
2066                         if (!extent_buffer_uptodate(next)) {
2067                                 struct btrfs_key node_key;
2068
2069                                 btrfs_node_key_to_cpu(path->nodes[*level],
2070                                                       &node_key,
2071                                                       path->slots[*level]);
2072                                 btrfs_add_corrupt_extent_record(root->fs_info,
2073                                                 &node_key,
2074                                                 path->nodes[*level]->start,
2075                                                 root->nodesize, *level);
2076                                 err = -EIO;
2077                                 goto out;
2078                         }
2079                 }
2080
2081                 ret = check_child_node(root, cur, path->slots[*level], next);
2082                 if (ret) {
2083                         err = ret;
2084                         goto out;
2085                 }
2086
2087                 if (btrfs_is_leaf(next))
2088                         status = btrfs_check_leaf(root, NULL, next);
2089                 else
2090                         status = btrfs_check_node(root, NULL, next);
2091                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2092                         free_extent_buffer(next);
2093                         err = -EIO;
2094                         goto out;
2095                 }
2096
2097                 *level = *level - 1;
2098                 free_extent_buffer(path->nodes[*level]);
2099                 path->nodes[*level] = next;
2100                 path->slots[*level] = 0;
2101         }
2102 out:
2103         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2104         return err;
2105 }
2106
2107 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2108                         struct walk_control *wc, int *level)
2109 {
2110         int i;
2111         struct extent_buffer *leaf;
2112
2113         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2114                 leaf = path->nodes[i];
2115                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2116                         path->slots[i]++;
2117                         *level = i;
2118                         return 0;
2119                 } else {
2120                         free_extent_buffer(path->nodes[*level]);
2121                         path->nodes[*level] = NULL;
2122                         BUG_ON(*level > wc->active_node);
2123                         if (*level == wc->active_node)
2124                                 leave_shared_node(root, wc, *level);
2125                         *level = i + 1;
2126                 }
2127         }
2128         return 1;
2129 }
2130
2131 static int check_root_dir(struct inode_record *rec)
2132 {
2133         struct inode_backref *backref;
2134         int ret = -1;
2135
2136         if (!rec->found_inode_item || rec->errors)
2137                 goto out;
2138         if (rec->nlink != 1 || rec->found_link != 0)
2139                 goto out;
2140         if (list_empty(&rec->backrefs))
2141                 goto out;
2142         backref = to_inode_backref(rec->backrefs.next);
2143         if (!backref->found_inode_ref)
2144                 goto out;
2145         if (backref->index != 0 || backref->namelen != 2 ||
2146             memcmp(backref->name, "..", 2))
2147                 goto out;
2148         if (backref->found_dir_index || backref->found_dir_item)
2149                 goto out;
2150         ret = 0;
2151 out:
2152         return ret;
2153 }
2154
2155 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2156                               struct btrfs_root *root, struct btrfs_path *path,
2157                               struct inode_record *rec)
2158 {
2159         struct btrfs_inode_item *ei;
2160         struct btrfs_key key;
2161         int ret;
2162
2163         key.objectid = rec->ino;
2164         key.type = BTRFS_INODE_ITEM_KEY;
2165         key.offset = (u64)-1;
2166
2167         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2168         if (ret < 0)
2169                 goto out;
2170         if (ret) {
2171                 if (!path->slots[0]) {
2172                         ret = -ENOENT;
2173                         goto out;
2174                 }
2175                 path->slots[0]--;
2176                 ret = 0;
2177         }
2178         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2179         if (key.objectid != rec->ino) {
2180                 ret = -ENOENT;
2181                 goto out;
2182         }
2183
2184         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2185                             struct btrfs_inode_item);
2186         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2187         btrfs_mark_buffer_dirty(path->nodes[0]);
2188         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2189         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2190                root->root_key.objectid);
2191 out:
2192         btrfs_release_path(path);
2193         return ret;
2194 }
2195
2196 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2197                                     struct btrfs_root *root,
2198                                     struct btrfs_path *path,
2199                                     struct inode_record *rec)
2200 {
2201         int ret;
2202
2203         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2204         btrfs_release_path(path);
2205         if (!ret)
2206                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2207         return ret;
2208 }
2209
2210 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2211                                struct btrfs_root *root,
2212                                struct btrfs_path *path,
2213                                struct inode_record *rec)
2214 {
2215         struct btrfs_inode_item *ei;
2216         struct btrfs_key key;
2217         int ret = 0;
2218
2219         key.objectid = rec->ino;
2220         key.type = BTRFS_INODE_ITEM_KEY;
2221         key.offset = 0;
2222
2223         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2224         if (ret) {
2225                 if (ret > 0)
2226                         ret = -ENOENT;
2227                 goto out;
2228         }
2229
2230         /* Since ret == 0, no need to check anything */
2231         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2232                             struct btrfs_inode_item);
2233         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2234         btrfs_mark_buffer_dirty(path->nodes[0]);
2235         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2236         printf("reset nbytes for ino %llu root %llu\n",
2237                rec->ino, root->root_key.objectid);
2238 out:
2239         btrfs_release_path(path);
2240         return ret;
2241 }
2242
2243 static int add_missing_dir_index(struct btrfs_root *root,
2244                                  struct cache_tree *inode_cache,
2245                                  struct inode_record *rec,
2246                                  struct inode_backref *backref)
2247 {
2248         struct btrfs_path *path;
2249         struct btrfs_trans_handle *trans;
2250         struct btrfs_dir_item *dir_item;
2251         struct extent_buffer *leaf;
2252         struct btrfs_key key;
2253         struct btrfs_disk_key disk_key;
2254         struct inode_record *dir_rec;
2255         unsigned long name_ptr;
2256         u32 data_size = sizeof(*dir_item) + backref->namelen;
2257         int ret;
2258
2259         path = btrfs_alloc_path();
2260         if (!path)
2261                 return -ENOMEM;
2262
2263         trans = btrfs_start_transaction(root, 1);
2264         if (IS_ERR(trans)) {
2265                 btrfs_free_path(path);
2266                 return PTR_ERR(trans);
2267         }
2268
2269         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2270                 (unsigned long long)rec->ino);
2271         key.objectid = backref->dir;
2272         key.type = BTRFS_DIR_INDEX_KEY;
2273         key.offset = backref->index;
2274
2275         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2276         BUG_ON(ret);
2277
2278         leaf = path->nodes[0];
2279         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2280
2281         disk_key.objectid = cpu_to_le64(rec->ino);
2282         disk_key.type = BTRFS_INODE_ITEM_KEY;
2283         disk_key.offset = 0;
2284
2285         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2286         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2287         btrfs_set_dir_data_len(leaf, dir_item, 0);
2288         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2289         name_ptr = (unsigned long)(dir_item + 1);
2290         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2291         btrfs_mark_buffer_dirty(leaf);
2292         btrfs_free_path(path);
2293         btrfs_commit_transaction(trans, root);
2294
2295         backref->found_dir_index = 1;
2296         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2297         BUG_ON(IS_ERR(dir_rec));
2298         if (!dir_rec)
2299                 return 0;
2300         dir_rec->found_size += backref->namelen;
2301         if (dir_rec->found_size == dir_rec->isize &&
2302             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2303                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2304         if (dir_rec->found_size != dir_rec->isize)
2305                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2306
2307         return 0;
2308 }
2309
2310 static int delete_dir_index(struct btrfs_root *root,
2311                             struct cache_tree *inode_cache,
2312                             struct inode_record *rec,
2313                             struct inode_backref *backref)
2314 {
2315         struct btrfs_trans_handle *trans;
2316         struct btrfs_dir_item *di;
2317         struct btrfs_path *path;
2318         int ret = 0;
2319
2320         path = btrfs_alloc_path();
2321         if (!path)
2322                 return -ENOMEM;
2323
2324         trans = btrfs_start_transaction(root, 1);
2325         if (IS_ERR(trans)) {
2326                 btrfs_free_path(path);
2327                 return PTR_ERR(trans);
2328         }
2329
2330
2331         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2332                 (unsigned long long)backref->dir,
2333                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2334                 (unsigned long long)root->objectid);
2335
2336         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2337                                     backref->name, backref->namelen,
2338                                     backref->index, -1);
2339         if (IS_ERR(di)) {
2340                 ret = PTR_ERR(di);
2341                 btrfs_free_path(path);
2342                 btrfs_commit_transaction(trans, root);
2343                 if (ret == -ENOENT)
2344                         return 0;
2345                 return ret;
2346         }
2347
2348         if (!di)
2349                 ret = btrfs_del_item(trans, root, path);
2350         else
2351                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2352         BUG_ON(ret);
2353         btrfs_free_path(path);
2354         btrfs_commit_transaction(trans, root);
2355         return ret;
2356 }
2357
2358 static int create_inode_item(struct btrfs_root *root,
2359                              struct inode_record *rec,
2360                              struct inode_backref *backref, int root_dir)
2361 {
2362         struct btrfs_trans_handle *trans;
2363         struct btrfs_inode_item inode_item;
2364         time_t now = time(NULL);
2365         int ret;
2366
2367         trans = btrfs_start_transaction(root, 1);
2368         if (IS_ERR(trans)) {
2369                 ret = PTR_ERR(trans);
2370                 return ret;
2371         }
2372
2373         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2374                 "be incomplete, please check permissions and content after "
2375                 "the fsck completes.\n", (unsigned long long)root->objectid,
2376                 (unsigned long long)rec->ino);
2377
2378         memset(&inode_item, 0, sizeof(inode_item));
2379         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2380         if (root_dir)
2381                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2382         else
2383                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2384         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2385         if (rec->found_dir_item) {
2386                 if (rec->found_file_extent)
2387                         fprintf(stderr, "root %llu inode %llu has both a dir "
2388                                 "item and extents, unsure if it is a dir or a "
2389                                 "regular file so setting it as a directory\n",
2390                                 (unsigned long long)root->objectid,
2391                                 (unsigned long long)rec->ino);
2392                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2393                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2394         } else if (!rec->found_dir_item) {
2395                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2396                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2397         }
2398         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2399         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2400         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2401         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2402         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2403         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2404         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2405         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2406
2407         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2408         BUG_ON(ret);
2409         btrfs_commit_transaction(trans, root);
2410         return 0;
2411 }
2412
2413 static int repair_inode_backrefs(struct btrfs_root *root,
2414                                  struct inode_record *rec,
2415                                  struct cache_tree *inode_cache,
2416                                  int delete)
2417 {
2418         struct inode_backref *tmp, *backref;
2419         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2420         int ret = 0;
2421         int repaired = 0;
2422
2423         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2424                 if (!delete && rec->ino == root_dirid) {
2425                         if (!rec->found_inode_item) {
2426                                 ret = create_inode_item(root, rec, backref, 1);
2427                                 if (ret)
2428                                         break;
2429                                 repaired++;
2430                         }
2431                 }
2432
2433                 /* Index 0 for root dir's are special, don't mess with it */
2434                 if (rec->ino == root_dirid && backref->index == 0)
2435                         continue;
2436
2437                 if (delete &&
2438                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2439                      (backref->found_dir_index && backref->found_inode_ref &&
2440                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2441                         ret = delete_dir_index(root, inode_cache, rec, backref);
2442                         if (ret)
2443                                 break;
2444                         repaired++;
2445                         list_del(&backref->list);
2446                         free(backref);
2447                 }
2448
2449                 if (!delete && !backref->found_dir_index &&
2450                     backref->found_dir_item && backref->found_inode_ref) {
2451                         ret = add_missing_dir_index(root, inode_cache, rec,
2452                                                     backref);
2453                         if (ret)
2454                                 break;
2455                         repaired++;
2456                         if (backref->found_dir_item &&
2457                             backref->found_dir_index &&
2458                             backref->found_dir_index) {
2459                                 if (!backref->errors &&
2460                                     backref->found_inode_ref) {
2461                                         list_del(&backref->list);
2462                                         free(backref);
2463                                 }
2464                         }
2465                 }
2466
2467                 if (!delete && (!backref->found_dir_index &&
2468                                 !backref->found_dir_item &&
2469                                 backref->found_inode_ref)) {
2470                         struct btrfs_trans_handle *trans;
2471                         struct btrfs_key location;
2472
2473                         ret = check_dir_conflict(root, backref->name,
2474                                                  backref->namelen,
2475                                                  backref->dir,
2476                                                  backref->index);
2477                         if (ret) {
2478                                 /*
2479                                  * let nlink fixing routine to handle it,
2480                                  * which can do it better.
2481                                  */
2482                                 ret = 0;
2483                                 break;
2484                         }
2485                         location.objectid = rec->ino;
2486                         location.type = BTRFS_INODE_ITEM_KEY;
2487                         location.offset = 0;
2488
2489                         trans = btrfs_start_transaction(root, 1);
2490                         if (IS_ERR(trans)) {
2491                                 ret = PTR_ERR(trans);
2492                                 break;
2493                         }
2494                         fprintf(stderr, "adding missing dir index/item pair "
2495                                 "for inode %llu\n",
2496                                 (unsigned long long)rec->ino);
2497                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2498                                                     backref->namelen,
2499                                                     backref->dir, &location,
2500                                                     imode_to_type(rec->imode),
2501                                                     backref->index);
2502                         BUG_ON(ret);
2503                         btrfs_commit_transaction(trans, root);
2504                         repaired++;
2505                 }
2506
2507                 if (!delete && (backref->found_inode_ref &&
2508                                 backref->found_dir_index &&
2509                                 backref->found_dir_item &&
2510                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2511                                 !rec->found_inode_item)) {
2512                         ret = create_inode_item(root, rec, backref, 0);
2513                         if (ret)
2514                                 break;
2515                         repaired++;
2516                 }
2517
2518         }
2519         return ret ? ret : repaired;
2520 }
2521
2522 /*
2523  * To determine the file type for nlink/inode_item repair
2524  *
2525  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2526  * Return -ENOENT if file type is not found.
2527  */
2528 static int find_file_type(struct inode_record *rec, u8 *type)
2529 {
2530         struct inode_backref *backref;
2531
2532         /* For inode item recovered case */
2533         if (rec->found_inode_item) {
2534                 *type = imode_to_type(rec->imode);
2535                 return 0;
2536         }
2537
2538         list_for_each_entry(backref, &rec->backrefs, list) {
2539                 if (backref->found_dir_index || backref->found_dir_item) {
2540                         *type = backref->filetype;
2541                         return 0;
2542                 }
2543         }
2544         return -ENOENT;
2545 }
2546
2547 /*
2548  * To determine the file name for nlink repair
2549  *
2550  * Return 0 if file name is found, set name and namelen.
2551  * Return -ENOENT if file name is not found.
2552  */
2553 static int find_file_name(struct inode_record *rec,
2554                           char *name, int *namelen)
2555 {
2556         struct inode_backref *backref;
2557
2558         list_for_each_entry(backref, &rec->backrefs, list) {
2559                 if (backref->found_dir_index || backref->found_dir_item ||
2560                     backref->found_inode_ref) {
2561                         memcpy(name, backref->name, backref->namelen);
2562                         *namelen = backref->namelen;
2563                         return 0;
2564                 }
2565         }
2566         return -ENOENT;
2567 }
2568
2569 /* Reset the nlink of the inode to the correct one */
2570 static int reset_nlink(struct btrfs_trans_handle *trans,
2571                        struct btrfs_root *root,
2572                        struct btrfs_path *path,
2573                        struct inode_record *rec)
2574 {
2575         struct inode_backref *backref;
2576         struct inode_backref *tmp;
2577         struct btrfs_key key;
2578         struct btrfs_inode_item *inode_item;
2579         int ret = 0;
2580
2581         /* We don't believe this either, reset it and iterate backref */
2582         rec->found_link = 0;
2583
2584         /* Remove all backref including the valid ones */
2585         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2586                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2587                                    backref->index, backref->name,
2588                                    backref->namelen, 0);
2589                 if (ret < 0)
2590                         goto out;
2591
2592                 /* remove invalid backref, so it won't be added back */
2593                 if (!(backref->found_dir_index &&
2594                       backref->found_dir_item &&
2595                       backref->found_inode_ref)) {
2596                         list_del(&backref->list);
2597                         free(backref);
2598                 } else {
2599                         rec->found_link++;
2600                 }
2601         }
2602
2603         /* Set nlink to 0 */
2604         key.objectid = rec->ino;
2605         key.type = BTRFS_INODE_ITEM_KEY;
2606         key.offset = 0;
2607         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2608         if (ret < 0)
2609                 goto out;
2610         if (ret > 0) {
2611                 ret = -ENOENT;
2612                 goto out;
2613         }
2614         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2615                                     struct btrfs_inode_item);
2616         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2617         btrfs_mark_buffer_dirty(path->nodes[0]);
2618         btrfs_release_path(path);
2619
2620         /*
2621          * Add back valid inode_ref/dir_item/dir_index,
2622          * add_link() will handle the nlink inc, so new nlink must be correct
2623          */
2624         list_for_each_entry(backref, &rec->backrefs, list) {
2625                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2626                                      backref->name, backref->namelen,
2627                                      backref->filetype, &backref->index, 1);
2628                 if (ret < 0)
2629                         goto out;
2630         }
2631 out:
2632         btrfs_release_path(path);
2633         return ret;
2634 }
2635
2636 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2637                                struct btrfs_root *root,
2638                                struct btrfs_path *path,
2639                                struct inode_record *rec)
2640 {
2641         char *dir_name = "lost+found";
2642         char namebuf[BTRFS_NAME_LEN] = {0};
2643         u64 lost_found_ino;
2644         u32 mode = 0700;
2645         u8 type = 0;
2646         int namelen = 0;
2647         int name_recovered = 0;
2648         int type_recovered = 0;
2649         int ret = 0;
2650
2651         /*
2652          * Get file name and type first before these invalid inode ref
2653          * are deleted by remove_all_invalid_backref()
2654          */
2655         name_recovered = !find_file_name(rec, namebuf, &namelen);
2656         type_recovered = !find_file_type(rec, &type);
2657
2658         if (!name_recovered) {
2659                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2660                        rec->ino, rec->ino);
2661                 namelen = count_digits(rec->ino);
2662                 sprintf(namebuf, "%llu", rec->ino);
2663                 name_recovered = 1;
2664         }
2665         if (!type_recovered) {
2666                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2667                        rec->ino);
2668                 type = BTRFS_FT_REG_FILE;
2669                 type_recovered = 1;
2670         }
2671
2672         ret = reset_nlink(trans, root, path, rec);
2673         if (ret < 0) {
2674                 fprintf(stderr,
2675                         "Failed to reset nlink for inode %llu: %s\n",
2676                         rec->ino, strerror(-ret));
2677                 goto out;
2678         }
2679
2680         if (rec->found_link == 0) {
2681                 lost_found_ino = root->highest_inode;
2682                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2683                         ret = -EOVERFLOW;
2684                         goto out;
2685                 }
2686                 lost_found_ino++;
2687                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2688                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2689                                   mode);
2690                 if (ret < 0) {
2691                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2692                                 dir_name, strerror(-ret));
2693                         goto out;
2694                 }
2695                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2696                                      namebuf, namelen, type, NULL, 1);
2697                 /*
2698                  * Add ".INO" suffix several times to handle case where
2699                  * "FILENAME.INO" is already taken by another file.
2700                  */
2701                 while (ret == -EEXIST) {
2702                         /*
2703                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2704                          */
2705                         if (namelen + count_digits(rec->ino) + 1 >
2706                             BTRFS_NAME_LEN) {
2707                                 ret = -EFBIG;
2708                                 goto out;
2709                         }
2710                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2711                                  ".%llu", rec->ino);
2712                         namelen += count_digits(rec->ino) + 1;
2713                         ret = btrfs_add_link(trans, root, rec->ino,
2714                                              lost_found_ino, namebuf,
2715                                              namelen, type, NULL, 1);
2716                 }
2717                 if (ret < 0) {
2718                         fprintf(stderr,
2719                                 "Failed to link the inode %llu to %s dir: %s\n",
2720                                 rec->ino, dir_name, strerror(-ret));
2721                         goto out;
2722                 }
2723                 /*
2724                  * Just increase the found_link, don't actually add the
2725                  * backref. This will make things easier and this inode
2726                  * record will be freed after the repair is done.
2727                  * So fsck will not report problem about this inode.
2728                  */
2729                 rec->found_link++;
2730                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2731                        namelen, namebuf, dir_name);
2732         }
2733         printf("Fixed the nlink of inode %llu\n", rec->ino);
2734 out:
2735         /*
2736          * Clear the flag anyway, or we will loop forever for the same inode
2737          * as it will not be removed from the bad inode list and the dead loop
2738          * happens.
2739          */
2740         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2741         btrfs_release_path(path);
2742         return ret;
2743 }
2744
2745 /*
2746  * Check if there is any normal(reg or prealloc) file extent for given
2747  * ino.
2748  * This is used to determine the file type when neither its dir_index/item or
2749  * inode_item exists.
2750  *
2751  * This will *NOT* report error, if any error happens, just consider it does
2752  * not have any normal file extent.
2753  */
2754 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2755 {
2756         struct btrfs_path *path;
2757         struct btrfs_key key;
2758         struct btrfs_key found_key;
2759         struct btrfs_file_extent_item *fi;
2760         u8 type;
2761         int ret = 0;
2762
2763         path = btrfs_alloc_path();
2764         if (!path)
2765                 goto out;
2766         key.objectid = ino;
2767         key.type = BTRFS_EXTENT_DATA_KEY;
2768         key.offset = 0;
2769
2770         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2771         if (ret < 0) {
2772                 ret = 0;
2773                 goto out;
2774         }
2775         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2776                 ret = btrfs_next_leaf(root, path);
2777                 if (ret) {
2778                         ret = 0;
2779                         goto out;
2780                 }
2781         }
2782         while (1) {
2783                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2784                                       path->slots[0]);
2785                 if (found_key.objectid != ino ||
2786                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2787                         break;
2788                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2789                                     struct btrfs_file_extent_item);
2790                 type = btrfs_file_extent_type(path->nodes[0], fi);
2791                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2792                         ret = 1;
2793                         goto out;
2794                 }
2795         }
2796 out:
2797         btrfs_free_path(path);
2798         return ret;
2799 }
2800
2801 static u32 btrfs_type_to_imode(u8 type)
2802 {
2803         static u32 imode_by_btrfs_type[] = {
2804                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2805                 [BTRFS_FT_DIR]          = S_IFDIR,
2806                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2807                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2808                 [BTRFS_FT_FIFO]         = S_IFIFO,
2809                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2810                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2811         };
2812
2813         return imode_by_btrfs_type[(type)];
2814 }
2815
2816 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2817                                 struct btrfs_root *root,
2818                                 struct btrfs_path *path,
2819                                 struct inode_record *rec)
2820 {
2821         u8 filetype;
2822         u32 mode = 0700;
2823         int type_recovered = 0;
2824         int ret = 0;
2825
2826         printf("Trying to rebuild inode:%llu\n", rec->ino);
2827
2828         type_recovered = !find_file_type(rec, &filetype);
2829
2830         /*
2831          * Try to determine inode type if type not found.
2832          *
2833          * For found regular file extent, it must be FILE.
2834          * For found dir_item/index, it must be DIR.
2835          *
2836          * For undetermined one, use FILE as fallback.
2837          *
2838          * TODO:
2839          * 1. If found backref(inode_index/item is already handled) to it,
2840          *    it must be DIR.
2841          *    Need new inode-inode ref structure to allow search for that.
2842          */
2843         if (!type_recovered) {
2844                 if (rec->found_file_extent &&
2845                     find_normal_file_extent(root, rec->ino)) {
2846                         type_recovered = 1;
2847                         filetype = BTRFS_FT_REG_FILE;
2848                 } else if (rec->found_dir_item) {
2849                         type_recovered = 1;
2850                         filetype = BTRFS_FT_DIR;
2851                 } else if (!list_empty(&rec->orphan_extents)) {
2852                         type_recovered = 1;
2853                         filetype = BTRFS_FT_REG_FILE;
2854                 } else{
2855                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2856                                rec->ino);
2857                         type_recovered = 1;
2858                         filetype = BTRFS_FT_REG_FILE;
2859                 }
2860         }
2861
2862         ret = btrfs_new_inode(trans, root, rec->ino,
2863                               mode | btrfs_type_to_imode(filetype));
2864         if (ret < 0)
2865                 goto out;
2866
2867         /*
2868          * Here inode rebuild is done, we only rebuild the inode item,
2869          * don't repair the nlink(like move to lost+found).
2870          * That is the job of nlink repair.
2871          *
2872          * We just fill the record and return
2873          */
2874         rec->found_dir_item = 1;
2875         rec->imode = mode | btrfs_type_to_imode(filetype);
2876         rec->nlink = 0;
2877         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2878         /* Ensure the inode_nlinks repair function will be called */
2879         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2880 out:
2881         return ret;
2882 }
2883
2884 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2885                                       struct btrfs_root *root,
2886                                       struct btrfs_path *path,
2887                                       struct inode_record *rec)
2888 {
2889         struct orphan_data_extent *orphan;
2890         struct orphan_data_extent *tmp;
2891         int ret = 0;
2892
2893         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2894                 /*
2895                  * Check for conflicting file extents
2896                  *
2897                  * Here we don't know whether the extents is compressed or not,
2898                  * so we can only assume it not compressed nor data offset,
2899                  * and use its disk_len as extent length.
2900                  */
2901                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2902                                        orphan->offset, orphan->disk_len, 0);
2903                 btrfs_release_path(path);
2904                 if (ret < 0)
2905                         goto out;
2906                 if (!ret) {
2907                         fprintf(stderr,
2908                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2909                                 orphan->disk_bytenr, orphan->disk_len);
2910                         ret = btrfs_free_extent(trans,
2911                                         root->fs_info->extent_root,
2912                                         orphan->disk_bytenr, orphan->disk_len,
2913                                         0, root->objectid, orphan->objectid,
2914                                         orphan->offset);
2915                         if (ret < 0)
2916                                 goto out;
2917                 }
2918                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2919                                 orphan->offset, orphan->disk_bytenr,
2920                                 orphan->disk_len, orphan->disk_len);
2921                 if (ret < 0)
2922                         goto out;
2923
2924                 /* Update file size info */
2925                 rec->found_size += orphan->disk_len;
2926                 if (rec->found_size == rec->nbytes)
2927                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2928
2929                 /* Update the file extent hole info too */
2930                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2931                                            orphan->disk_len);
2932                 if (ret < 0)
2933                         goto out;
2934                 if (RB_EMPTY_ROOT(&rec->holes))
2935                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2936
2937                 list_del(&orphan->list);
2938                 free(orphan);
2939         }
2940         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2941 out:
2942         return ret;
2943 }
2944
2945 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2946                                         struct btrfs_root *root,
2947                                         struct btrfs_path *path,
2948                                         struct inode_record *rec)
2949 {
2950         struct rb_node *node;
2951         struct file_extent_hole *hole;
2952         int found = 0;
2953         int ret = 0;
2954
2955         node = rb_first(&rec->holes);
2956
2957         while (node) {
2958                 found = 1;
2959                 hole = rb_entry(node, struct file_extent_hole, node);
2960                 ret = btrfs_punch_hole(trans, root, rec->ino,
2961                                        hole->start, hole->len);
2962                 if (ret < 0)
2963                         goto out;
2964                 ret = del_file_extent_hole(&rec->holes, hole->start,
2965                                            hole->len);
2966                 if (ret < 0)
2967                         goto out;
2968                 if (RB_EMPTY_ROOT(&rec->holes))
2969                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2970                 node = rb_first(&rec->holes);
2971         }
2972         /* special case for a file losing all its file extent */
2973         if (!found) {
2974                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2975                                        round_up(rec->isize, root->sectorsize));
2976                 if (ret < 0)
2977                         goto out;
2978         }
2979         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2980                rec->ino, root->objectid);
2981 out:
2982         return ret;
2983 }
2984
2985 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2986 {
2987         struct btrfs_trans_handle *trans;
2988         struct btrfs_path *path;
2989         int ret = 0;
2990
2991         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2992                              I_ERR_NO_ORPHAN_ITEM |
2993                              I_ERR_LINK_COUNT_WRONG |
2994                              I_ERR_NO_INODE_ITEM |
2995                              I_ERR_FILE_EXTENT_ORPHAN |
2996                              I_ERR_FILE_EXTENT_DISCOUNT|
2997                              I_ERR_FILE_NBYTES_WRONG)))
2998                 return rec->errors;
2999
3000         path = btrfs_alloc_path();
3001         if (!path)
3002                 return -ENOMEM;
3003
3004         /*
3005          * For nlink repair, it may create a dir and add link, so
3006          * 2 for parent(256)'s dir_index and dir_item
3007          * 2 for lost+found dir's inode_item and inode_ref
3008          * 1 for the new inode_ref of the file
3009          * 2 for lost+found dir's dir_index and dir_item for the file
3010          */
3011         trans = btrfs_start_transaction(root, 7);
3012         if (IS_ERR(trans)) {
3013                 btrfs_free_path(path);
3014                 return PTR_ERR(trans);
3015         }
3016
3017         if (rec->errors & I_ERR_NO_INODE_ITEM)
3018                 ret = repair_inode_no_item(trans, root, path, rec);
3019         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3020                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3021         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3022                 ret = repair_inode_discount_extent(trans, root, path, rec);
3023         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3024                 ret = repair_inode_isize(trans, root, path, rec);
3025         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3026                 ret = repair_inode_orphan_item(trans, root, path, rec);
3027         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3028                 ret = repair_inode_nlinks(trans, root, path, rec);
3029         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3030                 ret = repair_inode_nbytes(trans, root, path, rec);
3031         btrfs_commit_transaction(trans, root);
3032         btrfs_free_path(path);
3033         return ret;
3034 }
3035
3036 static int check_inode_recs(struct btrfs_root *root,
3037                             struct cache_tree *inode_cache)
3038 {
3039         struct cache_extent *cache;
3040         struct ptr_node *node;
3041         struct inode_record *rec;
3042         struct inode_backref *backref;
3043         int stage = 0;
3044         int ret = 0;
3045         int err = 0;
3046         u64 error = 0;
3047         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3048
3049         if (btrfs_root_refs(&root->root_item) == 0) {
3050                 if (!cache_tree_empty(inode_cache))
3051                         fprintf(stderr, "warning line %d\n", __LINE__);
3052                 return 0;
3053         }
3054
3055         /*
3056          * We need to record the highest inode number for later 'lost+found'
3057          * dir creation.
3058          * We must select an ino not used/referred by any existing inode, or
3059          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3060          * this may cause 'lost+found' dir has wrong nlinks.
3061          */
3062         cache = last_cache_extent(inode_cache);
3063         if (cache) {
3064                 node = container_of(cache, struct ptr_node, cache);
3065                 rec = node->data;
3066                 if (rec->ino > root->highest_inode)
3067                         root->highest_inode = rec->ino;
3068         }
3069
3070         /*
3071          * We need to repair backrefs first because we could change some of the
3072          * errors in the inode recs.
3073          *
3074          * We also need to go through and delete invalid backrefs first and then
3075          * add the correct ones second.  We do this because we may get EEXIST
3076          * when adding back the correct index because we hadn't yet deleted the
3077          * invalid index.
3078          *
3079          * For example, if we were missing a dir index then the directories
3080          * isize would be wrong, so if we fixed the isize to what we thought it
3081          * would be and then fixed the backref we'd still have a invalid fs, so
3082          * we need to add back the dir index and then check to see if the isize
3083          * is still wrong.
3084          */
3085         while (stage < 3) {
3086                 stage++;
3087                 if (stage == 3 && !err)
3088                         break;
3089
3090                 cache = search_cache_extent(inode_cache, 0);
3091                 while (repair && cache) {
3092                         node = container_of(cache, struct ptr_node, cache);
3093                         rec = node->data;
3094                         cache = next_cache_extent(cache);
3095
3096                         /* Need to free everything up and rescan */
3097                         if (stage == 3) {
3098                                 remove_cache_extent(inode_cache, &node->cache);
3099                                 free(node);
3100                                 free_inode_rec(rec);
3101                                 continue;
3102                         }
3103
3104                         if (list_empty(&rec->backrefs))
3105                                 continue;
3106
3107                         ret = repair_inode_backrefs(root, rec, inode_cache,
3108                                                     stage == 1);
3109                         if (ret < 0) {
3110                                 err = ret;
3111                                 stage = 2;
3112                                 break;
3113                         } if (ret > 0) {
3114                                 err = -EAGAIN;
3115                         }
3116                 }
3117         }
3118         if (err)
3119                 return err;
3120
3121         rec = get_inode_rec(inode_cache, root_dirid, 0);
3122         BUG_ON(IS_ERR(rec));
3123         if (rec) {
3124                 ret = check_root_dir(rec);
3125                 if (ret) {
3126                         fprintf(stderr, "root %llu root dir %llu error\n",
3127                                 (unsigned long long)root->root_key.objectid,
3128                                 (unsigned long long)root_dirid);
3129                         print_inode_error(root, rec);
3130                         error++;
3131                 }
3132         } else {
3133                 if (repair) {
3134                         struct btrfs_trans_handle *trans;
3135
3136                         trans = btrfs_start_transaction(root, 1);
3137                         if (IS_ERR(trans)) {
3138                                 err = PTR_ERR(trans);
3139                                 return err;
3140                         }
3141
3142                         fprintf(stderr,
3143                                 "root %llu missing its root dir, recreating\n",
3144                                 (unsigned long long)root->objectid);
3145
3146                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3147                         BUG_ON(ret);
3148
3149                         btrfs_commit_transaction(trans, root);
3150                         return -EAGAIN;
3151                 }
3152
3153                 fprintf(stderr, "root %llu root dir %llu not found\n",
3154                         (unsigned long long)root->root_key.objectid,
3155                         (unsigned long long)root_dirid);
3156         }
3157
3158         while (1) {
3159                 cache = search_cache_extent(inode_cache, 0);
3160                 if (!cache)
3161                         break;
3162                 node = container_of(cache, struct ptr_node, cache);
3163                 rec = node->data;
3164                 remove_cache_extent(inode_cache, &node->cache);
3165                 free(node);
3166                 if (rec->ino == root_dirid ||
3167                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3168                         free_inode_rec(rec);
3169                         continue;
3170                 }
3171
3172                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3173                         ret = check_orphan_item(root, rec->ino);
3174                         if (ret == 0)
3175                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3176                         if (can_free_inode_rec(rec)) {
3177                                 free_inode_rec(rec);
3178                                 continue;
3179                         }
3180                 }
3181
3182                 if (!rec->found_inode_item)
3183                         rec->errors |= I_ERR_NO_INODE_ITEM;
3184                 if (rec->found_link != rec->nlink)
3185                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3186                 if (repair) {
3187                         ret = try_repair_inode(root, rec);
3188                         if (ret == 0 && can_free_inode_rec(rec)) {
3189                                 free_inode_rec(rec);
3190                                 continue;
3191                         }
3192                         ret = 0;
3193                 }
3194
3195                 if (!(repair && ret == 0))
3196                         error++;
3197                 print_inode_error(root, rec);
3198                 list_for_each_entry(backref, &rec->backrefs, list) {
3199                         if (!backref->found_dir_item)
3200                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3201                         if (!backref->found_dir_index)
3202                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3203                         if (!backref->found_inode_ref)
3204                                 backref->errors |= REF_ERR_NO_INODE_REF;
3205                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3206                                 " namelen %u name %s filetype %d errors %x",
3207                                 (unsigned long long)backref->dir,
3208                                 (unsigned long long)backref->index,
3209                                 backref->namelen, backref->name,
3210                                 backref->filetype, backref->errors);
3211                         print_ref_error(backref->errors);
3212                 }
3213                 free_inode_rec(rec);
3214         }
3215         return (error > 0) ? -1 : 0;
3216 }
3217
3218 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3219                                         u64 objectid)
3220 {
3221         struct cache_extent *cache;
3222         struct root_record *rec = NULL;
3223         int ret;
3224
3225         cache = lookup_cache_extent(root_cache, objectid, 1);
3226         if (cache) {
3227                 rec = container_of(cache, struct root_record, cache);
3228         } else {
3229                 rec = calloc(1, sizeof(*rec));
3230                 if (!rec)
3231                         return ERR_PTR(-ENOMEM);
3232                 rec->objectid = objectid;
3233                 INIT_LIST_HEAD(&rec->backrefs);
3234                 rec->cache.start = objectid;
3235                 rec->cache.size = 1;
3236
3237                 ret = insert_cache_extent(root_cache, &rec->cache);
3238                 if (ret)
3239                         return ERR_PTR(-EEXIST);
3240         }
3241         return rec;
3242 }
3243
3244 static struct root_backref *get_root_backref(struct root_record *rec,
3245                                              u64 ref_root, u64 dir, u64 index,
3246                                              const char *name, int namelen)
3247 {
3248         struct root_backref *backref;
3249
3250         list_for_each_entry(backref, &rec->backrefs, list) {
3251                 if (backref->ref_root != ref_root || backref->dir != dir ||
3252                     backref->namelen != namelen)
3253                         continue;
3254                 if (memcmp(name, backref->name, namelen))
3255                         continue;
3256                 return backref;
3257         }
3258
3259         backref = calloc(1, sizeof(*backref) + namelen + 1);
3260         if (!backref)
3261                 return NULL;
3262         backref->ref_root = ref_root;
3263         backref->dir = dir;
3264         backref->index = index;
3265         backref->namelen = namelen;
3266         memcpy(backref->name, name, namelen);
3267         backref->name[namelen] = '\0';
3268         list_add_tail(&backref->list, &rec->backrefs);
3269         return backref;
3270 }
3271
3272 static void free_root_record(struct cache_extent *cache)
3273 {
3274         struct root_record *rec;
3275         struct root_backref *backref;
3276
3277         rec = container_of(cache, struct root_record, cache);
3278         while (!list_empty(&rec->backrefs)) {
3279                 backref = to_root_backref(rec->backrefs.next);
3280                 list_del(&backref->list);
3281                 free(backref);
3282         }
3283
3284         kfree(rec);
3285 }
3286
3287 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3288
3289 static int add_root_backref(struct cache_tree *root_cache,
3290                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3291                             const char *name, int namelen,
3292                             int item_type, int errors)
3293 {
3294         struct root_record *rec;
3295         struct root_backref *backref;
3296
3297         rec = get_root_rec(root_cache, root_id);
3298         BUG_ON(IS_ERR(rec));
3299         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3300         BUG_ON(!backref);
3301
3302         backref->errors |= errors;
3303
3304         if (item_type != BTRFS_DIR_ITEM_KEY) {
3305                 if (backref->found_dir_index || backref->found_back_ref ||
3306                     backref->found_forward_ref) {
3307                         if (backref->index != index)
3308                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3309                 } else {
3310                         backref->index = index;
3311                 }
3312         }
3313
3314         if (item_type == BTRFS_DIR_ITEM_KEY) {
3315                 if (backref->found_forward_ref)
3316                         rec->found_ref++;
3317                 backref->found_dir_item = 1;
3318         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3319                 backref->found_dir_index = 1;
3320         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3321                 if (backref->found_forward_ref)
3322                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3323                 else if (backref->found_dir_item)
3324                         rec->found_ref++;
3325                 backref->found_forward_ref = 1;
3326         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3327                 if (backref->found_back_ref)
3328                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3329                 backref->found_back_ref = 1;
3330         } else {
3331                 BUG_ON(1);
3332         }
3333
3334         if (backref->found_forward_ref && backref->found_dir_item)
3335                 backref->reachable = 1;
3336         return 0;
3337 }
3338
3339 static int merge_root_recs(struct btrfs_root *root,
3340                            struct cache_tree *src_cache,
3341                            struct cache_tree *dst_cache)
3342 {
3343         struct cache_extent *cache;
3344         struct ptr_node *node;
3345         struct inode_record *rec;
3346         struct inode_backref *backref;
3347         int ret = 0;
3348
3349         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3350                 free_inode_recs_tree(src_cache);
3351                 return 0;
3352         }
3353
3354         while (1) {
3355                 cache = search_cache_extent(src_cache, 0);
3356                 if (!cache)
3357                         break;
3358                 node = container_of(cache, struct ptr_node, cache);
3359                 rec = node->data;
3360                 remove_cache_extent(src_cache, &node->cache);
3361                 free(node);
3362
3363                 ret = is_child_root(root, root->objectid, rec->ino);
3364                 if (ret < 0)
3365                         break;
3366                 else if (ret == 0)
3367                         goto skip;
3368
3369                 list_for_each_entry(backref, &rec->backrefs, list) {
3370                         BUG_ON(backref->found_inode_ref);
3371                         if (backref->found_dir_item)
3372                                 add_root_backref(dst_cache, rec->ino,
3373                                         root->root_key.objectid, backref->dir,
3374                                         backref->index, backref->name,
3375                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3376                                         backref->errors);
3377                         if (backref->found_dir_index)
3378                                 add_root_backref(dst_cache, rec->ino,
3379                                         root->root_key.objectid, backref->dir,
3380                                         backref->index, backref->name,
3381                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3382                                         backref->errors);
3383                 }
3384 skip:
3385                 free_inode_rec(rec);
3386         }
3387         if (ret < 0)
3388                 return ret;
3389         return 0;
3390 }
3391
3392 static int check_root_refs(struct btrfs_root *root,
3393                            struct cache_tree *root_cache)
3394 {
3395         struct root_record *rec;
3396         struct root_record *ref_root;
3397         struct root_backref *backref;
3398         struct cache_extent *cache;
3399         int loop = 1;
3400         int ret;
3401         int error;
3402         int errors = 0;
3403
3404         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3405         BUG_ON(IS_ERR(rec));
3406         rec->found_ref = 1;
3407
3408         /* fixme: this can not detect circular references */
3409         while (loop) {
3410                 loop = 0;
3411                 cache = search_cache_extent(root_cache, 0);
3412                 while (1) {
3413                         if (!cache)
3414                                 break;
3415                         rec = container_of(cache, struct root_record, cache);
3416                         cache = next_cache_extent(cache);
3417
3418                         if (rec->found_ref == 0)
3419                                 continue;
3420
3421                         list_for_each_entry(backref, &rec->backrefs, list) {
3422                                 if (!backref->reachable)
3423                                         continue;
3424
3425                                 ref_root = get_root_rec(root_cache,
3426                                                         backref->ref_root);
3427                                 BUG_ON(IS_ERR(ref_root));
3428                                 if (ref_root->found_ref > 0)
3429                                         continue;
3430
3431                                 backref->reachable = 0;
3432                                 rec->found_ref--;
3433                                 if (rec->found_ref == 0)
3434                                         loop = 1;
3435                         }
3436                 }
3437         }
3438
3439         cache = search_cache_extent(root_cache, 0);
3440         while (1) {
3441                 if (!cache)
3442                         break;
3443                 rec = container_of(cache, struct root_record, cache);
3444                 cache = next_cache_extent(cache);
3445
3446                 if (rec->found_ref == 0 &&
3447                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3448                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3449                         ret = check_orphan_item(root->fs_info->tree_root,
3450                                                 rec->objectid);
3451                         if (ret == 0)
3452                                 continue;
3453
3454                         /*
3455                          * If we don't have a root item then we likely just have
3456                          * a dir item in a snapshot for this root but no actual
3457                          * ref key or anything so it's meaningless.
3458                          */
3459                         if (!rec->found_root_item)
3460                                 continue;
3461                         errors++;
3462                         fprintf(stderr, "fs tree %llu not referenced\n",
3463                                 (unsigned long long)rec->objectid);
3464                 }
3465
3466                 error = 0;
3467                 if (rec->found_ref > 0 && !rec->found_root_item)
3468                         error = 1;
3469                 list_for_each_entry(backref, &rec->backrefs, list) {
3470                         if (!backref->found_dir_item)
3471                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3472                         if (!backref->found_dir_index)
3473                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3474                         if (!backref->found_back_ref)
3475                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3476                         if (!backref->found_forward_ref)
3477                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3478                         if (backref->reachable && backref->errors)
3479                                 error = 1;
3480                 }
3481                 if (!error)
3482                         continue;
3483
3484                 errors++;
3485                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3486                         (unsigned long long)rec->objectid, rec->found_ref,
3487                          rec->found_root_item ? "" : "not found");
3488
3489                 list_for_each_entry(backref, &rec->backrefs, list) {
3490                         if (!backref->reachable)
3491                                 continue;
3492                         if (!backref->errors && rec->found_root_item)
3493                                 continue;
3494                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3495                                 " index %llu namelen %u name %s errors %x\n",
3496                                 (unsigned long long)backref->ref_root,
3497                                 (unsigned long long)backref->dir,
3498                                 (unsigned long long)backref->index,
3499                                 backref->namelen, backref->name,
3500                                 backref->errors);
3501                         print_ref_error(backref->errors);
3502                 }
3503         }
3504         return errors > 0 ? 1 : 0;
3505 }
3506
3507 static int process_root_ref(struct extent_buffer *eb, int slot,
3508                             struct btrfs_key *key,
3509                             struct cache_tree *root_cache)
3510 {
3511         u64 dirid;
3512         u64 index;
3513         u32 len;
3514         u32 name_len;
3515         struct btrfs_root_ref *ref;
3516         char namebuf[BTRFS_NAME_LEN];
3517         int error;
3518
3519         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3520
3521         dirid = btrfs_root_ref_dirid(eb, ref);
3522         index = btrfs_root_ref_sequence(eb, ref);
3523         name_len = btrfs_root_ref_name_len(eb, ref);
3524
3525         if (name_len <= BTRFS_NAME_LEN) {
3526                 len = name_len;
3527                 error = 0;
3528         } else {
3529                 len = BTRFS_NAME_LEN;
3530                 error = REF_ERR_NAME_TOO_LONG;
3531         }
3532         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3533
3534         if (key->type == BTRFS_ROOT_REF_KEY) {
3535                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3536                                  index, namebuf, len, key->type, error);
3537         } else {
3538                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3539                                  index, namebuf, len, key->type, error);
3540         }
3541         return 0;
3542 }
3543
3544 static void free_corrupt_block(struct cache_extent *cache)
3545 {
3546         struct btrfs_corrupt_block *corrupt;
3547
3548         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3549         free(corrupt);
3550 }
3551
3552 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3553
3554 /*
3555  * Repair the btree of the given root.
3556  *
3557  * The fix is to remove the node key in corrupt_blocks cache_tree.
3558  * and rebalance the tree.
3559  * After the fix, the btree should be writeable.
3560  */
3561 static int repair_btree(struct btrfs_root *root,
3562                         struct cache_tree *corrupt_blocks)
3563 {
3564         struct btrfs_trans_handle *trans;
3565         struct btrfs_path *path;
3566         struct btrfs_corrupt_block *corrupt;
3567         struct cache_extent *cache;
3568         struct btrfs_key key;
3569         u64 offset;
3570         int level;
3571         int ret = 0;
3572
3573         if (cache_tree_empty(corrupt_blocks))
3574                 return 0;
3575
3576         path = btrfs_alloc_path();
3577         if (!path)
3578                 return -ENOMEM;
3579
3580         trans = btrfs_start_transaction(root, 1);
3581         if (IS_ERR(trans)) {
3582                 ret = PTR_ERR(trans);
3583                 fprintf(stderr, "Error starting transaction: %s\n",
3584                         strerror(-ret));
3585                 goto out_free_path;
3586         }
3587         cache = first_cache_extent(corrupt_blocks);
3588         while (cache) {
3589                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3590                                        cache);
3591                 level = corrupt->level;
3592                 path->lowest_level = level;
3593                 key.objectid = corrupt->key.objectid;
3594                 key.type = corrupt->key.type;
3595                 key.offset = corrupt->key.offset;
3596
3597                 /*
3598                  * Here we don't want to do any tree balance, since it may
3599                  * cause a balance with corrupted brother leaf/node,
3600                  * so ins_len set to 0 here.
3601                  * Balance will be done after all corrupt node/leaf is deleted.
3602                  */
3603                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3604                 if (ret < 0)
3605                         goto out;
3606                 offset = btrfs_node_blockptr(path->nodes[level],
3607                                              path->slots[level]);
3608
3609                 /* Remove the ptr */
3610                 ret = btrfs_del_ptr(trans, root, path, level,
3611                                     path->slots[level]);
3612                 if (ret < 0)
3613                         goto out;
3614                 /*
3615                  * Remove the corresponding extent
3616                  * return value is not concerned.
3617                  */
3618                 btrfs_release_path(path);
3619                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3620                                         0, root->root_key.objectid,
3621                                         level - 1, 0);
3622                 cache = next_cache_extent(cache);
3623         }
3624
3625         /* Balance the btree using btrfs_search_slot() */
3626         cache = first_cache_extent(corrupt_blocks);
3627         while (cache) {
3628                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3629                                        cache);
3630                 memcpy(&key, &corrupt->key, sizeof(key));
3631                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3632                 if (ret < 0)
3633                         goto out;
3634                 /* return will always >0 since it won't find the item */
3635                 ret = 0;
3636                 btrfs_release_path(path);
3637                 cache = next_cache_extent(cache);
3638         }
3639 out:
3640         btrfs_commit_transaction(trans, root);
3641 out_free_path:
3642         btrfs_free_path(path);
3643         return ret;
3644 }
3645
3646 static int check_fs_root(struct btrfs_root *root,
3647                          struct cache_tree *root_cache,
3648                          struct walk_control *wc)
3649 {
3650         int ret = 0;
3651         int err = 0;
3652         int wret;
3653         int level;
3654         struct btrfs_path path;
3655         struct shared_node root_node;
3656         struct root_record *rec;
3657         struct btrfs_root_item *root_item = &root->root_item;
3658         struct cache_tree corrupt_blocks;
3659         struct orphan_data_extent *orphan;
3660         struct orphan_data_extent *tmp;
3661         enum btrfs_tree_block_status status;
3662         struct node_refs nrefs;
3663
3664         /*
3665          * Reuse the corrupt_block cache tree to record corrupted tree block
3666          *
3667          * Unlike the usage in extent tree check, here we do it in a per
3668          * fs/subvol tree base.
3669          */
3670         cache_tree_init(&corrupt_blocks);
3671         root->fs_info->corrupt_blocks = &corrupt_blocks;
3672
3673         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3674                 rec = get_root_rec(root_cache, root->root_key.objectid);
3675                 BUG_ON(IS_ERR(rec));
3676                 if (btrfs_root_refs(root_item) > 0)
3677                         rec->found_root_item = 1;
3678         }
3679
3680         btrfs_init_path(&path);
3681         memset(&root_node, 0, sizeof(root_node));
3682         cache_tree_init(&root_node.root_cache);
3683         cache_tree_init(&root_node.inode_cache);
3684         memset(&nrefs, 0, sizeof(nrefs));
3685
3686         /* Move the orphan extent record to corresponding inode_record */
3687         list_for_each_entry_safe(orphan, tmp,
3688                                  &root->orphan_data_extents, list) {
3689                 struct inode_record *inode;
3690
3691                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3692                                       1);
3693                 BUG_ON(IS_ERR(inode));
3694                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3695                 list_move(&orphan->list, &inode->orphan_extents);
3696         }
3697
3698         level = btrfs_header_level(root->node);
3699         memset(wc->nodes, 0, sizeof(wc->nodes));
3700         wc->nodes[level] = &root_node;
3701         wc->active_node = level;
3702         wc->root_level = level;
3703
3704         /* We may not have checked the root block, lets do that now */
3705         if (btrfs_is_leaf(root->node))
3706                 status = btrfs_check_leaf(root, NULL, root->node);
3707         else
3708                 status = btrfs_check_node(root, NULL, root->node);
3709         if (status != BTRFS_TREE_BLOCK_CLEAN)
3710                 return -EIO;
3711
3712         if (btrfs_root_refs(root_item) > 0 ||
3713             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3714                 path.nodes[level] = root->node;
3715                 extent_buffer_get(root->node);
3716                 path.slots[level] = 0;
3717         } else {
3718                 struct btrfs_key key;
3719                 struct btrfs_disk_key found_key;
3720
3721                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3722                 level = root_item->drop_level;
3723                 path.lowest_level = level;
3724                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3725                 if (wret < 0)
3726                         goto skip_walking;
3727                 btrfs_node_key(path.nodes[level], &found_key,
3728                                 path.slots[level]);
3729                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3730                                         sizeof(found_key)));
3731         }
3732
3733         while (1) {
3734                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3735                 if (wret < 0)
3736                         ret = wret;
3737                 if (wret != 0)
3738                         break;
3739
3740                 wret = walk_up_tree(root, &path, wc, &level);
3741                 if (wret < 0)
3742                         ret = wret;
3743                 if (wret != 0)
3744                         break;
3745         }
3746 skip_walking:
3747         btrfs_release_path(&path);
3748
3749         if (!cache_tree_empty(&corrupt_blocks)) {
3750                 struct cache_extent *cache;
3751                 struct btrfs_corrupt_block *corrupt;
3752
3753                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3754                        root->root_key.objectid);
3755                 cache = first_cache_extent(&corrupt_blocks);
3756                 while (cache) {
3757                         corrupt = container_of(cache,
3758                                                struct btrfs_corrupt_block,
3759                                                cache);
3760                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3761                                cache->start, corrupt->level,
3762                                corrupt->key.objectid, corrupt->key.type,
3763                                corrupt->key.offset);
3764                         cache = next_cache_extent(cache);
3765                 }
3766                 if (repair) {
3767                         printf("Try to repair the btree for root %llu\n",
3768                                root->root_key.objectid);
3769                         ret = repair_btree(root, &corrupt_blocks);
3770                         if (ret < 0)
3771                                 fprintf(stderr, "Failed to repair btree: %s\n",
3772                                         strerror(-ret));
3773                         if (!ret)
3774                                 printf("Btree for root %llu is fixed\n",
3775                                        root->root_key.objectid);
3776                 }
3777         }
3778
3779         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3780         if (err < 0)
3781                 ret = err;
3782
3783         if (root_node.current) {
3784                 root_node.current->checked = 1;
3785                 maybe_free_inode_rec(&root_node.inode_cache,
3786                                 root_node.current);
3787         }
3788
3789         err = check_inode_recs(root, &root_node.inode_cache);
3790         if (!ret)
3791                 ret = err;
3792
3793         free_corrupt_blocks_tree(&corrupt_blocks);
3794         root->fs_info->corrupt_blocks = NULL;
3795         free_orphan_data_extents(&root->orphan_data_extents);
3796         return ret;
3797 }
3798
3799 static int fs_root_objectid(u64 objectid)
3800 {
3801         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3802             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3803                 return 1;
3804         return is_fstree(objectid);
3805 }
3806
3807 static int check_fs_roots(struct btrfs_root *root,
3808                           struct cache_tree *root_cache)
3809 {
3810         struct btrfs_path path;
3811         struct btrfs_key key;
3812         struct walk_control wc;
3813         struct extent_buffer *leaf, *tree_node;
3814         struct btrfs_root *tmp_root;
3815         struct btrfs_root *tree_root = root->fs_info->tree_root;
3816         int ret;
3817         int err = 0;
3818
3819         if (ctx.progress_enabled) {
3820                 ctx.tp = TASK_FS_ROOTS;
3821                 task_start(ctx.info);
3822         }
3823
3824         /*
3825          * Just in case we made any changes to the extent tree that weren't
3826          * reflected into the free space cache yet.
3827          */
3828         if (repair)
3829                 reset_cached_block_groups(root->fs_info);
3830         memset(&wc, 0, sizeof(wc));
3831         cache_tree_init(&wc.shared);
3832         btrfs_init_path(&path);
3833
3834 again:
3835         key.offset = 0;
3836         key.objectid = 0;
3837         key.type = BTRFS_ROOT_ITEM_KEY;
3838         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3839         if (ret < 0) {
3840                 err = 1;
3841                 goto out;
3842         }
3843         tree_node = tree_root->node;
3844         while (1) {
3845                 if (tree_node != tree_root->node) {
3846                         free_root_recs_tree(root_cache);
3847                         btrfs_release_path(&path);
3848                         goto again;
3849                 }
3850                 leaf = path.nodes[0];
3851                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3852                         ret = btrfs_next_leaf(tree_root, &path);
3853                         if (ret) {
3854                                 if (ret < 0)
3855                                         err = 1;
3856                                 break;
3857                         }
3858                         leaf = path.nodes[0];
3859                 }
3860                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3861                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3862                     fs_root_objectid(key.objectid)) {
3863                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3864                                 tmp_root = btrfs_read_fs_root_no_cache(
3865                                                 root->fs_info, &key);
3866                         } else {
3867                                 key.offset = (u64)-1;
3868                                 tmp_root = btrfs_read_fs_root(
3869                                                 root->fs_info, &key);
3870                         }
3871                         if (IS_ERR(tmp_root)) {
3872                                 err = 1;
3873                                 goto next;
3874                         }
3875                         ret = check_fs_root(tmp_root, root_cache, &wc);
3876                         if (ret == -EAGAIN) {
3877                                 free_root_recs_tree(root_cache);
3878                                 btrfs_release_path(&path);
3879                                 goto again;
3880                         }
3881                         if (ret)
3882                                 err = 1;
3883                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3884                                 btrfs_free_fs_root(tmp_root);
3885                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3886                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3887                         process_root_ref(leaf, path.slots[0], &key,
3888                                          root_cache);
3889                 }
3890 next:
3891                 path.slots[0]++;
3892         }
3893 out:
3894         btrfs_release_path(&path);
3895         if (err)
3896                 free_extent_cache_tree(&wc.shared);
3897         if (!cache_tree_empty(&wc.shared))
3898                 fprintf(stderr, "warning line %d\n", __LINE__);
3899
3900         task_stop(ctx.info);
3901
3902         return err;
3903 }
3904
3905 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3906 {
3907         struct rb_node *n;
3908         struct extent_backref *back;
3909         struct tree_backref *tback;
3910         struct data_backref *dback;
3911         u64 found = 0;
3912         int err = 0;
3913
3914         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3915                 back = rb_node_to_extent_backref(n);
3916                 if (!back->found_extent_tree) {
3917                         err = 1;
3918                         if (!print_errs)
3919                                 goto out;
3920                         if (back->is_data) {
3921                                 dback = to_data_backref(back);
3922                                 fprintf(stderr, "Backref %llu %s %llu"
3923                                         " owner %llu offset %llu num_refs %lu"
3924                                         " not found in extent tree\n",
3925                                         (unsigned long long)rec->start,
3926                                         back->full_backref ?
3927                                         "parent" : "root",
3928                                         back->full_backref ?
3929                                         (unsigned long long)dback->parent:
3930                                         (unsigned long long)dback->root,
3931                                         (unsigned long long)dback->owner,
3932                                         (unsigned long long)dback->offset,
3933                                         (unsigned long)dback->num_refs);
3934                         } else {
3935                                 tback = to_tree_backref(back);
3936                                 fprintf(stderr, "Backref %llu parent %llu"
3937                                         " root %llu not found in extent tree\n",
3938                                         (unsigned long long)rec->start,
3939                                         (unsigned long long)tback->parent,
3940                                         (unsigned long long)tback->root);
3941                         }
3942                 }
3943                 if (!back->is_data && !back->found_ref) {
3944                         err = 1;
3945                         if (!print_errs)
3946                                 goto out;
3947                         tback = to_tree_backref(back);
3948                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3949                                 (unsigned long long)rec->start,
3950                                 back->full_backref ? "parent" : "root",
3951                                 back->full_backref ?
3952                                 (unsigned long long)tback->parent :
3953                                 (unsigned long long)tback->root, back);
3954                 }
3955                 if (back->is_data) {
3956                         dback = to_data_backref(back);
3957                         if (dback->found_ref != dback->num_refs) {
3958                                 err = 1;
3959                                 if (!print_errs)
3960                                         goto out;
3961                                 fprintf(stderr, "Incorrect local backref count"
3962                                         " on %llu %s %llu owner %llu"
3963                                         " offset %llu found %u wanted %u back %p\n",
3964                                         (unsigned long long)rec->start,
3965                                         back->full_backref ?
3966                                         "parent" : "root",
3967                                         back->full_backref ?
3968                                         (unsigned long long)dback->parent:
3969                                         (unsigned long long)dback->root,
3970                                         (unsigned long long)dback->owner,
3971                                         (unsigned long long)dback->offset,
3972                                         dback->found_ref, dback->num_refs, back);
3973                         }
3974                         if (dback->disk_bytenr != rec->start) {
3975                                 err = 1;
3976                                 if (!print_errs)
3977                                         goto out;
3978                                 fprintf(stderr, "Backref disk bytenr does not"
3979                                         " match extent record, bytenr=%llu, "
3980                                         "ref bytenr=%llu\n",
3981                                         (unsigned long long)rec->start,
3982                                         (unsigned long long)dback->disk_bytenr);
3983                         }
3984
3985                         if (dback->bytes != rec->nr) {
3986                                 err = 1;
3987                                 if (!print_errs)
3988                                         goto out;
3989                                 fprintf(stderr, "Backref bytes do not match "
3990                                         "extent backref, bytenr=%llu, ref "
3991                                         "bytes=%llu, backref bytes=%llu\n",
3992                                         (unsigned long long)rec->start,
3993                                         (unsigned long long)rec->nr,
3994                                         (unsigned long long)dback->bytes);
3995                         }
3996                 }
3997                 if (!back->is_data) {
3998                         found += 1;
3999                 } else {
4000                         dback = to_data_backref(back);
4001                         found += dback->found_ref;
4002                 }
4003         }
4004         if (found != rec->refs) {
4005                 err = 1;
4006                 if (!print_errs)
4007                         goto out;
4008                 fprintf(stderr, "Incorrect global backref count "
4009                         "on %llu found %llu wanted %llu\n",
4010                         (unsigned long long)rec->start,
4011                         (unsigned long long)found,
4012                         (unsigned long long)rec->refs);
4013         }
4014 out:
4015         return err;
4016 }
4017
/* Node callback for rb_free_nodes(): free the backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
4024
4025 static void free_all_extent_backrefs(struct extent_record *rec)
4026 {
4027         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4028 }
4029
4030 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4031                                      struct cache_tree *extent_cache)
4032 {
4033         struct cache_extent *cache;
4034         struct extent_record *rec;
4035
4036         while (1) {
4037                 cache = first_cache_extent(extent_cache);
4038                 if (!cache)
4039                         break;
4040                 rec = container_of(cache, struct extent_record, cache);
4041                 remove_cache_extent(extent_cache, cache);
4042                 free_all_extent_backrefs(rec);
4043                 free(rec);
4044         }
4045 }
4046
4047 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4048                                  struct extent_record *rec)
4049 {
4050         if (rec->content_checked && rec->owner_ref_checked &&
4051             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4052             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4053             !rec->bad_full_backref && !rec->crossing_stripes &&
4054             !rec->wrong_chunk_type) {
4055                 remove_cache_extent(extent_cache, &rec->cache);
4056                 free_all_extent_backrefs(rec);
4057                 list_del_init(&rec->list);
4058                 free(rec);
4059         }
4060         return 0;
4061 }
4062
4063 static int check_owner_ref(struct btrfs_root *root,
4064                             struct extent_record *rec,
4065                             struct extent_buffer *buf)
4066 {
4067         struct extent_backref *node, *tmp;
4068         struct tree_backref *back;
4069         struct btrfs_root *ref_root;
4070         struct btrfs_key key;
4071         struct btrfs_path path;
4072         struct extent_buffer *parent;
4073         int level;
4074         int found = 0;
4075         int ret;
4076
4077         rbtree_postorder_for_each_entry_safe(node, tmp,
4078                                              &rec->backref_tree, node) {
4079                 if (node->is_data)
4080                         continue;
4081                 if (!node->found_ref)
4082                         continue;
4083                 if (node->full_backref)
4084                         continue;
4085                 back = to_tree_backref(node);
4086                 if (btrfs_header_owner(buf) == back->root)
4087                         return 0;
4088         }
4089         BUG_ON(rec->is_root);
4090
4091         /* try to find the block by search corresponding fs tree */
4092         key.objectid = btrfs_header_owner(buf);
4093         key.type = BTRFS_ROOT_ITEM_KEY;
4094         key.offset = (u64)-1;
4095
4096         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4097         if (IS_ERR(ref_root))
4098                 return 1;
4099
4100         level = btrfs_header_level(buf);
4101         if (level == 0)
4102                 btrfs_item_key_to_cpu(buf, &key, 0);
4103         else
4104                 btrfs_node_key_to_cpu(buf, &key, 0);
4105
4106         btrfs_init_path(&path);
4107         path.lowest_level = level + 1;
4108         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4109         if (ret < 0)
4110                 return 0;
4111
4112         parent = path.nodes[level + 1];
4113         if (parent && buf->start == btrfs_node_blockptr(parent,
4114                                                         path.slots[level + 1]))
4115                 found = 1;
4116
4117         btrfs_release_path(&path);
4118         return found ? 0 : 1;
4119 }
4120
4121 static int is_extent_tree_record(struct extent_record *rec)
4122 {
4123         struct extent_backref *ref, *tmp;
4124         struct tree_backref *back;
4125         int is_extent = 0;
4126
4127         rbtree_postorder_for_each_entry_safe(ref, tmp,
4128                                              &rec->backref_tree, node) {
4129                 if (ref->is_data)
4130                         return 0;
4131                 back = to_tree_backref(ref);
4132                 if (ref->full_backref)
4133                         return 0;
4134                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4135                         is_extent = 1;
4136         }
4137         return is_extent;
4138 }
4139
4140
4141 static int record_bad_block_io(struct btrfs_fs_info *info,
4142                                struct cache_tree *extent_cache,
4143                                u64 start, u64 len)
4144 {
4145         struct extent_record *rec;
4146         struct cache_extent *cache;
4147         struct btrfs_key key;
4148
4149         cache = lookup_cache_extent(extent_cache, start, len);
4150         if (!cache)
4151                 return 0;
4152
4153         rec = container_of(cache, struct extent_record, cache);
4154         if (!is_extent_tree_record(rec))
4155                 return 0;
4156
4157         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4158         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4159 }
4160
4161 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4162                        struct extent_buffer *buf, int slot)
4163 {
4164         if (btrfs_header_level(buf)) {
4165                 struct btrfs_key_ptr ptr1, ptr2;
4166
4167                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4168                                    sizeof(struct btrfs_key_ptr));
4169                 read_extent_buffer(buf, &ptr2,
4170                                    btrfs_node_key_ptr_offset(slot + 1),
4171                                    sizeof(struct btrfs_key_ptr));
4172                 write_extent_buffer(buf, &ptr1,
4173                                     btrfs_node_key_ptr_offset(slot + 1),
4174                                     sizeof(struct btrfs_key_ptr));
4175                 write_extent_buffer(buf, &ptr2,
4176                                     btrfs_node_key_ptr_offset(slot),
4177                                     sizeof(struct btrfs_key_ptr));
4178                 if (slot == 0) {
4179                         struct btrfs_disk_key key;
4180                         btrfs_node_key(buf, &key, 0);
4181                         btrfs_fixup_low_keys(root, path, &key,
4182                                              btrfs_header_level(buf) + 1);
4183                 }
4184         } else {
4185                 struct btrfs_item *item1, *item2;
4186                 struct btrfs_key k1, k2;
4187                 char *item1_data, *item2_data;
4188                 u32 item1_offset, item2_offset, item1_size, item2_size;
4189
4190                 item1 = btrfs_item_nr(slot);
4191                 item2 = btrfs_item_nr(slot + 1);
4192                 btrfs_item_key_to_cpu(buf, &k1, slot);
4193                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4194                 item1_offset = btrfs_item_offset(buf, item1);
4195                 item2_offset = btrfs_item_offset(buf, item2);
4196                 item1_size = btrfs_item_size(buf, item1);
4197                 item2_size = btrfs_item_size(buf, item2);
4198
4199                 item1_data = malloc(item1_size);
4200                 if (!item1_data)
4201                         return -ENOMEM;
4202                 item2_data = malloc(item2_size);
4203                 if (!item2_data) {
4204                         free(item1_data);
4205                         return -ENOMEM;
4206                 }
4207
4208                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4209                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4210
4211                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4212                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4213                 free(item1_data);
4214                 free(item2_data);
4215
4216                 btrfs_set_item_offset(buf, item1, item2_offset);
4217                 btrfs_set_item_offset(buf, item2, item1_offset);
4218                 btrfs_set_item_size(buf, item1, item2_size);
4219                 btrfs_set_item_size(buf, item2, item1_size);
4220
4221                 path->slots[0] = slot;
4222                 btrfs_set_item_key_unsafe(root, path, &k2);
4223                 path->slots[0] = slot + 1;
4224                 btrfs_set_item_key_unsafe(root, path, &k1);
4225         }
4226         return 0;
4227 }
4228
4229 static int fix_key_order(struct btrfs_trans_handle *trans,
4230                          struct btrfs_root *root,
4231                          struct btrfs_path *path)
4232 {
4233         struct extent_buffer *buf;
4234         struct btrfs_key k1, k2;
4235         int i;
4236         int level = path->lowest_level;
4237         int ret = -EIO;
4238
4239         buf = path->nodes[level];
4240         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4241                 if (level) {
4242                         btrfs_node_key_to_cpu(buf, &k1, i);
4243                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4244                 } else {
4245                         btrfs_item_key_to_cpu(buf, &k1, i);
4246                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4247                 }
4248                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4249                         continue;
4250                 ret = swap_values(root, path, buf, i);
4251                 if (ret)
4252                         break;
4253                 btrfs_mark_buffer_dirty(buf);
4254                 i = 0;
4255         }
4256         return ret;
4257 }
4258
4259 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4260                              struct btrfs_root *root,
4261                              struct btrfs_path *path,
4262                              struct extent_buffer *buf, int slot)
4263 {
4264         struct btrfs_key key;
4265         int nritems = btrfs_header_nritems(buf);
4266
4267         btrfs_item_key_to_cpu(buf, &key, slot);
4268
4269         /* These are all the keys we can deal with missing. */
4270         if (key.type != BTRFS_DIR_INDEX_KEY &&
4271             key.type != BTRFS_EXTENT_ITEM_KEY &&
4272             key.type != BTRFS_METADATA_ITEM_KEY &&
4273             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4274             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4275                 return -1;
4276
4277         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4278                (unsigned long long)key.objectid, key.type,
4279                (unsigned long long)key.offset, slot, buf->start);
4280         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4281                               btrfs_item_nr_offset(slot + 1),
4282                               sizeof(struct btrfs_item) *
4283                               (nritems - slot - 1));
4284         btrfs_set_header_nritems(buf, nritems - 1);
4285         if (slot == 0) {
4286                 struct btrfs_disk_key disk_key;
4287
4288                 btrfs_item_key(buf, &disk_key, 0);
4289                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4290         }
4291         btrfs_mark_buffer_dirty(buf);
4292         return 0;
4293 }
4294
4295 static int fix_item_offset(struct btrfs_trans_handle *trans,
4296                            struct btrfs_root *root,
4297                            struct btrfs_path *path)
4298 {
4299         struct extent_buffer *buf;
4300         int i;
4301         int ret = 0;
4302
4303         /* We should only get this for leaves */
4304         BUG_ON(path->lowest_level);
4305         buf = path->nodes[0];
4306 again:
4307         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4308                 unsigned int shift = 0, offset;
4309
4310                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4311                     BTRFS_LEAF_DATA_SIZE(root)) {
4312                         if (btrfs_item_end_nr(buf, i) >
4313                             BTRFS_LEAF_DATA_SIZE(root)) {
4314                                 ret = delete_bogus_item(trans, root, path,
4315                                                         buf, i);
4316                                 if (!ret)
4317                                         goto again;
4318                                 fprintf(stderr, "item is off the end of the "
4319                                         "leaf, can't fix\n");
4320                                 ret = -EIO;
4321                                 break;
4322                         }
4323                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4324                                 btrfs_item_end_nr(buf, i);
4325                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4326                            btrfs_item_offset_nr(buf, i - 1)) {
4327                         if (btrfs_item_end_nr(buf, i) >
4328                             btrfs_item_offset_nr(buf, i - 1)) {
4329                                 ret = delete_bogus_item(trans, root, path,
4330                                                         buf, i);
4331                                 if (!ret)
4332                                         goto again;
4333                                 fprintf(stderr, "items overlap, can't fix\n");
4334                                 ret = -EIO;
4335                                 break;
4336                         }
4337                         shift = btrfs_item_offset_nr(buf, i - 1) -
4338                                 btrfs_item_end_nr(buf, i);
4339                 }
4340                 if (!shift)
4341                         continue;
4342
4343                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4344                        i, shift, (unsigned long long)buf->start);
4345                 offset = btrfs_item_offset_nr(buf, i);
4346                 memmove_extent_buffer(buf,
4347                                       btrfs_leaf_data(buf) + offset + shift,
4348                                       btrfs_leaf_data(buf) + offset,
4349                                       btrfs_item_size_nr(buf, i));
4350                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4351                                       offset + shift);
4352                 btrfs_mark_buffer_dirty(buf);
4353         }
4354
4355         /*
4356          * We may have moved things, in which case we want to exit so we don't
4357          * write those changes out.  Once we have proper abort functionality in
4358          * progs this can be changed to something nicer.
4359          */
4360         BUG_ON(ret);
4361         return ret;
4362 }
4363
4364 /*
4365  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4366  * then just return -EIO.
4367  */
4368 static int try_to_fix_bad_block(struct btrfs_root *root,
4369                                 struct extent_buffer *buf,
4370                                 enum btrfs_tree_block_status status)
4371 {
4372         struct btrfs_trans_handle *trans;
4373         struct ulist *roots;
4374         struct ulist_node *node;
4375         struct btrfs_root *search_root;
4376         struct btrfs_path *path;
4377         struct ulist_iterator iter;
4378         struct btrfs_key root_key, key;
4379         int ret;
4380
4381         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4382             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4383                 return -EIO;
4384
4385         path = btrfs_alloc_path();
4386         if (!path)
4387                 return -EIO;
4388
4389         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4390                                    0, &roots);
4391         if (ret) {
4392                 btrfs_free_path(path);
4393                 return -EIO;
4394         }
4395
4396         ULIST_ITER_INIT(&iter);
4397         while ((node = ulist_next(roots, &iter))) {
4398                 root_key.objectid = node->val;
4399                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4400                 root_key.offset = (u64)-1;
4401
4402                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4403                 if (IS_ERR(root)) {
4404                         ret = -EIO;
4405                         break;
4406                 }
4407
4408
4409                 trans = btrfs_start_transaction(search_root, 0);
4410                 if (IS_ERR(trans)) {
4411                         ret = PTR_ERR(trans);
4412                         break;
4413                 }
4414
4415                 path->lowest_level = btrfs_header_level(buf);
4416                 path->skip_check_block = 1;
4417                 if (path->lowest_level)
4418                         btrfs_node_key_to_cpu(buf, &key, 0);
4419                 else
4420                         btrfs_item_key_to_cpu(buf, &key, 0);
4421                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4422                 if (ret) {
4423                         ret = -EIO;
4424                         btrfs_commit_transaction(trans, search_root);
4425                         break;
4426                 }
4427                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4428                         ret = fix_key_order(trans, search_root, path);
4429                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4430                         ret = fix_item_offset(trans, search_root, path);
4431                 if (ret) {
4432                         btrfs_commit_transaction(trans, search_root);
4433                         break;
4434                 }
4435                 btrfs_release_path(path);
4436                 btrfs_commit_transaction(trans, search_root);
4437         }
4438         ulist_free(roots);
4439         btrfs_free_path(path);
4440         return ret;
4441 }
4442
4443 static int check_block(struct btrfs_root *root,
4444                        struct cache_tree *extent_cache,
4445                        struct extent_buffer *buf, u64 flags)
4446 {
4447         struct extent_record *rec;
4448         struct cache_extent *cache;
4449         struct btrfs_key key;
4450         enum btrfs_tree_block_status status;
4451         int ret = 0;
4452         int level;
4453
4454         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4455         if (!cache)
4456                 return 1;
4457         rec = container_of(cache, struct extent_record, cache);
4458         rec->generation = btrfs_header_generation(buf);
4459
4460         level = btrfs_header_level(buf);
4461         if (btrfs_header_nritems(buf) > 0) {
4462
4463                 if (level == 0)
4464                         btrfs_item_key_to_cpu(buf, &key, 0);
4465                 else
4466                         btrfs_node_key_to_cpu(buf, &key, 0);
4467
4468                 rec->info_objectid = key.objectid;
4469         }
4470         rec->info_level = level;
4471
4472         if (btrfs_is_leaf(buf))
4473                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4474         else
4475                 status = btrfs_check_node(root, &rec->parent_key, buf);
4476
4477         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4478                 if (repair)
4479                         status = try_to_fix_bad_block(root, buf, status);
4480                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4481                         ret = -EIO;
4482                         fprintf(stderr, "bad block %llu\n",
4483                                 (unsigned long long)buf->start);
4484                 } else {
4485                         /*
4486                          * Signal to callers we need to start the scan over
4487                          * again since we'll have cowed blocks.
4488                          */
4489                         ret = -EAGAIN;
4490                 }
4491         } else {
4492                 rec->content_checked = 1;
4493                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4494                         rec->owner_ref_checked = 1;
4495                 else {
4496                         ret = check_owner_ref(root, rec, buf);
4497                         if (!ret)
4498                                 rec->owner_ref_checked = 1;
4499                 }
4500         }
4501         if (!ret)
4502                 maybe_free_extent_rec(extent_cache, rec);
4503         return ret;
4504 }
4505
4506
4507 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4508                                                 u64 parent, u64 root)
4509 {
4510         struct rb_node *node;
4511         struct tree_backref *back = NULL;
4512         struct tree_backref match = {
4513                 .node = {
4514                         .is_data = 0,
4515                 },
4516         };
4517
4518         if (parent) {
4519                 match.parent = parent;
4520                 match.node.full_backref = 1;
4521         } else {
4522                 match.root = root;
4523         }
4524
4525         node = rb_search(&rec->backref_tree, &match.node.node,
4526                          (rb_compare_keys)compare_extent_backref, NULL);
4527         if (node)
4528                 back = to_tree_backref(rb_node_to_extent_backref(node));
4529
4530         return back;
4531 }
4532
4533 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4534                                                 u64 parent, u64 root)
4535 {
4536         struct tree_backref *ref = malloc(sizeof(*ref));
4537
4538         if (!ref)
4539                 return NULL;
4540         memset(&ref->node, 0, sizeof(ref->node));
4541         if (parent > 0) {
4542                 ref->parent = parent;
4543                 ref->node.full_backref = 1;
4544         } else {
4545                 ref->root = root;
4546                 ref->node.full_backref = 0;
4547         }
4548         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4549
4550         return ref;
4551 }
4552
4553 static struct data_backref *find_data_backref(struct extent_record *rec,
4554                                                 u64 parent, u64 root,
4555                                                 u64 owner, u64 offset,
4556                                                 int found_ref,
4557                                                 u64 disk_bytenr, u64 bytes)
4558 {
4559         struct rb_node *node;
4560         struct data_backref *back = NULL;
4561         struct data_backref match = {
4562                 .node = {
4563                         .is_data = 1,
4564                 },
4565                 .owner = owner,
4566                 .offset = offset,
4567                 .bytes = bytes,
4568                 .found_ref = found_ref,
4569                 .disk_bytenr = disk_bytenr,
4570         };
4571
4572         if (parent) {
4573                 match.parent = parent;
4574                 match.node.full_backref = 1;
4575         } else {
4576                 match.root = root;
4577         }
4578
4579         node = rb_search(&rec->backref_tree, &match.node.node,
4580                          (rb_compare_keys)compare_extent_backref, NULL);
4581         if (node)
4582                 back = to_data_backref(rb_node_to_extent_backref(node));
4583
4584         return back;
4585 }
4586
4587 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4588                                                 u64 parent, u64 root,
4589                                                 u64 owner, u64 offset,
4590                                                 u64 max_size)
4591 {
4592         struct data_backref *ref = malloc(sizeof(*ref));
4593
4594         if (!ref)
4595                 return NULL;
4596         memset(&ref->node, 0, sizeof(ref->node));
4597         ref->node.is_data = 1;
4598
4599         if (parent > 0) {
4600                 ref->parent = parent;
4601                 ref->owner = 0;
4602                 ref->offset = 0;
4603                 ref->node.full_backref = 1;
4604         } else {
4605                 ref->root = root;
4606                 ref->owner = owner;
4607                 ref->offset = offset;
4608                 ref->node.full_backref = 0;
4609         }
4610         ref->bytes = max_size;
4611         ref->found_ref = 0;
4612         ref->num_refs = 0;
4613         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4614         if (max_size > rec->max_size)
4615                 rec->max_size = max_size;
4616         return ref;
4617 }
4618
4619 /* Check if the type of extent matches with its chunk */
4620 static void check_extent_type(struct extent_record *rec)
4621 {
4622         struct btrfs_block_group_cache *bg_cache;
4623
4624         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4625         if (!bg_cache)
4626                 return;
4627
4628         /* data extent, check chunk directly*/
4629         if (!rec->metadata) {
4630                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4631                         rec->wrong_chunk_type = 1;
4632                 return;
4633         }
4634
4635         /* metadata extent, check the obvious case first */
4636         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4637                                  BTRFS_BLOCK_GROUP_METADATA))) {
4638                 rec->wrong_chunk_type = 1;
4639                 return;
4640         }
4641
4642         /*
4643          * Check SYSTEM extent, as it's also marked as metadata, we can only
4644          * make sure it's a SYSTEM extent by its backref
4645          */
4646         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4647                 struct extent_backref *node;
4648                 struct tree_backref *tback;
4649                 u64 bg_type;
4650
4651                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4652                 if (node->is_data) {
4653                         /* tree block shouldn't have data backref */
4654                         rec->wrong_chunk_type = 1;
4655                         return;
4656                 }
4657                 tback = container_of(node, struct tree_backref, node);
4658
4659                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4660                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4661                 else
4662                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4663                 if (!(bg_cache->flags & bg_type))
4664                         rec->wrong_chunk_type = 1;
4665         }
4666 }
4667
4668 /*
4669  * Allocate a new extent record, fill default values from @tmpl and insert int
4670  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4671  * the cache, otherwise it fails.
4672  */
4673 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4674                 struct extent_record *tmpl)
4675 {
4676         struct extent_record *rec;
4677         int ret = 0;
4678
4679         rec = malloc(sizeof(*rec));
4680         if (!rec)
4681                 return -ENOMEM;
4682         rec->start = tmpl->start;
4683         rec->max_size = tmpl->max_size;
4684         rec->nr = max(tmpl->nr, tmpl->max_size);
4685         rec->found_rec = tmpl->found_rec;
4686         rec->content_checked = tmpl->content_checked;
4687         rec->owner_ref_checked = tmpl->owner_ref_checked;
4688         rec->num_duplicates = 0;
4689         rec->metadata = tmpl->metadata;
4690         rec->flag_block_full_backref = FLAG_UNSET;
4691         rec->bad_full_backref = 0;
4692         rec->crossing_stripes = 0;
4693         rec->wrong_chunk_type = 0;
4694         rec->is_root = tmpl->is_root;
4695         rec->refs = tmpl->refs;
4696         rec->extent_item_refs = tmpl->extent_item_refs;
4697         rec->parent_generation = tmpl->parent_generation;
4698         INIT_LIST_HEAD(&rec->backrefs);
4699         INIT_LIST_HEAD(&rec->dups);
4700         INIT_LIST_HEAD(&rec->list);
4701         rec->backref_tree = RB_ROOT;
4702         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4703         rec->cache.start = tmpl->start;
4704         rec->cache.size = tmpl->nr;
4705         ret = insert_cache_extent(extent_cache, &rec->cache);
4706         BUG_ON(ret);
4707         bytes_used += rec->nr;
4708
4709         if (tmpl->metadata)
4710                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4711                                 global_info->tree_root->nodesize);
4712         check_extent_type(rec);
4713         return ret;
4714 }
4715
4716 /*
4717  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4718  * some are hints:
4719  * - refs              - if found, increase refs
4720  * - is_root           - if found, set
4721  * - content_checked   - if found, set
4722  * - owner_ref_checked - if found, set
4723  *
4724  * If not found, create a new one, initialize and insert.
4725  */
4726 static int add_extent_rec(struct cache_tree *extent_cache,
4727                 struct extent_record *tmpl)
4728 {
4729         struct extent_record *rec;
4730         struct cache_extent *cache;
4731         int ret = 0;
4732         int dup = 0;
4733
4734         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4735         if (cache) {
4736                 rec = container_of(cache, struct extent_record, cache);
4737                 if (tmpl->refs)
4738                         rec->refs++;
4739                 if (rec->nr == 1)
4740                         rec->nr = max(tmpl->nr, tmpl->max_size);
4741
4742                 /*
4743                  * We need to make sure to reset nr to whatever the extent
4744                  * record says was the real size, this way we can compare it to
4745                  * the backrefs.
4746                  */
4747                 if (tmpl->found_rec) {
4748                         if (tmpl->start != rec->start || rec->found_rec) {
4749                                 struct extent_record *tmp;
4750
4751                                 dup = 1;
4752                                 if (list_empty(&rec->list))
4753                                         list_add_tail(&rec->list,
4754                                                       &duplicate_extents);
4755
4756                                 /*
4757                                  * We have to do this song and dance in case we
4758                                  * find an extent record that falls inside of
4759                                  * our current extent record but does not have
4760                                  * the same objectid.
4761                                  */
4762                                 tmp = malloc(sizeof(*tmp));
4763                                 if (!tmp)
4764                                         return -ENOMEM;
4765                                 tmp->start = tmpl->start;
4766                                 tmp->max_size = tmpl->max_size;
4767                                 tmp->nr = tmpl->nr;
4768                                 tmp->found_rec = 1;
4769                                 tmp->metadata = tmpl->metadata;
4770                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4771                                 INIT_LIST_HEAD(&tmp->list);
4772                                 list_add_tail(&tmp->list, &rec->dups);
4773                                 rec->num_duplicates++;
4774                         } else {
4775                                 rec->nr = tmpl->nr;
4776                                 rec->found_rec = 1;
4777                         }
4778                 }
4779
4780                 if (tmpl->extent_item_refs && !dup) {
4781                         if (rec->extent_item_refs) {
4782                                 fprintf(stderr, "block %llu rec "
4783                                         "extent_item_refs %llu, passed %llu\n",
4784                                         (unsigned long long)tmpl->start,
4785                                         (unsigned long long)
4786                                                         rec->extent_item_refs,
4787                                         (unsigned long long)tmpl->extent_item_refs);
4788                         }
4789                         rec->extent_item_refs = tmpl->extent_item_refs;
4790                 }
4791                 if (tmpl->is_root)
4792                         rec->is_root = 1;
4793                 if (tmpl->content_checked)
4794                         rec->content_checked = 1;
4795                 if (tmpl->owner_ref_checked)
4796                         rec->owner_ref_checked = 1;
4797                 memcpy(&rec->parent_key, &tmpl->parent_key,
4798                                 sizeof(tmpl->parent_key));
4799                 if (tmpl->parent_generation)
4800                         rec->parent_generation = tmpl->parent_generation;
4801                 if (rec->max_size < tmpl->max_size)
4802                         rec->max_size = tmpl->max_size;
4803
4804                 /*
4805                  * A metadata extent can't cross stripe_len boundary, otherwise
4806                  * kernel scrub won't be able to handle it.
4807                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4808                  * it.
4809                  */
4810                 if (tmpl->metadata)
4811                         rec->crossing_stripes = check_crossing_stripes(
4812                                 rec->start, global_info->tree_root->nodesize);
4813                 check_extent_type(rec);
4814                 maybe_free_extent_rec(extent_cache, rec);
4815                 return ret;
4816         }
4817
4818         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4819
4820         return ret;
4821 }
4822
4823 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4824                             u64 parent, u64 root, int found_ref)
4825 {
4826         struct extent_record *rec;
4827         struct tree_backref *back;
4828         struct cache_extent *cache;
4829
4830         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4831         if (!cache) {
4832                 struct extent_record tmpl;
4833
4834                 memset(&tmpl, 0, sizeof(tmpl));
4835                 tmpl.start = bytenr;
4836                 tmpl.nr = 1;
4837                 tmpl.metadata = 1;
4838
4839                 add_extent_rec_nolookup(extent_cache, &tmpl);
4840
4841                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4842                 if (!cache)
4843                         abort();
4844         }
4845
4846         rec = container_of(cache, struct extent_record, cache);
4847         if (rec->start != bytenr) {
4848                 abort();
4849         }
4850
4851         back = find_tree_backref(rec, parent, root);
4852         if (!back) {
4853                 back = alloc_tree_backref(rec, parent, root);
4854                 BUG_ON(!back);
4855         }
4856
4857         if (found_ref) {
4858                 if (back->node.found_ref) {
4859                         fprintf(stderr, "Extent back ref already exists "
4860                                 "for %llu parent %llu root %llu \n",
4861                                 (unsigned long long)bytenr,
4862                                 (unsigned long long)parent,
4863                                 (unsigned long long)root);
4864                 }
4865                 back->node.found_ref = 1;
4866         } else {
4867                 if (back->node.found_extent_tree) {
4868                         fprintf(stderr, "Extent back ref already exists "
4869                                 "for %llu parent %llu root %llu \n",
4870                                 (unsigned long long)bytenr,
4871                                 (unsigned long long)parent,
4872                                 (unsigned long long)root);
4873                 }
4874                 back->node.found_extent_tree = 1;
4875         }
4876         check_extent_type(rec);
4877         maybe_free_extent_rec(extent_cache, rec);
4878         return 0;
4879 }
4880
4881 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4882                             u64 parent, u64 root, u64 owner, u64 offset,
4883                             u32 num_refs, int found_ref, u64 max_size)
4884 {
4885         struct extent_record *rec;
4886         struct data_backref *back;
4887         struct cache_extent *cache;
4888
4889         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4890         if (!cache) {
4891                 struct extent_record tmpl;
4892
4893                 memset(&tmpl, 0, sizeof(tmpl));
4894                 tmpl.start = bytenr;
4895                 tmpl.nr = 1;
4896                 tmpl.max_size = max_size;
4897
4898                 add_extent_rec_nolookup(extent_cache, &tmpl);
4899
4900                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4901                 if (!cache)
4902                         abort();
4903         }
4904
4905         rec = container_of(cache, struct extent_record, cache);
4906         if (rec->max_size < max_size)
4907                 rec->max_size = max_size;
4908
4909         /*
4910          * If found_ref is set then max_size is the real size and must match the
4911          * existing refs.  So if we have already found a ref then we need to
4912          * make sure that this ref matches the existing one, otherwise we need
4913          * to add a new backref so we can notice that the backrefs don't match
4914          * and we need to figure out who is telling the truth.  This is to
4915          * account for that awful fsync bug I introduced where we'd end up with
4916          * a btrfs_file_extent_item that would have its length include multiple
4917          * prealloc extents or point inside of a prealloc extent.
4918          */
4919         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4920                                  bytenr, max_size);
4921         if (!back) {
4922                 back = alloc_data_backref(rec, parent, root, owner, offset,
4923                                           max_size);
4924                 BUG_ON(!back);
4925         }
4926
4927         if (found_ref) {
4928                 BUG_ON(num_refs != 1);
4929                 if (back->node.found_ref)
4930                         BUG_ON(back->bytes != max_size);
4931                 back->node.found_ref = 1;
4932                 back->found_ref += 1;
4933                 back->bytes = max_size;
4934                 back->disk_bytenr = bytenr;
4935                 rec->refs += 1;
4936                 rec->content_checked = 1;
4937                 rec->owner_ref_checked = 1;
4938         } else {
4939                 if (back->node.found_extent_tree) {
4940                         fprintf(stderr, "Extent back ref already exists "
4941                                 "for %llu parent %llu root %llu "
4942                                 "owner %llu offset %llu num_refs %lu\n",
4943                                 (unsigned long long)bytenr,
4944                                 (unsigned long long)parent,
4945                                 (unsigned long long)root,
4946                                 (unsigned long long)owner,
4947                                 (unsigned long long)offset,
4948                                 (unsigned long)num_refs);
4949                 }
4950                 back->num_refs = num_refs;
4951                 back->node.found_extent_tree = 1;
4952         }
4953         maybe_free_extent_rec(extent_cache, rec);
4954         return 0;
4955 }
4956
4957 static int add_pending(struct cache_tree *pending,
4958                        struct cache_tree *seen, u64 bytenr, u32 size)
4959 {
4960         int ret;
4961         ret = add_cache_extent(seen, bytenr, size);
4962         if (ret)
4963                 return ret;
4964         add_cache_extent(pending, bytenr, size);
4965         return 0;
4966 }
4967
4968 static int pick_next_pending(struct cache_tree *pending,
4969                         struct cache_tree *reada,
4970                         struct cache_tree *nodes,
4971                         u64 last, struct block_info *bits, int bits_nr,
4972                         int *reada_bits)
4973 {
4974         unsigned long node_start = last;
4975         struct cache_extent *cache;
4976         int ret;
4977
4978         cache = search_cache_extent(reada, 0);
4979         if (cache) {
4980                 bits[0].start = cache->start;
4981                 bits[0].size = cache->size;
4982                 *reada_bits = 1;
4983                 return 1;
4984         }
4985         *reada_bits = 0;
4986         if (node_start > 32768)
4987                 node_start -= 32768;
4988
4989         cache = search_cache_extent(nodes, node_start);
4990         if (!cache)
4991                 cache = search_cache_extent(nodes, 0);
4992
4993         if (!cache) {
4994                  cache = search_cache_extent(pending, 0);
4995                  if (!cache)
4996                          return 0;
4997                  ret = 0;
4998                  do {
4999                          bits[ret].start = cache->start;
5000                          bits[ret].size = cache->size;
5001                          cache = next_cache_extent(cache);
5002                          ret++;
5003                  } while (cache && ret < bits_nr);
5004                  return ret;
5005         }
5006
5007         ret = 0;
5008         do {
5009                 bits[ret].start = cache->start;
5010                 bits[ret].size = cache->size;
5011                 cache = next_cache_extent(cache);
5012                 ret++;
5013         } while (cache && ret < bits_nr);
5014
5015         if (bits_nr - ret > 8) {
5016                 u64 lookup = bits[0].start + bits[0].size;
5017                 struct cache_extent *next;
5018                 next = search_cache_extent(pending, lookup);
5019                 while(next) {
5020                         if (next->start - lookup > 32768)
5021                                 break;
5022                         bits[ret].start = next->start;
5023                         bits[ret].size = next->size;
5024                         lookup = next->start + next->size;
5025                         ret++;
5026                         if (ret == bits_nr)
5027                                 break;
5028                         next = next_cache_extent(next);
5029                         if (!next)
5030                                 break;
5031                 }
5032         }
5033         return ret;
5034 }
5035
5036 static void free_chunk_record(struct cache_extent *cache)
5037 {
5038         struct chunk_record *rec;
5039
5040         rec = container_of(cache, struct chunk_record, cache);
5041         list_del_init(&rec->list);
5042         list_del_init(&rec->dextents);
5043         free(rec);
5044 }
5045
5046 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5047 {
5048         cache_tree_free_extents(chunk_cache, free_chunk_record);
5049 }
5050
5051 static void free_device_record(struct rb_node *node)
5052 {
5053         struct device_record *rec;
5054
5055         rec = container_of(node, struct device_record, node);
5056         free(rec);
5057 }
5058
5059 FREE_RB_BASED_TREE(device_cache, free_device_record);
5060
5061 int insert_block_group_record(struct block_group_tree *tree,
5062                               struct block_group_record *bg_rec)
5063 {
5064         int ret;
5065
5066         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5067         if (ret)
5068                 return ret;
5069
5070         list_add_tail(&bg_rec->list, &tree->block_groups);
5071         return 0;
5072 }
5073
5074 static void free_block_group_record(struct cache_extent *cache)
5075 {
5076         struct block_group_record *rec;
5077
5078         rec = container_of(cache, struct block_group_record, cache);
5079         list_del_init(&rec->list);
5080         free(rec);
5081 }
5082
5083 void free_block_group_tree(struct block_group_tree *tree)
5084 {
5085         cache_tree_free_extents(&tree->tree, free_block_group_record);
5086 }
5087
5088 int insert_device_extent_record(struct device_extent_tree *tree,
5089                                 struct device_extent_record *de_rec)
5090 {
5091         int ret;
5092
5093         /*
5094          * Device extent is a bit different from the other extents, because
5095          * the extents which belong to the different devices may have the
5096          * same start and size, so we need use the special extent cache
5097          * search/insert functions.
5098          */
5099         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5100         if (ret)
5101                 return ret;
5102
5103         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5104         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5105         return 0;
5106 }
5107
5108 static void free_device_extent_record(struct cache_extent *cache)
5109 {
5110         struct device_extent_record *rec;
5111
5112         rec = container_of(cache, struct device_extent_record, cache);
5113         if (!list_empty(&rec->chunk_list))
5114                 list_del_init(&rec->chunk_list);
5115         if (!list_empty(&rec->device_list))
5116                 list_del_init(&rec->device_list);
5117         free(rec);
5118 }
5119
5120 void free_device_extent_tree(struct device_extent_tree *tree)
5121 {
5122         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5123 }
5124
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the item's ref count.
 * In both cases the item key's objectid is the extent bytenr and its offset
 * is passed as the parent.
 *
 * Always returns 0; the helpers' return values are not inspected.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_key key;
        struct btrfs_extent_ref_v0 *ref0;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
        else
                add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

        return 0;
}
#endif
5143
5144 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5145                                             struct btrfs_key *key,
5146                                             int slot)
5147 {
5148         struct btrfs_chunk *ptr;
5149         struct chunk_record *rec;
5150         int num_stripes, i;
5151
5152         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5153         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5154
5155         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5156         if (!rec) {
5157                 fprintf(stderr, "memory allocation failed\n");
5158                 exit(-1);
5159         }
5160
5161         INIT_LIST_HEAD(&rec->list);
5162         INIT_LIST_HEAD(&rec->dextents);
5163         rec->bg_rec = NULL;
5164
5165         rec->cache.start = key->offset;
5166         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5167
5168         rec->generation = btrfs_header_generation(leaf);
5169
5170         rec->objectid = key->objectid;
5171         rec->type = key->type;
5172         rec->offset = key->offset;
5173
5174         rec->length = rec->cache.size;
5175         rec->owner = btrfs_chunk_owner(leaf, ptr);
5176         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5177         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5178         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5179         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5180         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5181         rec->num_stripes = num_stripes;
5182         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5183
5184         for (i = 0; i < rec->num_stripes; ++i) {
5185                 rec->stripes[i].devid =
5186                         btrfs_stripe_devid_nr(leaf, ptr, i);
5187                 rec->stripes[i].offset =
5188                         btrfs_stripe_offset_nr(leaf, ptr, i);
5189                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5190                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5191                                 BTRFS_UUID_SIZE);
5192         }
5193
5194         return rec;
5195 }
5196
5197 static int process_chunk_item(struct cache_tree *chunk_cache,
5198                               struct btrfs_key *key, struct extent_buffer *eb,
5199                               int slot)
5200 {
5201         struct chunk_record *rec;
5202         int ret = 0;
5203
5204         rec = btrfs_new_chunk_record(eb, key, slot);
5205         ret = insert_cache_extent(chunk_cache, &rec->cache);
5206         if (ret) {
5207                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5208                         rec->offset, rec->length);
5209                 free(rec);
5210         }
5211
5212         return ret;
5213 }
5214
5215 static int process_device_item(struct rb_root *dev_cache,
5216                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5217 {
5218         struct btrfs_dev_item *ptr;
5219         struct device_record *rec;
5220         int ret = 0;
5221
5222         ptr = btrfs_item_ptr(eb,
5223                 slot, struct btrfs_dev_item);
5224
5225         rec = malloc(sizeof(*rec));
5226         if (!rec) {
5227                 fprintf(stderr, "memory allocation failed\n");
5228                 return -ENOMEM;
5229         }
5230
5231         rec->devid = key->offset;
5232         rec->generation = btrfs_header_generation(eb);
5233
5234         rec->objectid = key->objectid;
5235         rec->type = key->type;
5236         rec->offset = key->offset;
5237
5238         rec->devid = btrfs_device_id(eb, ptr);
5239         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5240         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5241
5242         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5243         if (ret) {
5244                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5245                 free(rec);
5246         }
5247
5248         return ret;
5249 }
5250
5251 struct block_group_record *
5252 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5253                              int slot)
5254 {
5255         struct btrfs_block_group_item *ptr;
5256         struct block_group_record *rec;
5257
5258         rec = calloc(1, sizeof(*rec));
5259         if (!rec) {
5260                 fprintf(stderr, "memory allocation failed\n");
5261                 exit(-1);
5262         }
5263
5264         rec->cache.start = key->objectid;
5265         rec->cache.size = key->offset;
5266
5267         rec->generation = btrfs_header_generation(leaf);
5268
5269         rec->objectid = key->objectid;
5270         rec->type = key->type;
5271         rec->offset = key->offset;
5272
5273         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5274         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5275
5276         INIT_LIST_HEAD(&rec->list);
5277
5278         return rec;
5279 }
5280
5281 static int process_block_group_item(struct block_group_tree *block_group_cache,
5282                                     struct btrfs_key *key,
5283                                     struct extent_buffer *eb, int slot)
5284 {
5285         struct block_group_record *rec;
5286         int ret = 0;
5287
5288         rec = btrfs_new_block_group_record(eb, key, slot);
5289         ret = insert_block_group_record(block_group_cache, rec);
5290         if (ret) {
5291                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5292                         rec->objectid, rec->offset);
5293                 free(rec);
5294         }
5295
5296         return ret;
5297 }
5298
5299 struct device_extent_record *
5300 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5301                                struct btrfs_key *key, int slot)
5302 {
5303         struct device_extent_record *rec;
5304         struct btrfs_dev_extent *ptr;
5305
5306         rec = calloc(1, sizeof(*rec));
5307         if (!rec) {
5308                 fprintf(stderr, "memory allocation failed\n");
5309                 exit(-1);
5310         }
5311
5312         rec->cache.objectid = key->objectid;
5313         rec->cache.start = key->offset;
5314
5315         rec->generation = btrfs_header_generation(leaf);
5316
5317         rec->objectid = key->objectid;
5318         rec->type = key->type;
5319         rec->offset = key->offset;
5320
5321         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5322         rec->chunk_objecteid =
5323                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5324         rec->chunk_offset =
5325                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5326         rec->length = btrfs_dev_extent_length(leaf, ptr);
5327         rec->cache.size = rec->length;
5328
5329         INIT_LIST_HEAD(&rec->chunk_list);
5330         INIT_LIST_HEAD(&rec->device_list);
5331
5332         return rec;
5333 }
5334
5335 static int
5336 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5337                            struct btrfs_key *key, struct extent_buffer *eb,
5338                            int slot)
5339 {
5340         struct device_extent_record *rec;
5341         int ret;
5342
5343         rec = btrfs_new_device_extent_record(eb, key, slot);
5344         ret = insert_device_extent_record(dev_extent_cache, rec);
5345         if (ret) {
5346                 fprintf(stderr,
5347                         "Device extent[%llu, %llu, %llu] existed.\n",
5348                         rec->objectid, rec->offset, rec->length);
5349                 free(rec);
5350         }
5351
5352         return ret;
5353 }
5354
5355 static int process_extent_item(struct btrfs_root *root,
5356                                struct cache_tree *extent_cache,
5357                                struct extent_buffer *eb, int slot)
5358 {
5359         struct btrfs_extent_item *ei;
5360         struct btrfs_extent_inline_ref *iref;
5361         struct btrfs_extent_data_ref *dref;
5362         struct btrfs_shared_data_ref *sref;
5363         struct btrfs_key key;
5364         struct extent_record tmpl;
5365         unsigned long end;
5366         unsigned long ptr;
5367         int type;
5368         u32 item_size = btrfs_item_size_nr(eb, slot);
5369         u64 refs = 0;
5370         u64 offset;
5371         u64 num_bytes;
5372         int metadata = 0;
5373
5374         btrfs_item_key_to_cpu(eb, &key, slot);
5375
5376         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5377                 metadata = 1;
5378                 num_bytes = root->nodesize;
5379         } else {
5380                 num_bytes = key.offset;
5381         }
5382
5383         if (item_size < sizeof(*ei)) {
5384 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5385                 struct btrfs_extent_item_v0 *ei0;
5386                 BUG_ON(item_size != sizeof(*ei0));
5387                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5388                 refs = btrfs_extent_refs_v0(eb, ei0);
5389 #else
5390                 BUG();
5391 #endif
5392                 memset(&tmpl, 0, sizeof(tmpl));
5393                 tmpl.start = key.objectid;
5394                 tmpl.nr = num_bytes;
5395                 tmpl.extent_item_refs = refs;
5396                 tmpl.metadata = metadata;
5397                 tmpl.found_rec = 1;
5398                 tmpl.max_size = num_bytes;
5399
5400                 return add_extent_rec(extent_cache, &tmpl);
5401         }
5402
5403         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5404         refs = btrfs_extent_refs(eb, ei);
5405         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5406                 metadata = 1;
5407         else
5408                 metadata = 0;
5409
5410         memset(&tmpl, 0, sizeof(tmpl));
5411         tmpl.start = key.objectid;
5412         tmpl.nr = num_bytes;
5413         tmpl.extent_item_refs = refs;
5414         tmpl.metadata = metadata;
5415         tmpl.found_rec = 1;
5416         tmpl.max_size = num_bytes;
5417         add_extent_rec(extent_cache, &tmpl);
5418
5419         ptr = (unsigned long)(ei + 1);
5420         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5421             key.type == BTRFS_EXTENT_ITEM_KEY)
5422                 ptr += sizeof(struct btrfs_tree_block_info);
5423
5424         end = (unsigned long)ei + item_size;
5425         while (ptr < end) {
5426                 iref = (struct btrfs_extent_inline_ref *)ptr;
5427                 type = btrfs_extent_inline_ref_type(eb, iref);
5428                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5429                 switch (type) {
5430                 case BTRFS_TREE_BLOCK_REF_KEY:
5431                         add_tree_backref(extent_cache, key.objectid,
5432                                          0, offset, 0);
5433                         break;
5434                 case BTRFS_SHARED_BLOCK_REF_KEY:
5435                         add_tree_backref(extent_cache, key.objectid,
5436                                          offset, 0, 0);
5437                         break;
5438                 case BTRFS_EXTENT_DATA_REF_KEY:
5439                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5440                         add_data_backref(extent_cache, key.objectid, 0,
5441                                         btrfs_extent_data_ref_root(eb, dref),
5442                                         btrfs_extent_data_ref_objectid(eb,
5443                                                                        dref),
5444                                         btrfs_extent_data_ref_offset(eb, dref),
5445                                         btrfs_extent_data_ref_count(eb, dref),
5446                                         0, num_bytes);
5447                         break;
5448                 case BTRFS_SHARED_DATA_REF_KEY:
5449                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5450                         add_data_backref(extent_cache, key.objectid, offset,
5451                                         0, 0, 0,
5452                                         btrfs_shared_data_ref_count(eb, sref),
5453                                         0, num_bytes);
5454                         break;
5455                 default:
5456                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5457                                 key.objectid, key.type, num_bytes);
5458                         goto out;
5459                 }
5460                 ptr += btrfs_extent_inline_ref_size(type);
5461         }
5462         WARN_ON(ptr > end);
5463 out:
5464         return 0;
5465 }
5466
5467 static int check_cache_range(struct btrfs_root *root,
5468                              struct btrfs_block_group_cache *cache,
5469                              u64 offset, u64 bytes)
5470 {
5471         struct btrfs_free_space *entry;
5472         u64 *logical;
5473         u64 bytenr;
5474         int stripe_len;
5475         int i, nr, ret;
5476
5477         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5478                 bytenr = btrfs_sb_offset(i);
5479                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5480                                        cache->key.objectid, bytenr, 0,
5481                                        &logical, &nr, &stripe_len);
5482                 if (ret)
5483                         return ret;
5484
5485                 while (nr--) {
5486                         if (logical[nr] + stripe_len <= offset)
5487                                 continue;
5488                         if (offset + bytes <= logical[nr])
5489                                 continue;
5490                         if (logical[nr] == offset) {
5491                                 if (stripe_len >= bytes) {
5492                                         kfree(logical);
5493                                         return 0;
5494                                 }
5495                                 bytes -= stripe_len;
5496                                 offset += stripe_len;
5497                         } else if (logical[nr] < offset) {
5498                                 if (logical[nr] + stripe_len >=
5499                                     offset + bytes) {
5500                                         kfree(logical);
5501                                         return 0;
5502                                 }
5503                                 bytes = (offset + bytes) -
5504                                         (logical[nr] + stripe_len);
5505                                 offset = logical[nr] + stripe_len;
5506                         } else {
5507                                 /*
5508                                  * Could be tricky, the super may land in the
5509                                  * middle of the area we're checking.  First
5510                                  * check the easiest case, it's at the end.
5511                                  */
5512                                 if (logical[nr] + stripe_len >=
5513                                     bytes + offset) {
5514                                         bytes = logical[nr] - offset;
5515                                         continue;
5516                                 }
5517
5518                                 /* Check the left side */
5519                                 ret = check_cache_range(root, cache,
5520                                                         offset,
5521                                                         logical[nr] - offset);
5522                                 if (ret) {
5523                                         kfree(logical);
5524                                         return ret;
5525                                 }
5526
5527                                 /* Now we continue with the right side */
5528                                 bytes = (offset + bytes) -
5529                                         (logical[nr] + stripe_len);
5530                                 offset = logical[nr] + stripe_len;
5531                         }
5532                 }
5533
5534                 kfree(logical);
5535         }
5536
5537         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5538         if (!entry) {
5539                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5540                         offset, offset+bytes);
5541                 return -EINVAL;
5542         }
5543
5544         if (entry->offset != offset) {
5545                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5546                         entry->offset);
5547                 return -EINVAL;
5548         }
5549
5550         if (entry->bytes != bytes) {
5551                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5552                         bytes, entry->bytes, offset);
5553                 return -EINVAL;
5554         }
5555
5556         unlink_free_space(cache->free_space_ctl, entry);
5557         free(entry);
5558         return 0;
5559 }
5560
5561 static int verify_space_cache(struct btrfs_root *root,
5562                               struct btrfs_block_group_cache *cache)
5563 {
5564         struct btrfs_path *path;
5565         struct extent_buffer *leaf;
5566         struct btrfs_key key;
5567         u64 last;
5568         int ret = 0;
5569
5570         path = btrfs_alloc_path();
5571         if (!path)
5572                 return -ENOMEM;
5573
5574         root = root->fs_info->extent_root;
5575
5576         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5577
5578         key.objectid = last;
5579         key.offset = 0;
5580         key.type = BTRFS_EXTENT_ITEM_KEY;
5581
5582         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5583         if (ret < 0)
5584                 goto out;
5585         ret = 0;
5586         while (1) {
5587                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5588                         ret = btrfs_next_leaf(root, path);
5589                         if (ret < 0)
5590                                 goto out;
5591                         if (ret > 0) {
5592                                 ret = 0;
5593                                 break;
5594                         }
5595                 }
5596                 leaf = path->nodes[0];
5597                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5598                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5599                         break;
5600                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5601                     key.type != BTRFS_METADATA_ITEM_KEY) {
5602                         path->slots[0]++;
5603                         continue;
5604                 }
5605
5606                 if (last == key.objectid) {
5607                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5608                                 last = key.objectid + key.offset;
5609                         else
5610                                 last = key.objectid + root->nodesize;
5611                         path->slots[0]++;
5612                         continue;
5613                 }
5614
5615                 ret = check_cache_range(root, cache, last,
5616                                         key.objectid - last);
5617                 if (ret)
5618                         break;
5619                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5620                         last = key.objectid + key.offset;
5621                 else
5622                         last = key.objectid + root->nodesize;
5623                 path->slots[0]++;
5624         }
5625
5626         if (last < cache->key.objectid + cache->key.offset)
5627                 ret = check_cache_range(root, cache, last,
5628                                         cache->key.objectid +
5629                                         cache->key.offset - last);
5630
5631 out:
5632         btrfs_free_path(path);
5633
5634         if (!ret &&
5635             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5636                 fprintf(stderr, "There are still entries left in the space "
5637                         "cache\n");
5638                 ret = -EINVAL;
5639         }
5640
5641         return ret;
5642 }
5643
5644 static int check_space_cache(struct btrfs_root *root)
5645 {
5646         struct btrfs_block_group_cache *cache;
5647         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5648         int ret;
5649         int error = 0;
5650
5651         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5652             btrfs_super_generation(root->fs_info->super_copy) !=
5653             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5654                 printf("cache and super generation don't match, space cache "
5655                        "will be invalidated\n");
5656                 return 0;
5657         }
5658
5659         if (ctx.progress_enabled) {
5660                 ctx.tp = TASK_FREE_SPACE;
5661                 task_start(ctx.info);
5662         }
5663
5664         while (1) {
5665                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5666                 if (!cache)
5667                         break;
5668
5669                 start = cache->key.objectid + cache->key.offset;
5670                 if (!cache->free_space_ctl) {
5671                         if (btrfs_init_free_space_ctl(cache,
5672                                                       root->sectorsize)) {
5673                                 ret = -ENOMEM;
5674                                 break;
5675                         }
5676                 } else {
5677                         btrfs_remove_free_space_cache(cache);
5678                 }
5679
5680                 if (btrfs_fs_compat_ro(root->fs_info,
5681                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5682                         ret = exclude_super_stripes(root, cache);
5683                         if (ret) {
5684                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5685                                         strerror(-ret));
5686                                 error++;
5687                                 continue;
5688                         }
5689                         ret = load_free_space_tree(root->fs_info, cache);
5690                         free_excluded_extents(root, cache);
5691                         if (ret < 0) {
5692                                 fprintf(stderr, "could not load free space tree: %s\n",
5693                                         strerror(-ret));
5694                                 error++;
5695                                 continue;
5696                         }
5697                         error += ret;
5698                 } else {
5699                         ret = load_free_space_cache(root->fs_info, cache);
5700                         if (!ret)
5701                                 continue;
5702                 }
5703
5704                 ret = verify_space_cache(root, cache);
5705                 if (ret) {
5706                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5707                                 cache->key.objectid);
5708                         error++;
5709                 }
5710         }
5711
5712         task_stop(ctx.info);
5713
5714         return error ? -EINVAL : 0;
5715 }
5716
5717 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5718                         u64 num_bytes, unsigned long leaf_offset,
5719                         struct extent_buffer *eb) {
5720
5721         u64 offset = 0;
5722         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5723         char *data;
5724         unsigned long csum_offset;
5725         u32 csum;
5726         u32 csum_expected;
5727         u64 read_len;
5728         u64 data_checked = 0;
5729         u64 tmp;
5730         int ret = 0;
5731         int mirror;
5732         int num_copies;
5733
5734         if (num_bytes % root->sectorsize)
5735                 return -EINVAL;
5736
5737         data = malloc(num_bytes);
5738         if (!data)
5739                 return -ENOMEM;
5740
5741         while (offset < num_bytes) {
5742                 mirror = 0;
5743 again:
5744                 read_len = num_bytes - offset;
5745                 /* read as much space once a time */
5746                 ret = read_extent_data(root, data + offset,
5747                                 bytenr + offset, &read_len, mirror);
5748                 if (ret)
5749                         goto out;
5750                 data_checked = 0;
5751                 /* verify every 4k data's checksum */
5752                 while (data_checked < read_len) {
5753                         csum = ~(u32)0;
5754                         tmp = offset + data_checked;
5755
5756                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5757                                                csum, root->sectorsize);
5758                         btrfs_csum_final(csum, (char *)&csum);
5759
5760                         csum_offset = leaf_offset +
5761                                  tmp / root->sectorsize * csum_size;
5762                         read_extent_buffer(eb, (char *)&csum_expected,
5763                                            csum_offset, csum_size);
5764                         /* try another mirror */
5765                         if (csum != csum_expected) {
5766                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5767                                                 mirror, bytenr + tmp,
5768                                                 csum, csum_expected);
5769                                 num_copies = btrfs_num_copies(
5770                                                 &root->fs_info->mapping_tree,
5771                                                 bytenr, num_bytes);
5772                                 if (mirror < num_copies - 1) {
5773                                         mirror += 1;
5774                                         goto again;
5775                                 }
5776                         }
5777                         data_checked += root->sectorsize;
5778                 }
5779                 offset += read_len;
5780         }
5781 out:
5782         free(data);
5783         return ret;
5784 }
5785
5786 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5787                                u64 num_bytes)
5788 {
5789         struct btrfs_path *path;
5790         struct extent_buffer *leaf;
5791         struct btrfs_key key;
5792         int ret;
5793
5794         path = btrfs_alloc_path();
5795         if (!path) {
5796                 fprintf(stderr, "Error allocating path\n");
5797                 return -ENOMEM;
5798         }
5799
5800         key.objectid = bytenr;
5801         key.type = BTRFS_EXTENT_ITEM_KEY;
5802         key.offset = (u64)-1;
5803
5804 again:
5805         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5806                                 0, 0);
5807         if (ret < 0) {
5808                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5809                 btrfs_free_path(path);
5810                 return ret;
5811         } else if (ret) {
5812                 if (path->slots[0] > 0) {
5813                         path->slots[0]--;
5814                 } else {
5815                         ret = btrfs_prev_leaf(root, path);
5816                         if (ret < 0) {
5817                                 goto out;
5818                         } else if (ret > 0) {
5819                                 ret = 0;
5820                                 goto out;
5821                         }
5822                 }
5823         }
5824
5825         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5826
5827         /*
5828          * Block group items come before extent items if they have the same
5829          * bytenr, so walk back one more just in case.  Dear future traveller,
5830          * first congrats on mastering time travel.  Now if it's not too much
5831          * trouble could you go back to 2006 and tell Chris to make the
5832          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5833          * EXTENT_ITEM_KEY please?
5834          */
5835         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5836                 if (path->slots[0] > 0) {
5837                         path->slots[0]--;
5838                 } else {
5839                         ret = btrfs_prev_leaf(root, path);
5840                         if (ret < 0) {
5841                                 goto out;
5842                         } else if (ret > 0) {
5843                                 ret = 0;
5844                                 goto out;
5845                         }
5846                 }
5847                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5848         }
5849
5850         while (num_bytes) {
5851                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5852                         ret = btrfs_next_leaf(root, path);
5853                         if (ret < 0) {
5854                                 fprintf(stderr, "Error going to next leaf "
5855                                         "%d\n", ret);
5856                                 btrfs_free_path(path);
5857                                 return ret;
5858                         } else if (ret) {
5859                                 break;
5860                         }
5861                 }
5862                 leaf = path->nodes[0];
5863                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5864                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5865                         path->slots[0]++;
5866                         continue;
5867                 }
5868                 if (key.objectid + key.offset < bytenr) {
5869                         path->slots[0]++;
5870                         continue;
5871                 }
5872                 if (key.objectid > bytenr + num_bytes)
5873                         break;
5874
5875                 if (key.objectid == bytenr) {
5876                         if (key.offset >= num_bytes) {
5877                                 num_bytes = 0;
5878                                 break;
5879                         }
5880                         num_bytes -= key.offset;
5881                         bytenr += key.offset;
5882                 } else if (key.objectid < bytenr) {
5883                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5884                                 num_bytes = 0;
5885                                 break;
5886                         }
5887                         num_bytes = (bytenr + num_bytes) -
5888                                 (key.objectid + key.offset);
5889                         bytenr = key.objectid + key.offset;
5890                 } else {
5891                         if (key.objectid + key.offset < bytenr + num_bytes) {
5892                                 u64 new_start = key.objectid + key.offset;
5893                                 u64 new_bytes = bytenr + num_bytes - new_start;
5894
5895                                 /*
5896                                  * Weird case, the extent is in the middle of
5897                                  * our range, we'll have to search one side
5898                                  * and then the other.  Not sure if this happens
5899                                  * in real life, but no harm in coding it up
5900                                  * anyway just in case.
5901                                  */
5902                                 btrfs_release_path(path);
5903                                 ret = check_extent_exists(root, new_start,
5904                                                           new_bytes);
5905                                 if (ret) {
5906                                         fprintf(stderr, "Right section didn't "
5907                                                 "have a record\n");
5908                                         break;
5909                                 }
5910                                 num_bytes = key.objectid - bytenr;
5911                                 goto again;
5912                         }
5913                         num_bytes = key.objectid - bytenr;
5914                 }
5915                 path->slots[0]++;
5916         }
5917         ret = 0;
5918
5919 out:
5920         if (num_bytes && !ret) {
5921                 fprintf(stderr, "There are no extents for csum range "
5922                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5923                 ret = 1;
5924         }
5925
5926         btrfs_free_path(path);
5927         return ret;
5928 }
5929
5930 static int check_csums(struct btrfs_root *root)
5931 {
5932         struct btrfs_path *path;
5933         struct extent_buffer *leaf;
5934         struct btrfs_key key;
5935         u64 offset = 0, num_bytes = 0;
5936         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5937         int errors = 0;
5938         int ret;
5939         u64 data_len;
5940         unsigned long leaf_offset;
5941
5942         root = root->fs_info->csum_root;
5943         if (!extent_buffer_uptodate(root->node)) {
5944                 fprintf(stderr, "No valid csum tree found\n");
5945                 return -ENOENT;
5946         }
5947
5948         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5949         key.type = BTRFS_EXTENT_CSUM_KEY;
5950         key.offset = 0;
5951
5952         path = btrfs_alloc_path();
5953         if (!path)
5954                 return -ENOMEM;
5955
5956         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5957         if (ret < 0) {
5958                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5959                 btrfs_free_path(path);
5960                 return ret;
5961         }
5962
5963         if (ret > 0 && path->slots[0])
5964                 path->slots[0]--;
5965         ret = 0;
5966
5967         while (1) {
5968                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5969                         ret = btrfs_next_leaf(root, path);
5970                         if (ret < 0) {
5971                                 fprintf(stderr, "Error going to next leaf "
5972                                         "%d\n", ret);
5973                                 break;
5974                         }
5975                         if (ret)
5976                                 break;
5977                 }
5978                 leaf = path->nodes[0];
5979
5980                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5981                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5982                         path->slots[0]++;
5983                         continue;
5984                 }
5985
5986                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5987                               csum_size) * root->sectorsize;
5988                 if (!check_data_csum)
5989                         goto skip_csum_check;
5990                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5991                 ret = check_extent_csums(root, key.offset, data_len,
5992                                          leaf_offset, leaf);
5993                 if (ret)
5994                         break;
5995 skip_csum_check:
5996                 if (!num_bytes) {
5997                         offset = key.offset;
5998                 } else if (key.offset != offset + num_bytes) {
5999                         ret = check_extent_exists(root, offset, num_bytes);
6000                         if (ret) {
6001                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6002                                         "there is no extent record\n",
6003                                         offset, offset+num_bytes);
6004                                 errors++;
6005                         }
6006                         offset = key.offset;
6007                         num_bytes = 0;
6008                 }
6009                 num_bytes += data_len;
6010                 path->slots[0]++;
6011         }
6012
6013         btrfs_free_path(path);
6014         return errors;
6015 }
6016
6017 static int is_dropped_key(struct btrfs_key *key,
6018                           struct btrfs_key *drop_key) {
6019         if (key->objectid < drop_key->objectid)
6020                 return 1;
6021         else if (key->objectid == drop_key->objectid) {
6022                 if (key->type < drop_key->type)
6023                         return 1;
6024                 else if (key->type == drop_key->type) {
6025                         if (key->offset < drop_key->offset)
6026                                 return 1;
6027                 }
6028         }
6029         return 0;
6030 }
6031
6032 /*
6033  * Here are the rules for FULL_BACKREF.
6034  *
6035  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6036  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6037  *      FULL_BACKREF set.
6038  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6039  *    if it happened after the relocation occurred since we'll have dropped the
6040  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6041  *    have no real way to know for sure.
6042  *
6043  * We process the blocks one root at a time, and we start from the lowest root
6044  * objectid and go to the highest.  So we can just lookup the owner backref for
6045  * the record and if we don't find it then we know it doesn't exist and we have
6046  * a FULL BACKREF.
6047  *
6048  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6049  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6050  * be set or not and then we can check later once we've gathered all the refs.
6051  */
6052 static int calc_extent_flag(struct btrfs_root *root,
6053                            struct cache_tree *extent_cache,
6054                            struct extent_buffer *buf,
6055                            struct root_item_record *ri,
6056                            u64 *flags)
6057 {
6058         struct extent_record *rec;
6059         struct cache_extent *cache;
6060         struct tree_backref *tback;
6061         u64 owner = 0;
6062
6063         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6064         /* we have added this extent before */
6065         BUG_ON(!cache);
6066         rec = container_of(cache, struct extent_record, cache);
6067
6068         /*
6069          * Except file/reloc tree, we can not have
6070          * FULL BACKREF MODE
6071          */
6072         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6073                 goto normal;
6074         /*
6075          * root node
6076          */
6077         if (buf->start == ri->bytenr)
6078                 goto normal;
6079
6080         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6081                 goto full_backref;
6082
6083         owner = btrfs_header_owner(buf);
6084         if (owner == ri->objectid)
6085                 goto normal;
6086
6087         tback = find_tree_backref(rec, 0, owner);
6088         if (!tback)
6089                 goto full_backref;
6090 normal:
6091         *flags = 0;
6092         if (rec->flag_block_full_backref != FLAG_UNSET &&
6093             rec->flag_block_full_backref != 0)
6094                 rec->bad_full_backref = 1;
6095         return 0;
6096 full_backref:
6097         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6098         if (rec->flag_block_full_backref != FLAG_UNSET &&
6099             rec->flag_block_full_backref != 1)
6100                 rec->bad_full_backref = 1;
6101         return 0;
6102 }
6103
6104 static int run_next_block(struct btrfs_root *root,
6105                           struct block_info *bits,
6106                           int bits_nr,
6107                           u64 *last,
6108                           struct cache_tree *pending,
6109                           struct cache_tree *seen,
6110                           struct cache_tree *reada,
6111                           struct cache_tree *nodes,
6112                           struct cache_tree *extent_cache,
6113                           struct cache_tree *chunk_cache,
6114                           struct rb_root *dev_cache,
6115                           struct block_group_tree *block_group_cache,
6116                           struct device_extent_tree *dev_extent_cache,
6117                           struct root_item_record *ri)
6118 {
6119         struct extent_buffer *buf;
6120         struct extent_record *rec = NULL;
6121         u64 bytenr;
6122         u32 size;
6123         u64 parent;
6124         u64 owner;
6125         u64 flags;
6126         u64 ptr;
6127         u64 gen = 0;
6128         int ret = 0;
6129         int i;
6130         int nritems;
6131         struct btrfs_key key;
6132         struct cache_extent *cache;
6133         int reada_bits;
6134
6135         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6136                                     bits_nr, &reada_bits);
6137         if (nritems == 0)
6138                 return 1;
6139
6140         if (!reada_bits) {
6141                 for(i = 0; i < nritems; i++) {
6142                         ret = add_cache_extent(reada, bits[i].start,
6143                                                bits[i].size);
6144                         if (ret == -EEXIST)
6145                                 continue;
6146
6147                         /* fixme, get the parent transid */
6148                         readahead_tree_block(root, bits[i].start,
6149                                              bits[i].size, 0);
6150                 }
6151         }
6152         *last = bits[0].start;
6153         bytenr = bits[0].start;
6154         size = bits[0].size;
6155
6156         cache = lookup_cache_extent(pending, bytenr, size);
6157         if (cache) {
6158                 remove_cache_extent(pending, cache);
6159                 free(cache);
6160         }
6161         cache = lookup_cache_extent(reada, bytenr, size);
6162         if (cache) {
6163                 remove_cache_extent(reada, cache);
6164                 free(cache);
6165         }
6166         cache = lookup_cache_extent(nodes, bytenr, size);
6167         if (cache) {
6168                 remove_cache_extent(nodes, cache);
6169                 free(cache);
6170         }
6171         cache = lookup_cache_extent(extent_cache, bytenr, size);
6172         if (cache) {
6173                 rec = container_of(cache, struct extent_record, cache);
6174                 gen = rec->parent_generation;
6175         }
6176
6177         /* fixme, get the real parent transid */
6178         buf = read_tree_block(root, bytenr, size, gen);
6179         if (!extent_buffer_uptodate(buf)) {
6180                 record_bad_block_io(root->fs_info,
6181                                     extent_cache, bytenr, size);
6182                 goto out;
6183         }
6184
6185         nritems = btrfs_header_nritems(buf);
6186
6187         flags = 0;
6188         if (!init_extent_tree) {
6189                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6190                                        btrfs_header_level(buf), 1, NULL,
6191                                        &flags);
6192                 if (ret < 0) {
6193                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6194                         if (ret < 0) {
6195                                 fprintf(stderr, "Couldn't calc extent flags\n");
6196                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6197                         }
6198                 }
6199         } else {
6200                 flags = 0;
6201                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6202                 if (ret < 0) {
6203                         fprintf(stderr, "Couldn't calc extent flags\n");
6204                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6205                 }
6206         }
6207
6208         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6209                 if (ri != NULL &&
6210                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6211                     ri->objectid == btrfs_header_owner(buf)) {
6212                         /*
6213                          * Ok we got to this block from it's original owner and
6214                          * we have FULL_BACKREF set.  Relocation can leave
6215                          * converted blocks over so this is altogether possible,
6216                          * however it's not possible if the generation > the
6217                          * last snapshot, so check for this case.
6218                          */
6219                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6220                             btrfs_header_generation(buf) > ri->last_snapshot) {
6221                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6222                                 rec->bad_full_backref = 1;
6223                         }
6224                 }
6225         } else {
6226                 if (ri != NULL &&
6227                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6228                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6229                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6230                         rec->bad_full_backref = 1;
6231                 }
6232         }
6233
6234         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6235                 rec->flag_block_full_backref = 1;
6236                 parent = bytenr;
6237                 owner = 0;
6238         } else {
6239                 rec->flag_block_full_backref = 0;
6240                 parent = 0;
6241                 owner = btrfs_header_owner(buf);
6242         }
6243
6244         ret = check_block(root, extent_cache, buf, flags);
6245         if (ret)
6246                 goto out;
6247
6248         if (btrfs_is_leaf(buf)) {
6249                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6250                 for (i = 0; i < nritems; i++) {
6251                         struct btrfs_file_extent_item *fi;
6252                         btrfs_item_key_to_cpu(buf, &key, i);
6253                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6254                                 process_extent_item(root, extent_cache, buf,
6255                                                     i);
6256                                 continue;
6257                         }
6258                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6259                                 process_extent_item(root, extent_cache, buf,
6260                                                     i);
6261                                 continue;
6262                         }
6263                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6264                                 total_csum_bytes +=
6265                                         btrfs_item_size_nr(buf, i);
6266                                 continue;
6267                         }
6268                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6269                                 process_chunk_item(chunk_cache, &key, buf, i);
6270                                 continue;
6271                         }
6272                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6273                                 process_device_item(dev_cache, &key, buf, i);
6274                                 continue;
6275                         }
6276                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6277                                 process_block_group_item(block_group_cache,
6278                                         &key, buf, i);
6279                                 continue;
6280                         }
6281                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6282                                 process_device_extent_item(dev_extent_cache,
6283                                         &key, buf, i);
6284                                 continue;
6285
6286                         }
6287                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6288 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6289                                 process_extent_ref_v0(extent_cache, buf, i);
6290 #else
6291                                 BUG();
6292 #endif
6293                                 continue;
6294                         }
6295
6296                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6297                                 add_tree_backref(extent_cache, key.objectid, 0,
6298                                                  key.offset, 0);
6299                                 continue;
6300                         }
6301                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6302                                 add_tree_backref(extent_cache, key.objectid,
6303                                                  key.offset, 0, 0);
6304                                 continue;
6305                         }
6306                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6307                                 struct btrfs_extent_data_ref *ref;
6308                                 ref = btrfs_item_ptr(buf, i,
6309                                                 struct btrfs_extent_data_ref);
6310                                 add_data_backref(extent_cache,
6311                                         key.objectid, 0,
6312                                         btrfs_extent_data_ref_root(buf, ref),
6313                                         btrfs_extent_data_ref_objectid(buf,
6314                                                                        ref),
6315                                         btrfs_extent_data_ref_offset(buf, ref),
6316                                         btrfs_extent_data_ref_count(buf, ref),
6317                                         0, root->sectorsize);
6318                                 continue;
6319                         }
6320                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6321                                 struct btrfs_shared_data_ref *ref;
6322                                 ref = btrfs_item_ptr(buf, i,
6323                                                 struct btrfs_shared_data_ref);
6324                                 add_data_backref(extent_cache,
6325                                         key.objectid, key.offset, 0, 0, 0,
6326                                         btrfs_shared_data_ref_count(buf, ref),
6327                                         0, root->sectorsize);
6328                                 continue;
6329                         }
6330                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6331                                 struct bad_item *bad;
6332
6333                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6334                                         continue;
6335                                 if (!owner)
6336                                         continue;
6337                                 bad = malloc(sizeof(struct bad_item));
6338                                 if (!bad)
6339                                         continue;
6340                                 INIT_LIST_HEAD(&bad->list);
6341                                 memcpy(&bad->key, &key,
6342                                        sizeof(struct btrfs_key));
6343                                 bad->root_id = owner;
6344                                 list_add_tail(&bad->list, &delete_items);
6345                                 continue;
6346                         }
6347                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6348                                 continue;
6349                         fi = btrfs_item_ptr(buf, i,
6350                                             struct btrfs_file_extent_item);
6351                         if (btrfs_file_extent_type(buf, fi) ==
6352                             BTRFS_FILE_EXTENT_INLINE)
6353                                 continue;
6354                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6355                                 continue;
6356
6357                         data_bytes_allocated +=
6358                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6359                         if (data_bytes_allocated < root->sectorsize) {
6360                                 abort();
6361                         }
6362                         data_bytes_referenced +=
6363                                 btrfs_file_extent_num_bytes(buf, fi);
6364                         add_data_backref(extent_cache,
6365                                 btrfs_file_extent_disk_bytenr(buf, fi),
6366                                 parent, owner, key.objectid, key.offset -
6367                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6368                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6369                 }
6370         } else {
6371                 int level;
6372                 struct btrfs_key first_key;
6373
6374                 first_key.objectid = 0;
6375
6376                 if (nritems > 0)
6377                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6378                 level = btrfs_header_level(buf);
6379                 for (i = 0; i < nritems; i++) {
6380                         struct extent_record tmpl;
6381
6382                         ptr = btrfs_node_blockptr(buf, i);
6383                         size = root->nodesize;
6384                         btrfs_node_key_to_cpu(buf, &key, i);
6385                         if (ri != NULL) {
6386                                 if ((level == ri->drop_level)
6387                                     && is_dropped_key(&key, &ri->drop_key)) {
6388                                         continue;
6389                                 }
6390                         }
6391
6392                         memset(&tmpl, 0, sizeof(tmpl));
6393                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6394                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6395                         tmpl.start = ptr;
6396                         tmpl.nr = size;
6397                         tmpl.refs = 1;
6398                         tmpl.metadata = 1;
6399                         tmpl.max_size = size;
6400                         ret = add_extent_rec(extent_cache, &tmpl);
6401                         BUG_ON(ret);
6402
6403                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6404
6405                         if (level > 1) {
6406                                 add_pending(nodes, seen, ptr, size);
6407                         } else {
6408                                 add_pending(pending, seen, ptr, size);
6409                         }
6410                 }
6411                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6412                                       nritems) * sizeof(struct btrfs_key_ptr);
6413         }
6414         total_btree_bytes += buf->len;
6415         if (fs_root_objectid(btrfs_header_owner(buf)))
6416                 total_fs_tree_bytes += buf->len;
6417         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6418                 total_extent_tree_bytes += buf->len;
6419         if (!found_old_backref &&
6420             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6421             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6422             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6423                 found_old_backref = 1;
6424 out:
6425         free_extent_buffer(buf);
6426         return ret;
6427 }
6428
6429 static int add_root_to_pending(struct extent_buffer *buf,
6430                                struct cache_tree *extent_cache,
6431                                struct cache_tree *pending,
6432                                struct cache_tree *seen,
6433                                struct cache_tree *nodes,
6434                                u64 objectid)
6435 {
6436         struct extent_record tmpl;
6437
6438         if (btrfs_header_level(buf) > 0)
6439                 add_pending(nodes, seen, buf->start, buf->len);
6440         else
6441                 add_pending(pending, seen, buf->start, buf->len);
6442
6443         memset(&tmpl, 0, sizeof(tmpl));
6444         tmpl.start = buf->start;
6445         tmpl.nr = buf->len;
6446         tmpl.is_root = 1;
6447         tmpl.refs = 1;
6448         tmpl.metadata = 1;
6449         tmpl.max_size = buf->len;
6450         add_extent_rec(extent_cache, &tmpl);
6451
6452         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6453             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6454                 add_tree_backref(extent_cache, buf->start, buf->start,
6455                                  0, 1);
6456         else
6457                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6458         return 0;
6459 }
6460
6461 /* as we fix the tree, we might be deleting blocks that
6462  * we're tracking for repair.  This hook makes sure we
6463  * remove any backrefs for blocks as we are fixing them.
6464  */
6465 static int free_extent_hook(struct btrfs_trans_handle *trans,
6466                             struct btrfs_root *root,
6467                             u64 bytenr, u64 num_bytes, u64 parent,
6468                             u64 root_objectid, u64 owner, u64 offset,
6469                             int refs_to_drop)
6470 {
6471         struct extent_record *rec;
6472         struct cache_extent *cache;
6473         int is_data;
6474         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6475
6476         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6477         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6478         if (!cache)
6479                 return 0;
6480
6481         rec = container_of(cache, struct extent_record, cache);
6482         if (is_data) {
6483                 struct data_backref *back;
6484                 back = find_data_backref(rec, parent, root_objectid, owner,
6485                                          offset, 1, bytenr, num_bytes);
6486                 if (!back)
6487                         goto out;
6488                 if (back->node.found_ref) {
6489                         back->found_ref -= refs_to_drop;
6490                         if (rec->refs)
6491                                 rec->refs -= refs_to_drop;
6492                 }
6493                 if (back->node.found_extent_tree) {
6494                         back->num_refs -= refs_to_drop;
6495                         if (rec->extent_item_refs)
6496                                 rec->extent_item_refs -= refs_to_drop;
6497                 }
6498                 if (back->found_ref == 0)
6499                         back->node.found_ref = 0;
6500                 if (back->num_refs == 0)
6501                         back->node.found_extent_tree = 0;
6502
6503                 if (!back->node.found_extent_tree && back->node.found_ref) {
6504                         rb_erase(&back->node.node, &rec->backref_tree);
6505                         free(back);
6506                 }
6507         } else {
6508                 struct tree_backref *back;
6509                 back = find_tree_backref(rec, parent, root_objectid);
6510                 if (!back)
6511                         goto out;
6512                 if (back->node.found_ref) {
6513                         if (rec->refs)
6514                                 rec->refs--;
6515                         back->node.found_ref = 0;
6516                 }
6517                 if (back->node.found_extent_tree) {
6518                         if (rec->extent_item_refs)
6519                                 rec->extent_item_refs--;
6520                         back->node.found_extent_tree = 0;
6521                 }
6522                 if (!back->node.found_extent_tree && back->node.found_ref) {
6523                         rb_erase(&back->node.node, &rec->backref_tree);
6524                         free(back);
6525                 }
6526         }
6527         maybe_free_extent_rec(extent_cache, rec);
6528 out:
6529         return 0;
6530 }
6531
6532 static int delete_extent_records(struct btrfs_trans_handle *trans,
6533                                  struct btrfs_root *root,
6534                                  struct btrfs_path *path,
6535                                  u64 bytenr, u64 new_len)
6536 {
6537         struct btrfs_key key;
6538         struct btrfs_key found_key;
6539         struct extent_buffer *leaf;
6540         int ret;
6541         int slot;
6542
6543
6544         key.objectid = bytenr;
6545         key.type = (u8)-1;
6546         key.offset = (u64)-1;
6547
6548         while(1) {
6549                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6550                                         &key, path, 0, 1);
6551                 if (ret < 0)
6552                         break;
6553
6554                 if (ret > 0) {
6555                         ret = 0;
6556                         if (path->slots[0] == 0)
6557                                 break;
6558                         path->slots[0]--;
6559                 }
6560                 ret = 0;
6561
6562                 leaf = path->nodes[0];
6563                 slot = path->slots[0];
6564
6565                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6566                 if (found_key.objectid != bytenr)
6567                         break;
6568
6569                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6570                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6571                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6572                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6573                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6574                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6575                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6576                         btrfs_release_path(path);
6577                         if (found_key.type == 0) {
6578                                 if (found_key.offset == 0)
6579                                         break;
6580                                 key.offset = found_key.offset - 1;
6581                                 key.type = found_key.type;
6582                         }
6583                         key.type = found_key.type - 1;
6584                         key.offset = (u64)-1;
6585                         continue;
6586                 }
6587
6588                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6589                         found_key.objectid, found_key.type, found_key.offset);
6590
6591                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6592                 if (ret)
6593                         break;
6594                 btrfs_release_path(path);
6595
6596                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6597                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6598                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6599                                 found_key.offset : root->nodesize;
6600
6601                         ret = btrfs_update_block_group(trans, root, bytenr,
6602                                                        bytes, 0, 0);
6603                         if (ret)
6604                                 break;
6605                 }
6606         }
6607
6608         btrfs_release_path(path);
6609         return ret;
6610 }
6611
6612 /*
6613  * for a single backref, this will allocate a new extent
6614  * and add the backref to it.
6615  */
6616 static int record_extent(struct btrfs_trans_handle *trans,
6617                          struct btrfs_fs_info *info,
6618                          struct btrfs_path *path,
6619                          struct extent_record *rec,
6620                          struct extent_backref *back,
6621                          int allocated, u64 flags)
6622 {
6623         int ret;
6624         struct btrfs_root *extent_root = info->extent_root;
6625         struct extent_buffer *leaf;
6626         struct btrfs_key ins_key;
6627         struct btrfs_extent_item *ei;
6628         struct tree_backref *tback;
6629         struct data_backref *dback;
6630         struct btrfs_tree_block_info *bi;
6631
6632         if (!back->is_data)
6633                 rec->max_size = max_t(u64, rec->max_size,
6634                                     info->extent_root->nodesize);
6635
6636         if (!allocated) {
6637                 u32 item_size = sizeof(*ei);
6638
6639                 if (!back->is_data)
6640                         item_size += sizeof(*bi);
6641
6642                 ins_key.objectid = rec->start;
6643                 ins_key.offset = rec->max_size;
6644                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6645
6646                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6647                                         &ins_key, item_size);
6648                 if (ret)
6649                         goto fail;
6650
6651                 leaf = path->nodes[0];
6652                 ei = btrfs_item_ptr(leaf, path->slots[0],
6653                                     struct btrfs_extent_item);
6654
6655                 btrfs_set_extent_refs(leaf, ei, 0);
6656                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6657
6658                 if (back->is_data) {
6659                         btrfs_set_extent_flags(leaf, ei,
6660                                                BTRFS_EXTENT_FLAG_DATA);
6661                 } else {
6662                         struct btrfs_disk_key copy_key;;
6663
6664                         tback = to_tree_backref(back);
6665                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6666                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6667                                              sizeof(*bi));
6668
6669                         btrfs_set_disk_key_objectid(&copy_key,
6670                                                     rec->info_objectid);
6671                         btrfs_set_disk_key_type(&copy_key, 0);
6672                         btrfs_set_disk_key_offset(&copy_key, 0);
6673
6674                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6675                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6676
6677                         btrfs_set_extent_flags(leaf, ei,
6678                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6679                 }
6680
6681                 btrfs_mark_buffer_dirty(leaf);
6682                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6683                                                rec->max_size, 1, 0);
6684                 if (ret)
6685                         goto fail;
6686                 btrfs_release_path(path);
6687         }
6688
6689         if (back->is_data) {
6690                 u64 parent;
6691                 int i;
6692
6693                 dback = to_data_backref(back);
6694                 if (back->full_backref)
6695                         parent = dback->parent;
6696                 else
6697                         parent = 0;
6698
6699                 for (i = 0; i < dback->found_ref; i++) {
6700                         /* if parent != 0, we're doing a full backref
6701                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6702                          * just makes the backref allocator create a data
6703                          * backref
6704                          */
6705                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6706                                                    rec->start, rec->max_size,
6707                                                    parent,
6708                                                    dback->root,
6709                                                    parent ?
6710                                                    BTRFS_FIRST_FREE_OBJECTID :
6711                                                    dback->owner,
6712                                                    dback->offset);
6713                         if (ret)
6714                                 break;
6715                 }
6716                 fprintf(stderr, "adding new data backref"
6717                                 " on %llu %s %llu owner %llu"
6718                                 " offset %llu found %d\n",
6719                                 (unsigned long long)rec->start,
6720                                 back->full_backref ?
6721                                 "parent" : "root",
6722                                 back->full_backref ?
6723                                 (unsigned long long)parent :
6724                                 (unsigned long long)dback->root,
6725                                 (unsigned long long)dback->owner,
6726                                 (unsigned long long)dback->offset,
6727                                 dback->found_ref);
6728         } else {
6729                 u64 parent;
6730
6731                 tback = to_tree_backref(back);
6732                 if (back->full_backref)
6733                         parent = tback->parent;
6734                 else
6735                         parent = 0;
6736
6737                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6738                                            rec->start, rec->max_size,
6739                                            parent, tback->root, 0, 0);
6740                 fprintf(stderr, "adding new tree backref on "
6741                         "start %llu len %llu parent %llu root %llu\n",
6742                         rec->start, rec->max_size, parent, tback->root);
6743         }
6744 fail:
6745         btrfs_release_path(path);
6746         return ret;
6747 }
6748
6749 static struct extent_entry *find_entry(struct list_head *entries,
6750                                        u64 bytenr, u64 bytes)
6751 {
6752         struct extent_entry *entry = NULL;
6753
6754         list_for_each_entry(entry, entries, list) {
6755                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6756                         return entry;
6757         }
6758
6759         return NULL;
6760 }
6761
6762 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6763 {
6764         struct extent_entry *entry, *best = NULL, *prev = NULL;
6765
6766         list_for_each_entry(entry, entries, list) {
6767                 if (!prev) {
6768                         prev = entry;
6769                         continue;
6770                 }
6771
6772                 /*
6773                  * If there are as many broken entries as entries then we know
6774                  * not to trust this particular entry.
6775                  */
6776                 if (entry->broken == entry->count)
6777                         continue;
6778
6779                 /*
6780                  * If our current entry == best then we can't be sure our best
6781                  * is really the best, so we need to keep searching.
6782                  */
6783                 if (best && best->count == entry->count) {
6784                         prev = entry;
6785                         best = NULL;
6786                         continue;
6787                 }
6788
6789                 /* Prev == entry, not good enough, have to keep searching */
6790                 if (!prev->broken && prev->count == entry->count)
6791                         continue;
6792
6793                 if (!best)
6794                         best = (prev->count > entry->count) ? prev : entry;
6795                 else if (best->count < entry->count)
6796                         best = entry;
6797                 prev = entry;
6798         }
6799
6800         return best;
6801 }
6802
6803 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6804                       struct data_backref *dback, struct extent_entry *entry)
6805 {
6806         struct btrfs_trans_handle *trans;
6807         struct btrfs_root *root;
6808         struct btrfs_file_extent_item *fi;
6809         struct extent_buffer *leaf;
6810         struct btrfs_key key;
6811         u64 bytenr, bytes;
6812         int ret, err;
6813
6814         key.objectid = dback->root;
6815         key.type = BTRFS_ROOT_ITEM_KEY;
6816         key.offset = (u64)-1;
6817         root = btrfs_read_fs_root(info, &key);
6818         if (IS_ERR(root)) {
6819                 fprintf(stderr, "Couldn't find root for our ref\n");
6820                 return -EINVAL;
6821         }
6822
6823         /*
6824          * The backref points to the original offset of the extent if it was
6825          * split, so we need to search down to the offset we have and then walk
6826          * forward until we find the backref we're looking for.
6827          */
6828         key.objectid = dback->owner;
6829         key.type = BTRFS_EXTENT_DATA_KEY;
6830         key.offset = dback->offset;
6831         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6832         if (ret < 0) {
6833                 fprintf(stderr, "Error looking up ref %d\n", ret);
6834                 return ret;
6835         }
6836
6837         while (1) {
6838                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6839                         ret = btrfs_next_leaf(root, path);
6840                         if (ret) {
6841                                 fprintf(stderr, "Couldn't find our ref, next\n");
6842                                 return -EINVAL;
6843                         }
6844                 }
6845                 leaf = path->nodes[0];
6846                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6847                 if (key.objectid != dback->owner ||
6848                     key.type != BTRFS_EXTENT_DATA_KEY) {
6849                         fprintf(stderr, "Couldn't find our ref, search\n");
6850                         return -EINVAL;
6851                 }
6852                 fi = btrfs_item_ptr(leaf, path->slots[0],
6853                                     struct btrfs_file_extent_item);
6854                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6855                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6856
6857                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6858                         break;
6859                 path->slots[0]++;
6860         }
6861
6862         btrfs_release_path(path);
6863
6864         trans = btrfs_start_transaction(root, 1);
6865         if (IS_ERR(trans))
6866                 return PTR_ERR(trans);
6867
6868         /*
6869          * Ok we have the key of the file extent we want to fix, now we can cow
6870          * down to the thing and fix it.
6871          */
6872         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6873         if (ret < 0) {
6874                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6875                         key.objectid, key.type, key.offset, ret);
6876                 goto out;
6877         }
6878         if (ret > 0) {
6879                 fprintf(stderr, "Well that's odd, we just found this key "
6880                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6881                         key.offset);
6882                 ret = -EINVAL;
6883                 goto out;
6884         }
6885         leaf = path->nodes[0];
6886         fi = btrfs_item_ptr(leaf, path->slots[0],
6887                             struct btrfs_file_extent_item);
6888
6889         if (btrfs_file_extent_compression(leaf, fi) &&
6890             dback->disk_bytenr != entry->bytenr) {
6891                 fprintf(stderr, "Ref doesn't match the record start and is "
6892                         "compressed, please take a btrfs-image of this file "
6893                         "system and send it to a btrfs developer so they can "
6894                         "complete this functionality for bytenr %Lu\n",
6895                         dback->disk_bytenr);
6896                 ret = -EINVAL;
6897                 goto out;
6898         }
6899
6900         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6901                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6902         } else if (dback->disk_bytenr > entry->bytenr) {
6903                 u64 off_diff, offset;
6904
6905                 off_diff = dback->disk_bytenr - entry->bytenr;
6906                 offset = btrfs_file_extent_offset(leaf, fi);
6907                 if (dback->disk_bytenr + offset +
6908                     btrfs_file_extent_num_bytes(leaf, fi) >
6909                     entry->bytenr + entry->bytes) {
6910                         fprintf(stderr, "Ref is past the entry end, please "
6911                                 "take a btrfs-image of this file system and "
6912                                 "send it to a btrfs developer, ref %Lu\n",
6913                                 dback->disk_bytenr);
6914                         ret = -EINVAL;
6915                         goto out;
6916                 }
6917                 offset += off_diff;
6918                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6919                 btrfs_set_file_extent_offset(leaf, fi, offset);
6920         } else if (dback->disk_bytenr < entry->bytenr) {
6921                 u64 offset;
6922
6923                 offset = btrfs_file_extent_offset(leaf, fi);
6924                 if (dback->disk_bytenr + offset < entry->bytenr) {
6925                         fprintf(stderr, "Ref is before the entry start, please"
6926                                 " take a btrfs-image of this file system and "
6927                                 "send it to a btrfs developer, ref %Lu\n",
6928                                 dback->disk_bytenr);
6929                         ret = -EINVAL;
6930                         goto out;
6931                 }
6932
6933                 offset += dback->disk_bytenr;
6934                 offset -= entry->bytenr;
6935                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6936                 btrfs_set_file_extent_offset(leaf, fi, offset);
6937         }
6938
6939         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6940
6941         /*
6942          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6943          * only do this if we aren't using compression, otherwise it's a
6944          * trickier case.
6945          */
6946         if (!btrfs_file_extent_compression(leaf, fi))
6947                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6948         else
6949                 printf("ram bytes may be wrong?\n");
6950         btrfs_mark_buffer_dirty(leaf);
6951 out:
6952         err = btrfs_commit_transaction(trans, root);
6953         btrfs_release_path(path);
6954         return ret ? ret : err;
6955 }
6956
6957 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6958                            struct extent_record *rec)
6959 {
6960         struct extent_backref *back, *tmp;
6961         struct data_backref *dback;
6962         struct extent_entry *entry, *best = NULL;
6963         LIST_HEAD(entries);
6964         int nr_entries = 0;
6965         int broken_entries = 0;
6966         int ret = 0;
6967         short mismatch = 0;
6968
6969         /*
6970          * Metadata is easy and the backrefs should always agree on bytenr and
6971          * size, if not we've got bigger issues.
6972          */
6973         if (rec->metadata)
6974                 return 0;
6975
6976         rbtree_postorder_for_each_entry_safe(back, tmp,
6977                                              &rec->backref_tree, node) {
6978                 if (back->full_backref || !back->is_data)
6979                         continue;
6980
6981                 dback = to_data_backref(back);
6982
6983                 /*
6984                  * We only pay attention to backrefs that we found a real
6985                  * backref for.
6986                  */
6987                 if (dback->found_ref == 0)
6988                         continue;
6989
6990                 /*
6991                  * For now we only catch when the bytes don't match, not the
6992                  * bytenr.  We can easily do this at the same time, but I want
6993                  * to have a fs image to test on before we just add repair
6994                  * functionality willy-nilly so we know we won't screw up the
6995                  * repair.
6996                  */
6997
6998                 entry = find_entry(&entries, dback->disk_bytenr,
6999                                    dback->bytes);
7000                 if (!entry) {
7001                         entry = malloc(sizeof(struct extent_entry));
7002                         if (!entry) {
7003                                 ret = -ENOMEM;
7004                                 goto out;
7005                         }
7006                         memset(entry, 0, sizeof(*entry));
7007                         entry->bytenr = dback->disk_bytenr;
7008                         entry->bytes = dback->bytes;
7009                         list_add_tail(&entry->list, &entries);
7010                         nr_entries++;
7011                 }
7012
7013                 /*
7014                  * If we only have on entry we may think the entries agree when
7015                  * in reality they don't so we have to do some extra checking.
7016                  */
7017                 if (dback->disk_bytenr != rec->start ||
7018                     dback->bytes != rec->nr || back->broken)
7019                         mismatch = 1;
7020
7021                 if (back->broken) {
7022                         entry->broken++;
7023                         broken_entries++;
7024                 }
7025
7026                 entry->count++;
7027         }
7028
7029         /* Yay all the backrefs agree, carry on good sir */
7030         if (nr_entries <= 1 && !mismatch)
7031                 goto out;
7032
7033         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7034                 "%Lu\n", rec->start);
7035
7036         /*
7037          * First we want to see if the backrefs can agree amongst themselves who
7038          * is right, so figure out which one of the entries has the highest
7039          * count.
7040          */
7041         best = find_most_right_entry(&entries);
7042
7043         /*
7044          * Ok so we may have an even split between what the backrefs think, so
7045          * this is where we use the extent ref to see what it thinks.
7046          */
7047         if (!best) {
7048                 entry = find_entry(&entries, rec->start, rec->nr);
7049                 if (!entry && (!broken_entries || !rec->found_rec)) {
7050                         fprintf(stderr, "Backrefs don't agree with each other "
7051                                 "and extent record doesn't agree with anybody,"
7052                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7053                                 rec->start, rec->nr);
7054                         ret = -EINVAL;
7055                         goto out;
7056                 } else if (!entry) {
7057                         /*
7058                          * Ok our backrefs were broken, we'll assume this is the
7059                          * correct value and add an entry for this range.
7060                          */
7061                         entry = malloc(sizeof(struct extent_entry));
7062                         if (!entry) {
7063                                 ret = -ENOMEM;
7064                                 goto out;
7065                         }
7066                         memset(entry, 0, sizeof(*entry));
7067                         entry->bytenr = rec->start;
7068                         entry->bytes = rec->nr;
7069                         list_add_tail(&entry->list, &entries);
7070                         nr_entries++;
7071                 }
7072                 entry->count++;
7073                 best = find_most_right_entry(&entries);
7074                 if (!best) {
7075                         fprintf(stderr, "Backrefs and extent record evenly "
7076                                 "split on who is right, this is going to "
7077                                 "require user input to fix bytenr %Lu bytes "
7078                                 "%Lu\n", rec->start, rec->nr);
7079                         ret = -EINVAL;
7080                         goto out;
7081                 }
7082         }
7083
7084         /*
7085          * I don't think this can happen currently as we'll abort() if we catch
7086          * this case higher up, but in case somebody removes that we still can't
7087          * deal with it properly here yet, so just bail out of that's the case.
7088          */
7089         if (best->bytenr != rec->start) {
7090                 fprintf(stderr, "Extent start and backref starts don't match, "
7091                         "please use btrfs-image on this file system and send "
7092                         "it to a btrfs developer so they can make fsck fix "
7093                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7094                         rec->start, rec->nr);
7095                 ret = -EINVAL;
7096                 goto out;
7097         }
7098
7099         /*
7100          * Ok great we all agreed on an extent record, let's go find the real
7101          * references and fix up the ones that don't match.
7102          */
7103         rbtree_postorder_for_each_entry_safe(back, tmp,
7104                                              &rec->backref_tree, node) {
7105                 if (back->full_backref || !back->is_data)
7106                         continue;
7107
7108                 dback = to_data_backref(back);
7109
7110                 /*
7111                  * Still ignoring backrefs that don't have a real ref attached
7112                  * to them.
7113                  */
7114                 if (dback->found_ref == 0)
7115                         continue;
7116
7117                 if (dback->bytes == best->bytes &&
7118                     dback->disk_bytenr == best->bytenr)
7119                         continue;
7120
7121                 ret = repair_ref(info, path, dback, best);
7122                 if (ret)
7123                         goto out;
7124         }
7125
7126         /*
7127          * Ok we messed with the actual refs, which means we need to drop our
7128          * entire cache and go back and rescan.  I know this is a huge pain and
7129          * adds a lot of extra work, but it's the only way to be safe.  Once all
7130          * the backrefs agree we may not need to do anything to the extent
7131          * record itself.
7132          */
7133         ret = -EAGAIN;
7134 out:
7135         while (!list_empty(&entries)) {
7136                 entry = list_entry(entries.next, struct extent_entry, list);
7137                 list_del_init(&entry->list);
7138                 free(entry);
7139         }
7140         return ret;
7141 }
7142
7143 static int process_duplicates(struct btrfs_root *root,
7144                               struct cache_tree *extent_cache,
7145                               struct extent_record *rec)
7146 {
7147         struct extent_record *good, *tmp;
7148         struct cache_extent *cache;
7149         int ret;
7150
7151         /*
7152          * If we found a extent record for this extent then return, or if we
7153          * have more than one duplicate we are likely going to need to delete
7154          * something.
7155          */
7156         if (rec->found_rec || rec->num_duplicates > 1)
7157                 return 0;
7158
7159         /* Shouldn't happen but just in case */
7160         BUG_ON(!rec->num_duplicates);
7161
7162         /*
7163          * So this happens if we end up with a backref that doesn't match the
7164          * actual extent entry.  So either the backref is bad or the extent
7165          * entry is bad.  Either way we want to have the extent_record actually
7166          * reflect what we found in the extent_tree, so we need to take the
7167          * duplicate out and use that as the extent_record since the only way we
7168          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7169          */
7170         remove_cache_extent(extent_cache, &rec->cache);
7171
7172         good = to_extent_record(rec->dups.next);
7173         list_del_init(&good->list);
7174         INIT_LIST_HEAD(&good->backrefs);
7175         INIT_LIST_HEAD(&good->dups);
7176         good->cache.start = good->start;
7177         good->cache.size = good->nr;
7178         good->content_checked = 0;
7179         good->owner_ref_checked = 0;
7180         good->num_duplicates = 0;
7181         good->refs = rec->refs;
7182         list_splice_init(&rec->backrefs, &good->backrefs);
7183         while (1) {
7184                 cache = lookup_cache_extent(extent_cache, good->start,
7185                                             good->nr);
7186                 if (!cache)
7187                         break;
7188                 tmp = container_of(cache, struct extent_record, cache);
7189
7190                 /*
7191                  * If we find another overlapping extent and it's found_rec is
7192                  * set then it's a duplicate and we need to try and delete
7193                  * something.
7194                  */
7195                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7196                         if (list_empty(&good->list))
7197                                 list_add_tail(&good->list,
7198                                               &duplicate_extents);
7199                         good->num_duplicates += tmp->num_duplicates + 1;
7200                         list_splice_init(&tmp->dups, &good->dups);
7201                         list_del_init(&tmp->list);
7202                         list_add_tail(&tmp->list, &good->dups);
7203                         remove_cache_extent(extent_cache, &tmp->cache);
7204                         continue;
7205                 }
7206
7207                 /*
7208                  * Ok we have another non extent item backed extent rec, so lets
7209                  * just add it to this extent and carry on like we did above.
7210                  */
7211                 good->refs += tmp->refs;
7212                 list_splice_init(&tmp->backrefs, &good->backrefs);
7213                 remove_cache_extent(extent_cache, &tmp->cache);
7214                 free(tmp);
7215         }
7216         ret = insert_cache_extent(extent_cache, &good->cache);
7217         BUG_ON(ret);
7218         free(rec);
7219         return good->num_duplicates ? 0 : 1;
7220 }
7221
7222 static int delete_duplicate_records(struct btrfs_root *root,
7223                                     struct extent_record *rec)
7224 {
7225         struct btrfs_trans_handle *trans;
7226         LIST_HEAD(delete_list);
7227         struct btrfs_path *path;
7228         struct extent_record *tmp, *good, *n;
7229         int nr_del = 0;
7230         int ret = 0, err;
7231         struct btrfs_key key;
7232
7233         path = btrfs_alloc_path();
7234         if (!path) {
7235                 ret = -ENOMEM;
7236                 goto out;
7237         }
7238
7239         good = rec;
7240         /* Find the record that covers all of the duplicates. */
7241         list_for_each_entry(tmp, &rec->dups, list) {
7242                 if (good->start < tmp->start)
7243                         continue;
7244                 if (good->nr > tmp->nr)
7245                         continue;
7246
7247                 if (tmp->start + tmp->nr < good->start + good->nr) {
7248                         fprintf(stderr, "Ok we have overlapping extents that "
7249                                 "aren't completely covered by each other, this "
7250                                 "is going to require more careful thought.  "
7251                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7252                                 tmp->start, tmp->nr, good->start, good->nr);
7253                         abort();
7254                 }
7255                 good = tmp;
7256         }
7257
7258         if (good != rec)
7259                 list_add_tail(&rec->list, &delete_list);
7260
7261         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7262                 if (tmp == good)
7263                         continue;
7264                 list_move_tail(&tmp->list, &delete_list);
7265         }
7266
7267         root = root->fs_info->extent_root;
7268         trans = btrfs_start_transaction(root, 1);
7269         if (IS_ERR(trans)) {
7270                 ret = PTR_ERR(trans);
7271                 goto out;
7272         }
7273
7274         list_for_each_entry(tmp, &delete_list, list) {
7275                 if (tmp->found_rec == 0)
7276                         continue;
7277                 key.objectid = tmp->start;
7278                 key.type = BTRFS_EXTENT_ITEM_KEY;
7279                 key.offset = tmp->nr;
7280
7281                 /* Shouldn't happen but just in case */
7282                 if (tmp->metadata) {
7283                         fprintf(stderr, "Well this shouldn't happen, extent "
7284                                 "record overlaps but is metadata? "
7285                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7286                         abort();
7287                 }
7288
7289                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7290                 if (ret) {
7291                         if (ret > 0)
7292                                 ret = -EINVAL;
7293                         break;
7294                 }
7295                 ret = btrfs_del_item(trans, root, path);
7296                 if (ret)
7297                         break;
7298                 btrfs_release_path(path);
7299                 nr_del++;
7300         }
7301         err = btrfs_commit_transaction(trans, root);
7302         if (err && !ret)
7303                 ret = err;
7304 out:
7305         while (!list_empty(&delete_list)) {
7306                 tmp = to_extent_record(delete_list.next);
7307                 list_del_init(&tmp->list);
7308                 if (tmp == rec)
7309                         continue;
7310                 free(tmp);
7311         }
7312
7313         while (!list_empty(&rec->dups)) {
7314                 tmp = to_extent_record(rec->dups.next);
7315                 list_del_init(&tmp->list);
7316                 free(tmp);
7317         }
7318
7319         btrfs_free_path(path);
7320
7321         if (!ret && !nr_del)
7322                 rec->num_duplicates = 0;
7323
7324         return ret ? ret : nr_del;
7325 }
7326
7327 static int find_possible_backrefs(struct btrfs_fs_info *info,
7328                                   struct btrfs_path *path,
7329                                   struct cache_tree *extent_cache,
7330                                   struct extent_record *rec)
7331 {
7332         struct btrfs_root *root;
7333         struct extent_backref *back, *tmp;
7334         struct data_backref *dback;
7335         struct cache_extent *cache;
7336         struct btrfs_file_extent_item *fi;
7337         struct btrfs_key key;
7338         u64 bytenr, bytes;
7339         int ret;
7340
7341         rbtree_postorder_for_each_entry_safe(back, tmp,
7342                                              &rec->backref_tree, node) {
7343                 /* Don't care about full backrefs (poor unloved backrefs) */
7344                 if (back->full_backref || !back->is_data)
7345                         continue;
7346
7347                 dback = to_data_backref(back);
7348
7349                 /* We found this one, we don't need to do a lookup */
7350                 if (dback->found_ref)
7351                         continue;
7352
7353                 key.objectid = dback->root;
7354                 key.type = BTRFS_ROOT_ITEM_KEY;
7355                 key.offset = (u64)-1;
7356
7357                 root = btrfs_read_fs_root(info, &key);
7358
7359                 /* No root, definitely a bad ref, skip */
7360                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7361                         continue;
7362                 /* Other err, exit */
7363                 if (IS_ERR(root))
7364                         return PTR_ERR(root);
7365
7366                 key.objectid = dback->owner;
7367                 key.type = BTRFS_EXTENT_DATA_KEY;
7368                 key.offset = dback->offset;
7369                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7370                 if (ret) {
7371                         btrfs_release_path(path);
7372                         if (ret < 0)
7373                                 return ret;
7374                         /* Didn't find it, we can carry on */
7375                         ret = 0;
7376                         continue;
7377                 }
7378
7379                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7380                                     struct btrfs_file_extent_item);
7381                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7382                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7383                 btrfs_release_path(path);
7384                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7385                 if (cache) {
7386                         struct extent_record *tmp;
7387                         tmp = container_of(cache, struct extent_record, cache);
7388
7389                         /*
7390                          * If we found an extent record for the bytenr for this
7391                          * particular backref then we can't add it to our
7392                          * current extent record.  We only want to add backrefs
7393                          * that don't have a corresponding extent item in the
7394                          * extent tree since they likely belong to this record
7395                          * and we need to fix it if it doesn't match bytenrs.
7396                          */
7397                         if  (tmp->found_rec)
7398                                 continue;
7399                 }
7400
7401                 dback->found_ref += 1;
7402                 dback->disk_bytenr = bytenr;
7403                 dback->bytes = bytes;
7404
7405                 /*
7406                  * Set this so the verify backref code knows not to trust the
7407                  * values in this backref.
7408                  */
7409                 back->broken = 1;
7410         }
7411
7412         return 0;
7413 }
7414
7415 /*
7416  * Record orphan data ref into corresponding root.
7417  *
7418  * Return 0 if the extent item contains data ref and recorded.
7419  * Return 1 if the extent item contains no useful data ref
7420  *   On that case, it may contains only shared_dataref or metadata backref
7421  *   or the file extent exists(this should be handled by the extent bytenr
7422  *   recovery routine)
7423  * Return <0 if something goes wrong.
7424  */
7425 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7426                                       struct extent_record *rec)
7427 {
7428         struct btrfs_key key;
7429         struct btrfs_root *dest_root;
7430         struct extent_backref *back, *tmp;
7431         struct data_backref *dback;
7432         struct orphan_data_extent *orphan;
7433         struct btrfs_path *path;
7434         int recorded_data_ref = 0;
7435         int ret = 0;
7436
7437         if (rec->metadata)
7438                 return 1;
7439         path = btrfs_alloc_path();
7440         if (!path)
7441                 return -ENOMEM;
7442         rbtree_postorder_for_each_entry_safe(back, tmp,
7443                                              &rec->backref_tree, node) {
7444                 if (back->full_backref || !back->is_data ||
7445                     !back->found_extent_tree)
7446                         continue;
7447                 dback = to_data_backref(back);
7448                 if (dback->found_ref)
7449                         continue;
7450                 key.objectid = dback->root;
7451                 key.type = BTRFS_ROOT_ITEM_KEY;
7452                 key.offset = (u64)-1;
7453
7454                 dest_root = btrfs_read_fs_root(fs_info, &key);
7455
7456                 /* For non-exist root we just skip it */
7457                 if (IS_ERR(dest_root) || !dest_root)
7458                         continue;
7459
7460                 key.objectid = dback->owner;
7461                 key.type = BTRFS_EXTENT_DATA_KEY;
7462                 key.offset = dback->offset;
7463
7464                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7465                 /*
7466                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7467                  * we need to record it for inode/file extent rebuild.
7468                  * For ret > 0, we record it only for file extent rebuild.
7469                  * For ret == 0, the file extent exists but only bytenr
7470                  * mismatch, let the original bytenr fix routine to handle,
7471                  * don't record it.
7472                  */
7473                 if (ret == 0)
7474                         continue;
7475                 ret = 0;
7476                 orphan = malloc(sizeof(*orphan));
7477                 if (!orphan) {
7478                         ret = -ENOMEM;
7479                         goto out;
7480                 }
7481                 INIT_LIST_HEAD(&orphan->list);
7482                 orphan->root = dback->root;
7483                 orphan->objectid = dback->owner;
7484                 orphan->offset = dback->offset;
7485                 orphan->disk_bytenr = rec->cache.start;
7486                 orphan->disk_len = rec->cache.size;
7487                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7488                 recorded_data_ref = 1;
7489         }
7490 out:
7491         btrfs_free_path(path);
7492         if (!ret)
7493                 return !recorded_data_ref;
7494         else
7495                 return ret;
7496 }
7497
7498 /*
7499  * when an incorrect extent item is found, this will delete
7500  * all of the existing entries for it and recreate them
7501  * based on what the tree scan found.
7502  */
7503 static int fixup_extent_refs(struct btrfs_fs_info *info,
7504                              struct cache_tree *extent_cache,
7505                              struct extent_record *rec)
7506 {
7507         struct btrfs_trans_handle *trans = NULL;
7508         int ret;
7509         struct btrfs_path *path;
7510         struct cache_extent *cache;
7511         struct extent_backref *back, *tmp;
7512         int allocated = 0;
7513         u64 flags = 0;
7514
7515         if (rec->flag_block_full_backref)
7516                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7517
7518         path = btrfs_alloc_path();
7519         if (!path)
7520                 return -ENOMEM;
7521
7522         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7523                 /*
7524                  * Sometimes the backrefs themselves are so broken they don't
7525                  * get attached to any meaningful rec, so first go back and
7526                  * check any of our backrefs that we couldn't find and throw
7527                  * them into the list if we find the backref so that
7528                  * verify_backrefs can figure out what to do.
7529                  */
7530                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7531                 if (ret < 0)
7532                         goto out;
7533         }
7534
7535         /* step one, make sure all of the backrefs agree */
7536         ret = verify_backrefs(info, path, rec);
7537         if (ret < 0)
7538                 goto out;
7539
7540         trans = btrfs_start_transaction(info->extent_root, 1);
7541         if (IS_ERR(trans)) {
7542                 ret = PTR_ERR(trans);
7543                 goto out;
7544         }
7545
7546         /* step two, delete all the existing records */
7547         ret = delete_extent_records(trans, info->extent_root, path,
7548                                     rec->start, rec->max_size);
7549
7550         if (ret < 0)
7551                 goto out;
7552
7553         /* was this block corrupt?  If so, don't add references to it */
7554         cache = lookup_cache_extent(info->corrupt_blocks,
7555                                     rec->start, rec->max_size);
7556         if (cache) {
7557                 ret = 0;
7558                 goto out;
7559         }
7560
7561         /* step three, recreate all the refs we did find */
7562         rbtree_postorder_for_each_entry_safe(back, tmp,
7563                                              &rec->backref_tree, node) {
7564                 /*
7565                  * if we didn't find any references, don't create a
7566                  * new extent record
7567                  */
7568                 if (!back->found_ref)
7569                         continue;
7570
7571                 rec->bad_full_backref = 0;
7572                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7573                 allocated = 1;
7574
7575                 if (ret)
7576                         goto out;
7577         }
7578 out:
7579         if (trans) {
7580                 int err = btrfs_commit_transaction(trans, info->extent_root);
7581                 if (!ret)
7582                         ret = err;
7583         }
7584
7585         btrfs_free_path(path);
7586         return ret;
7587 }
7588
7589 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7590                               struct extent_record *rec)
7591 {
7592         struct btrfs_trans_handle *trans;
7593         struct btrfs_root *root = fs_info->extent_root;
7594         struct btrfs_path *path;
7595         struct btrfs_extent_item *ei;
7596         struct btrfs_key key;
7597         u64 flags;
7598         int ret = 0;
7599
7600         key.objectid = rec->start;
7601         if (rec->metadata) {
7602                 key.type = BTRFS_METADATA_ITEM_KEY;
7603                 key.offset = rec->info_level;
7604         } else {
7605                 key.type = BTRFS_EXTENT_ITEM_KEY;
7606                 key.offset = rec->max_size;
7607         }
7608
7609         path = btrfs_alloc_path();
7610         if (!path)
7611                 return -ENOMEM;
7612
7613         trans = btrfs_start_transaction(root, 0);
7614         if (IS_ERR(trans)) {
7615                 btrfs_free_path(path);
7616                 return PTR_ERR(trans);
7617         }
7618
7619         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7620         if (ret < 0) {
7621                 btrfs_free_path(path);
7622                 btrfs_commit_transaction(trans, root);
7623                 return ret;
7624         } else if (ret) {
7625                 fprintf(stderr, "Didn't find extent for %llu\n",
7626                         (unsigned long long)rec->start);
7627                 btrfs_free_path(path);
7628                 btrfs_commit_transaction(trans, root);
7629                 return -ENOENT;
7630         }
7631
7632         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7633                             struct btrfs_extent_item);
7634         flags = btrfs_extent_flags(path->nodes[0], ei);
7635         if (rec->flag_block_full_backref) {
7636                 fprintf(stderr, "setting full backref on %llu\n",
7637                         (unsigned long long)key.objectid);
7638                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7639         } else {
7640                 fprintf(stderr, "clearing full backref on %llu\n",
7641                         (unsigned long long)key.objectid);
7642                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7643         }
7644         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7645         btrfs_mark_buffer_dirty(path->nodes[0]);
7646         btrfs_free_path(path);
7647         return btrfs_commit_transaction(trans, root);
7648 }
7649
7650 /* right now we only prune from the extent allocation tree */
7651 static int prune_one_block(struct btrfs_trans_handle *trans,
7652                            struct btrfs_fs_info *info,
7653                            struct btrfs_corrupt_block *corrupt)
7654 {
7655         int ret;
7656         struct btrfs_path path;
7657         struct extent_buffer *eb;
7658         u64 found;
7659         int slot;
7660         int nritems;
7661         int level = corrupt->level + 1;
7662
7663         btrfs_init_path(&path);
7664 again:
7665         /* we want to stop at the parent to our busted block */
7666         path.lowest_level = level;
7667
7668         ret = btrfs_search_slot(trans, info->extent_root,
7669                                 &corrupt->key, &path, -1, 1);
7670
7671         if (ret < 0)
7672                 goto out;
7673
7674         eb = path.nodes[level];
7675         if (!eb) {
7676                 ret = -ENOENT;
7677                 goto out;
7678         }
7679
7680         /*
7681          * hopefully the search gave us the block we want to prune,
7682          * lets try that first
7683          */
7684         slot = path.slots[level];
7685         found =  btrfs_node_blockptr(eb, slot);
7686         if (found == corrupt->cache.start)
7687                 goto del_ptr;
7688
7689         nritems = btrfs_header_nritems(eb);
7690
7691         /* the search failed, lets scan this node and hope we find it */
7692         for (slot = 0; slot < nritems; slot++) {
7693                 found =  btrfs_node_blockptr(eb, slot);
7694                 if (found == corrupt->cache.start)
7695                         goto del_ptr;
7696         }
7697         /*
7698          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7699          * to this block
7700          */
7701         if (eb == info->extent_root->node) {
7702                 ret = -ENOENT;
7703                 goto out;
7704         } else {
7705                 level++;
7706                 btrfs_release_path(&path);
7707                 goto again;
7708         }
7709
7710 del_ptr:
7711         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7712         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7713
7714 out:
7715         btrfs_release_path(&path);
7716         return ret;
7717 }
7718
7719 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7720 {
7721         struct btrfs_trans_handle *trans = NULL;
7722         struct cache_extent *cache;
7723         struct btrfs_corrupt_block *corrupt;
7724
7725         while (1) {
7726                 cache = search_cache_extent(info->corrupt_blocks, 0);
7727                 if (!cache)
7728                         break;
7729                 if (!trans) {
7730                         trans = btrfs_start_transaction(info->extent_root, 1);
7731                         if (IS_ERR(trans))
7732                                 return PTR_ERR(trans);
7733                 }
7734                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7735                 prune_one_block(trans, info, corrupt);
7736                 remove_cache_extent(info->corrupt_blocks, cache);
7737         }
7738         if (trans)
7739                 return btrfs_commit_transaction(trans, info->extent_root);
7740         return 0;
7741 }
7742
7743 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7744 {
7745         struct btrfs_block_group_cache *cache;
7746         u64 start, end;
7747         int ret;
7748
7749         while (1) {
7750                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7751                                             &start, &end, EXTENT_DIRTY);
7752                 if (ret)
7753                         break;
7754                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7755                                    GFP_NOFS);
7756         }
7757
7758         start = 0;
7759         while (1) {
7760                 cache = btrfs_lookup_first_block_group(fs_info, start);
7761                 if (!cache)
7762                         break;
7763                 if (cache->cached)
7764                         cache->cached = 0;
7765                 start = cache->key.objectid + cache->key.offset;
7766         }
7767 }
7768
7769 static int check_extent_refs(struct btrfs_root *root,
7770                              struct cache_tree *extent_cache)
7771 {
7772         struct extent_record *rec;
7773         struct cache_extent *cache;
7774         int err = 0;
7775         int ret = 0;
7776         int fixed = 0;
7777         int had_dups = 0;
7778         int recorded = 0;
7779
7780         if (repair) {
7781                 /*
7782                  * if we're doing a repair, we have to make sure
7783                  * we don't allocate from the problem extents.
7784                  * In the worst case, this will be all the
7785                  * extents in the FS
7786                  */
7787                 cache = search_cache_extent(extent_cache, 0);
7788                 while(cache) {
7789                         rec = container_of(cache, struct extent_record, cache);
7790                         set_extent_dirty(root->fs_info->excluded_extents,
7791                                          rec->start,
7792                                          rec->start + rec->max_size - 1,
7793                                          GFP_NOFS);
7794                         cache = next_cache_extent(cache);
7795                 }
7796
7797                 /* pin down all the corrupted blocks too */
7798                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7799                 while(cache) {
7800                         set_extent_dirty(root->fs_info->excluded_extents,
7801                                          cache->start,
7802                                          cache->start + cache->size - 1,
7803                                          GFP_NOFS);
7804                         cache = next_cache_extent(cache);
7805                 }
7806                 prune_corrupt_blocks(root->fs_info);
7807                 reset_cached_block_groups(root->fs_info);
7808         }
7809
7810         reset_cached_block_groups(root->fs_info);
7811
7812         /*
7813          * We need to delete any duplicate entries we find first otherwise we
7814          * could mess up the extent tree when we have backrefs that actually
7815          * belong to a different extent item and not the weird duplicate one.
7816          */
7817         while (repair && !list_empty(&duplicate_extents)) {
7818                 rec = to_extent_record(duplicate_extents.next);
7819                 list_del_init(&rec->list);
7820
7821                 /* Sometimes we can find a backref before we find an actual
7822                  * extent, so we need to process it a little bit to see if there
7823                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7824                  * if this is a backref screwup.  If we need to delete stuff
7825                  * process_duplicates() will return 0, otherwise it will return
7826                  * 1 and we
7827                  */
7828                 if (process_duplicates(root, extent_cache, rec))
7829                         continue;
7830                 ret = delete_duplicate_records(root, rec);
7831                 if (ret < 0)
7832                         return ret;
7833                 /*
7834                  * delete_duplicate_records will return the number of entries
7835                  * deleted, so if it's greater than 0 then we know we actually
7836                  * did something and we need to remove.
7837                  */
7838                 if (ret)
7839                         had_dups = 1;
7840         }
7841
7842         if (had_dups)
7843                 return -EAGAIN;
7844
7845         while(1) {
7846                 int cur_err = 0;
7847
7848                 fixed = 0;
7849                 recorded = 0;
7850                 cache = search_cache_extent(extent_cache, 0);
7851                 if (!cache)
7852                         break;
7853                 rec = container_of(cache, struct extent_record, cache);
7854                 if (rec->num_duplicates) {
7855                         fprintf(stderr, "extent item %llu has multiple extent "
7856                                 "items\n", (unsigned long long)rec->start);
7857                         err = 1;
7858                         cur_err = 1;
7859                 }
7860
7861                 if (rec->refs != rec->extent_item_refs) {
7862                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7863                                 (unsigned long long)rec->start,
7864                                 (unsigned long long)rec->nr);
7865                         fprintf(stderr, "extent item %llu, found %llu\n",
7866                                 (unsigned long long)rec->extent_item_refs,
7867                                 (unsigned long long)rec->refs);
7868                         ret = record_orphan_data_extents(root->fs_info, rec);
7869                         if (ret < 0)
7870                                 goto repair_abort;
7871                         if (ret == 0) {
7872                                 recorded = 1;
7873                         } else {
7874                                 /*
7875                                  * we can't use the extent to repair file
7876                                  * extent, let the fallback method handle it.
7877                                  */
7878                                 if (!fixed && repair) {
7879                                         ret = fixup_extent_refs(
7880                                                         root->fs_info,
7881                                                         extent_cache, rec);
7882                                         if (ret)
7883                                                 goto repair_abort;
7884                                         fixed = 1;
7885                                 }
7886                         }
7887                         err = 1;
7888                         cur_err = 1;
7889                 }
7890                 if (all_backpointers_checked(rec, 1)) {
7891                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7892                                 (unsigned long long)rec->start,
7893                                 (unsigned long long)rec->nr);
7894
7895                         if (!fixed && !recorded && repair) {
7896                                 ret = fixup_extent_refs(root->fs_info,
7897                                                         extent_cache, rec);
7898                                 if (ret)
7899                                         goto repair_abort;
7900                                 fixed = 1;
7901                         }
7902                         cur_err = 1;
7903                         err = 1;
7904                 }
7905                 if (!rec->owner_ref_checked) {
7906                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7907                                 (unsigned long long)rec->start,
7908                                 (unsigned long long)rec->nr);
7909                         if (!fixed && !recorded && repair) {
7910                                 ret = fixup_extent_refs(root->fs_info,
7911                                                         extent_cache, rec);
7912                                 if (ret)
7913                                         goto repair_abort;
7914                                 fixed = 1;
7915                         }
7916                         err = 1;
7917                         cur_err = 1;
7918                 }
7919                 if (rec->bad_full_backref) {
7920                         fprintf(stderr, "bad full backref, on [%llu]\n",
7921                                 (unsigned long long)rec->start);
7922                         if (repair) {
7923                                 ret = fixup_extent_flags(root->fs_info, rec);
7924                                 if (ret)
7925                                         goto repair_abort;
7926                                 fixed = 1;
7927                         }
7928                         err = 1;
7929                         cur_err = 1;
7930                 }
7931                 /*
7932                  * Although it's not a extent ref's problem, we reuse this
7933                  * routine for error reporting.
7934                  * No repair function yet.
7935                  */
7936                 if (rec->crossing_stripes) {
7937                         fprintf(stderr,
7938                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7939                                 rec->start, rec->start + rec->max_size);
7940                         err = 1;
7941                         cur_err = 1;
7942                 }
7943
7944                 if (rec->wrong_chunk_type) {
7945                         fprintf(stderr,
7946                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7947                                 rec->start, rec->start + rec->max_size);
7948                         err = 1;
7949                         cur_err = 1;
7950                 }
7951
7952                 remove_cache_extent(extent_cache, cache);
7953                 free_all_extent_backrefs(rec);
7954                 if (!init_extent_tree && repair && (!cur_err || fixed))
7955                         clear_extent_dirty(root->fs_info->excluded_extents,
7956                                            rec->start,
7957                                            rec->start + rec->max_size - 1,
7958                                            GFP_NOFS);
7959                 free(rec);
7960         }
7961 repair_abort:
7962         if (repair) {
7963                 if (ret && ret != -EAGAIN) {
7964                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7965                         exit(1);
7966                 } else if (!ret) {
7967                         struct btrfs_trans_handle *trans;
7968
7969                         root = root->fs_info->extent_root;
7970                         trans = btrfs_start_transaction(root, 1);
7971                         if (IS_ERR(trans)) {
7972                                 ret = PTR_ERR(trans);
7973                                 goto repair_abort;
7974                         }
7975
7976                         btrfs_fix_block_accounting(trans, root);
7977                         ret = btrfs_commit_transaction(trans, root);
7978                         if (ret)
7979                                 goto repair_abort;
7980                 }
7981                 if (err)
7982                         fprintf(stderr, "repaired damaged extent references\n");
7983                 return ret;
7984         }
7985         return err;
7986 }
7987
7988 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7989 {
7990         u64 stripe_size;
7991
7992         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7993                 stripe_size = length;
7994                 stripe_size /= num_stripes;
7995         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7996                 stripe_size = length * 2;
7997                 stripe_size /= num_stripes;
7998         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7999                 stripe_size = length;
8000                 stripe_size /= (num_stripes - 1);
8001         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8002                 stripe_size = length;
8003                 stripe_size /= (num_stripes - 2);
8004         } else {
8005                 stripe_size = length;
8006         }
8007         return stripe_size;
8008 }
8009
8010 /*
8011  * Check the chunk with its block group/dev list ref:
8012  * Return 0 if all refs seems valid.
8013  * Return 1 if part of refs seems valid, need later check for rebuild ref
8014  * like missing block group and needs to search extent tree to rebuild them.
8015  * Return -1 if essential refs are missing and unable to rebuild.
8016  */
8017 static int check_chunk_refs(struct chunk_record *chunk_rec,
8018                             struct block_group_tree *block_group_cache,
8019                             struct device_extent_tree *dev_extent_cache,
8020                             int silent)
8021 {
8022         struct cache_extent *block_group_item;
8023         struct block_group_record *block_group_rec;
8024         struct cache_extent *dev_extent_item;
8025         struct device_extent_record *dev_extent_rec;
8026         u64 devid;
8027         u64 offset;
8028         u64 length;
8029         int metadump_v2 = 0;
8030         int i;
8031         int ret = 0;
8032
8033         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8034                                                chunk_rec->offset,
8035                                                chunk_rec->length);
8036         if (block_group_item) {
8037                 block_group_rec = container_of(block_group_item,
8038                                                struct block_group_record,
8039                                                cache);
8040                 if (chunk_rec->length != block_group_rec->offset ||
8041                     chunk_rec->offset != block_group_rec->objectid ||
8042                     (!metadump_v2 &&
8043                      chunk_rec->type_flags != block_group_rec->flags)) {
8044                         if (!silent)
8045                                 fprintf(stderr,
8046                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8047                                         chunk_rec->objectid,
8048                                         chunk_rec->type,
8049                                         chunk_rec->offset,
8050                                         chunk_rec->length,
8051                                         chunk_rec->offset,
8052                                         chunk_rec->type_flags,
8053                                         block_group_rec->objectid,
8054                                         block_group_rec->type,
8055                                         block_group_rec->offset,
8056                                         block_group_rec->offset,
8057                                         block_group_rec->objectid,
8058                                         block_group_rec->flags);
8059                         ret = -1;
8060                 } else {
8061                         list_del_init(&block_group_rec->list);
8062                         chunk_rec->bg_rec = block_group_rec;
8063                 }
8064         } else {
8065                 if (!silent)
8066                         fprintf(stderr,
8067                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8068                                 chunk_rec->objectid,
8069                                 chunk_rec->type,
8070                                 chunk_rec->offset,
8071                                 chunk_rec->length,
8072                                 chunk_rec->offset,
8073                                 chunk_rec->type_flags);
8074                 ret = 1;
8075         }
8076
8077         if (metadump_v2)
8078                 return ret;
8079
8080         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8081                                     chunk_rec->num_stripes);
8082         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8083                 devid = chunk_rec->stripes[i].devid;
8084                 offset = chunk_rec->stripes[i].offset;
8085                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8086                                                        devid, offset, length);
8087                 if (dev_extent_item) {
8088                         dev_extent_rec = container_of(dev_extent_item,
8089                                                 struct device_extent_record,
8090                                                 cache);
8091                         if (dev_extent_rec->objectid != devid ||
8092                             dev_extent_rec->offset != offset ||
8093                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8094                             dev_extent_rec->length != length) {
8095                                 if (!silent)
8096                                         fprintf(stderr,
8097                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8098                                                 chunk_rec->objectid,
8099                                                 chunk_rec->type,
8100                                                 chunk_rec->offset,
8101                                                 chunk_rec->stripes[i].devid,
8102                                                 chunk_rec->stripes[i].offset,
8103                                                 dev_extent_rec->objectid,
8104                                                 dev_extent_rec->offset,
8105                                                 dev_extent_rec->length);
8106                                 ret = -1;
8107                         } else {
8108                                 list_move(&dev_extent_rec->chunk_list,
8109                                           &chunk_rec->dextents);
8110                         }
8111                 } else {
8112                         if (!silent)
8113                                 fprintf(stderr,
8114                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8115                                         chunk_rec->objectid,
8116                                         chunk_rec->type,
8117                                         chunk_rec->offset,
8118                                         chunk_rec->stripes[i].devid,
8119                                         chunk_rec->stripes[i].offset);
8120                         ret = -1;
8121                 }
8122         }
8123         return ret;
8124 }
8125
8126 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8127 int check_chunks(struct cache_tree *chunk_cache,
8128                  struct block_group_tree *block_group_cache,
8129                  struct device_extent_tree *dev_extent_cache,
8130                  struct list_head *good, struct list_head *bad,
8131                  struct list_head *rebuild, int silent)
8132 {
8133         struct cache_extent *chunk_item;
8134         struct chunk_record *chunk_rec;
8135         struct block_group_record *bg_rec;
8136         struct device_extent_record *dext_rec;
8137         int err;
8138         int ret = 0;
8139
8140         chunk_item = first_cache_extent(chunk_cache);
8141         while (chunk_item) {
8142                 chunk_rec = container_of(chunk_item, struct chunk_record,
8143                                          cache);
8144                 err = check_chunk_refs(chunk_rec, block_group_cache,
8145                                        dev_extent_cache, silent);
8146                 if (err < 0)
8147                         ret = err;
8148                 if (err == 0 && good)
8149                         list_add_tail(&chunk_rec->list, good);
8150                 if (err > 0 && rebuild)
8151                         list_add_tail(&chunk_rec->list, rebuild);
8152                 if (err < 0 && bad)
8153                         list_add_tail(&chunk_rec->list, bad);
8154                 chunk_item = next_cache_extent(chunk_item);
8155         }
8156
8157         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8158                 if (!silent)
8159                         fprintf(stderr,
8160                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8161                                 bg_rec->objectid,
8162                                 bg_rec->offset,
8163                                 bg_rec->flags);
8164                 if (!ret)
8165                         ret = 1;
8166         }
8167
8168         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8169                             chunk_list) {
8170                 if (!silent)
8171                         fprintf(stderr,
8172                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8173                                 dext_rec->objectid,
8174                                 dext_rec->offset,
8175                                 dext_rec->length);
8176                 if (!ret)
8177                         ret = 1;
8178         }
8179         return ret;
8180 }
8181
8182
8183 static int check_device_used(struct device_record *dev_rec,
8184                              struct device_extent_tree *dext_cache)
8185 {
8186         struct cache_extent *cache;
8187         struct device_extent_record *dev_extent_rec;
8188         u64 total_byte = 0;
8189
8190         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8191         while (cache) {
8192                 dev_extent_rec = container_of(cache,
8193                                               struct device_extent_record,
8194                                               cache);
8195                 if (dev_extent_rec->objectid != dev_rec->devid)
8196                         break;
8197
8198                 list_del_init(&dev_extent_rec->device_list);
8199                 total_byte += dev_extent_rec->length;
8200                 cache = next_cache_extent(cache);
8201         }
8202
8203         if (total_byte != dev_rec->byte_used) {
8204                 fprintf(stderr,
8205                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8206                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8207                         dev_rec->type, dev_rec->offset);
8208                 return -1;
8209         } else {
8210                 return 0;
8211         }
8212 }
8213
8214 /* check btrfs_dev_item -> btrfs_dev_extent */
8215 static int check_devices(struct rb_root *dev_cache,
8216                          struct device_extent_tree *dev_extent_cache)
8217 {
8218         struct rb_node *dev_node;
8219         struct device_record *dev_rec;
8220         struct device_extent_record *dext_rec;
8221         int err;
8222         int ret = 0;
8223
8224         dev_node = rb_first(dev_cache);
8225         while (dev_node) {
8226                 dev_rec = container_of(dev_node, struct device_record, node);
8227                 err = check_device_used(dev_rec, dev_extent_cache);
8228                 if (err)
8229                         ret = err;
8230
8231                 dev_node = rb_next(dev_node);
8232         }
8233         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8234                             device_list) {
8235                 fprintf(stderr,
8236                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8237                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8238                 if (!ret)
8239                         ret = 1;
8240         }
8241         return ret;
8242 }
8243
8244 static int add_root_item_to_list(struct list_head *head,
8245                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8246                                   u8 level, u8 drop_level,
8247                                   int level_size, struct btrfs_key *drop_key)
8248 {
8249
8250         struct root_item_record *ri_rec;
8251         ri_rec = malloc(sizeof(*ri_rec));
8252         if (!ri_rec)
8253                 return -ENOMEM;
8254         ri_rec->bytenr = bytenr;
8255         ri_rec->objectid = objectid;
8256         ri_rec->level = level;
8257         ri_rec->level_size = level_size;
8258         ri_rec->drop_level = drop_level;
8259         ri_rec->last_snapshot = last_snapshot;
8260         if (drop_key)
8261                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8262         list_add_tail(&ri_rec->list, head);
8263
8264         return 0;
8265 }
8266
8267 static void free_root_item_list(struct list_head *list)
8268 {
8269         struct root_item_record *ri_rec;
8270
8271         while (!list_empty(list)) {
8272                 ri_rec = list_first_entry(list, struct root_item_record,
8273                                           list);
8274                 list_del_init(&ri_rec->list);
8275                 free(ri_rec);
8276         }
8277 }
8278
8279 static int deal_root_from_list(struct list_head *list,
8280                                struct btrfs_root *root,
8281                                struct block_info *bits,
8282                                int bits_nr,
8283                                struct cache_tree *pending,
8284                                struct cache_tree *seen,
8285                                struct cache_tree *reada,
8286                                struct cache_tree *nodes,
8287                                struct cache_tree *extent_cache,
8288                                struct cache_tree *chunk_cache,
8289                                struct rb_root *dev_cache,
8290                                struct block_group_tree *block_group_cache,
8291                                struct device_extent_tree *dev_extent_cache)
8292 {
8293         int ret = 0;
8294         u64 last;
8295
8296         while (!list_empty(list)) {
8297                 struct root_item_record *rec;
8298                 struct extent_buffer *buf;
8299                 rec = list_entry(list->next,
8300                                  struct root_item_record, list);
8301                 last = 0;
8302                 buf = read_tree_block(root->fs_info->tree_root,
8303                                       rec->bytenr, rec->level_size, 0);
8304                 if (!extent_buffer_uptodate(buf)) {
8305                         free_extent_buffer(buf);
8306                         ret = -EIO;
8307                         break;
8308                 }
8309                 add_root_to_pending(buf, extent_cache, pending,
8310                                     seen, nodes, rec->objectid);
8311                 /*
8312                  * To rebuild extent tree, we need deal with snapshot
8313                  * one by one, otherwise we deal with node firstly which
8314                  * can maximize readahead.
8315                  */
8316                 while (1) {
8317                         ret = run_next_block(root, bits, bits_nr, &last,
8318                                              pending, seen, reada, nodes,
8319                                              extent_cache, chunk_cache,
8320                                              dev_cache, block_group_cache,
8321                                              dev_extent_cache, rec);
8322                         if (ret != 0)
8323                                 break;
8324                 }
8325                 free_extent_buffer(buf);
8326                 list_del(&rec->list);
8327                 free(rec);
8328                 if (ret < 0)
8329                         break;
8330         }
8331         while (ret >= 0) {
8332                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8333                                      reada, nodes, extent_cache, chunk_cache,
8334                                      dev_cache, block_group_cache,
8335                                      dev_extent_cache, NULL);
8336                 if (ret != 0) {
8337                         if (ret > 0)
8338                                 ret = 0;
8339                         break;
8340                 }
8341         }
8342         return ret;
8343 }
8344
8345 static int check_chunks_and_extents(struct btrfs_root *root)
8346 {
8347         struct rb_root dev_cache;
8348         struct cache_tree chunk_cache;
8349         struct block_group_tree block_group_cache;
8350         struct device_extent_tree dev_extent_cache;
8351         struct cache_tree extent_cache;
8352         struct cache_tree seen;
8353         struct cache_tree pending;
8354         struct cache_tree reada;
8355         struct cache_tree nodes;
8356         struct extent_io_tree excluded_extents;
8357         struct cache_tree corrupt_blocks;
8358         struct btrfs_path path;
8359         struct btrfs_key key;
8360         struct btrfs_key found_key;
8361         int ret, err = 0;
8362         struct block_info *bits;
8363         int bits_nr;
8364         struct extent_buffer *leaf;
8365         int slot;
8366         struct btrfs_root_item ri;
8367         struct list_head dropping_trees;
8368         struct list_head normal_trees;
8369         struct btrfs_root *root1;
8370         u64 objectid;
8371         u32 level_size;
8372         u8 level;
8373
8374         dev_cache = RB_ROOT;
8375         cache_tree_init(&chunk_cache);
8376         block_group_tree_init(&block_group_cache);
8377         device_extent_tree_init(&dev_extent_cache);
8378
8379         cache_tree_init(&extent_cache);
8380         cache_tree_init(&seen);
8381         cache_tree_init(&pending);
8382         cache_tree_init(&nodes);
8383         cache_tree_init(&reada);
8384         cache_tree_init(&corrupt_blocks);
8385         extent_io_tree_init(&excluded_extents);
8386         INIT_LIST_HEAD(&dropping_trees);
8387         INIT_LIST_HEAD(&normal_trees);
8388
8389         if (repair) {
8390                 root->fs_info->excluded_extents = &excluded_extents;
8391                 root->fs_info->fsck_extent_cache = &extent_cache;
8392                 root->fs_info->free_extent_hook = free_extent_hook;
8393                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8394         }
8395
8396         bits_nr = 1024;
8397         bits = malloc(bits_nr * sizeof(struct block_info));
8398         if (!bits) {
8399                 perror("malloc");
8400                 exit(1);
8401         }
8402
8403         if (ctx.progress_enabled) {
8404                 ctx.tp = TASK_EXTENTS;
8405                 task_start(ctx.info);
8406         }
8407
8408 again:
8409         root1 = root->fs_info->tree_root;
8410         level = btrfs_header_level(root1->node);
8411         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8412                                     root1->node->start, 0, level, 0,
8413                                     root1->nodesize, NULL);
8414         if (ret < 0)
8415                 goto out;
8416         root1 = root->fs_info->chunk_root;
8417         level = btrfs_header_level(root1->node);
8418         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8419                                     root1->node->start, 0, level, 0,
8420                                     root1->nodesize, NULL);
8421         if (ret < 0)
8422                 goto out;
8423         btrfs_init_path(&path);
8424         key.offset = 0;
8425         key.objectid = 0;
8426         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8428                                         &key, &path, 0, 0);
8429         if (ret < 0)
8430                 goto out;
8431         while(1) {
8432                 leaf = path.nodes[0];
8433                 slot = path.slots[0];
8434                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8435                         ret = btrfs_next_leaf(root, &path);
8436                         if (ret != 0)
8437                                 break;
8438                         leaf = path.nodes[0];
8439                         slot = path.slots[0];
8440                 }
8441                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8442                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8443                         unsigned long offset;
8444                         u64 last_snapshot;
8445
8446                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8447                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8448                         last_snapshot = btrfs_root_last_snapshot(&ri);
8449                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8450                                 level = btrfs_root_level(&ri);
8451                                 level_size = root->nodesize;
8452                                 ret = add_root_item_to_list(&normal_trees,
8453                                                 found_key.objectid,
8454                                                 btrfs_root_bytenr(&ri),
8455                                                 last_snapshot, level,
8456                                                 0, level_size, NULL);
8457                                 if (ret < 0)
8458                                         goto out;
8459                         } else {
8460                                 level = btrfs_root_level(&ri);
8461                                 level_size = root->nodesize;
8462                                 objectid = found_key.objectid;
8463                                 btrfs_disk_key_to_cpu(&found_key,
8464                                                       &ri.drop_progress);
8465                                 ret = add_root_item_to_list(&dropping_trees,
8466                                                 objectid,
8467                                                 btrfs_root_bytenr(&ri),
8468                                                 last_snapshot, level,
8469                                                 ri.drop_level,
8470                                                 level_size, &found_key);
8471                                 if (ret < 0)
8472                                         goto out;
8473                         }
8474                 }
8475                 path.slots[0]++;
8476         }
8477         btrfs_release_path(&path);
8478
8479         /*
8480          * check_block can return -EAGAIN if it fixes something, please keep
8481          * this in mind when dealing with return values from these functions, if
8482          * we get -EAGAIN we want to fall through and restart the loop.
8483          */
8484         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8485                                   &seen, &reada, &nodes, &extent_cache,
8486                                   &chunk_cache, &dev_cache, &block_group_cache,
8487                                   &dev_extent_cache);
8488         if (ret < 0) {
8489                 if (ret == -EAGAIN)
8490                         goto loop;
8491                 goto out;
8492         }
8493         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8494                                   &pending, &seen, &reada, &nodes,
8495                                   &extent_cache, &chunk_cache, &dev_cache,
8496                                   &block_group_cache, &dev_extent_cache);
8497         if (ret < 0) {
8498                 if (ret == -EAGAIN)
8499                         goto loop;
8500                 goto out;
8501         }
8502
8503         ret = check_chunks(&chunk_cache, &block_group_cache,
8504                            &dev_extent_cache, NULL, NULL, NULL, 0);
8505         if (ret) {
8506                 if (ret == -EAGAIN)
8507                         goto loop;
8508                 err = ret;
8509         }
8510
8511         ret = check_extent_refs(root, &extent_cache);
8512         if (ret < 0) {
8513                 if (ret == -EAGAIN)
8514                         goto loop;
8515                 goto out;
8516         }
8517
8518         ret = check_devices(&dev_cache, &dev_extent_cache);
8519         if (ret && err)
8520                 ret = err;
8521
8522 out:
8523         task_stop(ctx.info);
8524         if (repair) {
8525                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8526                 extent_io_tree_cleanup(&excluded_extents);
8527                 root->fs_info->fsck_extent_cache = NULL;
8528                 root->fs_info->free_extent_hook = NULL;
8529                 root->fs_info->corrupt_blocks = NULL;
8530                 root->fs_info->excluded_extents = NULL;
8531         }
8532         free(bits);
8533         free_chunk_cache_tree(&chunk_cache);
8534         free_device_cache_tree(&dev_cache);
8535         free_block_group_tree(&block_group_cache);
8536         free_device_extent_tree(&dev_extent_cache);
8537         free_extent_cache_tree(&seen);
8538         free_extent_cache_tree(&pending);
8539         free_extent_cache_tree(&reada);
8540         free_extent_cache_tree(&nodes);
8541         return ret;
8542 loop:
8543         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8544         free_extent_cache_tree(&seen);
8545         free_extent_cache_tree(&pending);
8546         free_extent_cache_tree(&reada);
8547         free_extent_cache_tree(&nodes);
8548         free_chunk_cache_tree(&chunk_cache);
8549         free_block_group_tree(&block_group_cache);
8550         free_device_cache_tree(&dev_cache);
8551         free_device_extent_tree(&dev_extent_cache);
8552         free_extent_record_cache(root->fs_info, &extent_cache);
8553         free_root_item_list(&normal_trees);
8554         free_root_item_list(&dropping_trees);
8555         extent_io_tree_cleanup(&excluded_extents);
8556         goto again;
8557 }
8558
8559 /*
8560  * Check backrefs of a tree block given by @bytenr or @eb.
8561  *
8562  * @root:       the root containing the @bytenr or @eb
8563  * @eb:         tree block extent buffer, can be NULL
8564  * @bytenr:     bytenr of the tree block to search
8565  * @level:      tree level of the tree block
8566  * @owner:      owner of the tree block
8567  *
8568  * Return >0 for any error found and output error message
8569  * Return 0 for no error found
8570  */
8571 static int check_tree_block_ref(struct btrfs_root *root,
8572                                 struct extent_buffer *eb, u64 bytenr,
8573                                 int level, u64 owner)
8574 {
8575         struct btrfs_key key;
8576         struct btrfs_root *extent_root = root->fs_info->extent_root;
8577         struct btrfs_path path;
8578         struct btrfs_extent_item *ei;
8579         struct btrfs_extent_inline_ref *iref;
8580         struct extent_buffer *leaf;
8581         unsigned long end;
8582         unsigned long ptr;
8583         int slot;
8584         int skinny_level;
8585         int type;
8586         u32 nodesize = root->nodesize;
8587         u32 item_size;
8588         u64 offset;
8589         int found_ref = 0;
8590         int err = 0;
8591         int ret;
8592
8593         btrfs_init_path(&path);
8594         key.objectid = bytenr;
8595         if (btrfs_fs_incompat(root->fs_info,
8596                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8597                 key.type = BTRFS_METADATA_ITEM_KEY;
8598         else
8599                 key.type = BTRFS_EXTENT_ITEM_KEY;
8600         key.offset = (u64)-1;
8601
8602         /* Search for the backref in extent tree */
8603         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8604         if (ret < 0) {
8605                 err |= BACKREF_MISSING;
8606                 goto out;
8607         }
8608         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8609         if (ret) {
8610                 err |= BACKREF_MISSING;
8611                 goto out;
8612         }
8613
8614         leaf = path.nodes[0];
8615         slot = path.slots[0];
8616         btrfs_item_key_to_cpu(leaf, &key, slot);
8617
8618         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8619
8620         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8621                 skinny_level = (int)key.offset;
8622                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8623         } else {
8624                 struct btrfs_tree_block_info *info;
8625
8626                 info = (struct btrfs_tree_block_info *)(ei + 1);
8627                 skinny_level = btrfs_tree_block_level(leaf, info);
8628                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8629         }
8630
8631         if (eb) {
8632                 u64 header_gen;
8633                 u64 extent_gen;
8634
8635                 if (!(btrfs_extent_flags(leaf, ei) &
8636                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8637                         error(
8638                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8639                                 key.objectid, nodesize,
8640                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8641                         err = BACKREF_MISMATCH;
8642                 }
8643                 header_gen = btrfs_header_generation(eb);
8644                 extent_gen = btrfs_extent_generation(leaf, ei);
8645                 if (header_gen != extent_gen) {
8646                         error(
8647         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8648                                 key.objectid, nodesize, header_gen,
8649                                 extent_gen);
8650                         err = BACKREF_MISMATCH;
8651                 }
8652                 if (level != skinny_level) {
8653                         error(
8654                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8655                                 key.objectid, nodesize, level, skinny_level);
8656                         err = BACKREF_MISMATCH;
8657                 }
8658                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8659                         error(
8660                         "extent[%llu %u] is referred by other roots than %llu",
8661                                 key.objectid, nodesize, root->objectid);
8662                         err = BACKREF_MISMATCH;
8663                 }
8664         }
8665
8666         /*
8667          * Iterate the extent/metadata item to find the exact backref
8668          */
8669         item_size = btrfs_item_size_nr(leaf, slot);
8670         ptr = (unsigned long)iref;
8671         end = (unsigned long)ei + item_size;
8672         while (ptr < end) {
8673                 iref = (struct btrfs_extent_inline_ref *)ptr;
8674                 type = btrfs_extent_inline_ref_type(leaf, iref);
8675                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8676
8677                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8678                         (offset == root->objectid || offset == owner)) {
8679                         found_ref = 1;
8680                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8681                         /* Check if the backref points to valid referencer */
8682                         found_ref = !check_tree_block_ref(root, NULL, offset,
8683                                                           level + 1, owner);
8684                 }
8685
8686                 if (found_ref)
8687                         break;
8688                 ptr += btrfs_extent_inline_ref_size(type);
8689         }
8690
8691         /*
8692          * Inlined extent item doesn't have what we need, check
8693          * TREE_BLOCK_REF_KEY
8694          */
8695         if (!found_ref) {
8696                 btrfs_release_path(&path);
8697                 key.objectid = bytenr;
8698                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8699                 key.offset = root->objectid;
8700
8701                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8702                 if (!ret)
8703                         found_ref = 1;
8704         }
8705         if (!found_ref)
8706                 err |= BACKREF_MISSING;
8707 out:
8708         btrfs_release_path(&path);
8709         if (eb && (err & BACKREF_MISSING))
8710                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8711                         bytenr, nodesize, owner, level);
8712         return err;
8713 }
8714
8715 /*
8716  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8717  *
8718  * Return >0 any error found and output error message
8719  * Return 0 for no error found
8720  */
8721 static int check_extent_data_item(struct btrfs_root *root,
8722                                   struct extent_buffer *eb, int slot)
8723 {
8724         struct btrfs_file_extent_item *fi;
8725         struct btrfs_path path;
8726         struct btrfs_root *extent_root = root->fs_info->extent_root;
8727         struct btrfs_key fi_key;
8728         struct btrfs_key dbref_key;
8729         struct extent_buffer *leaf;
8730         struct btrfs_extent_item *ei;
8731         struct btrfs_extent_inline_ref *iref;
8732         struct btrfs_extent_data_ref *dref;
8733         u64 owner;
8734         u64 file_extent_gen;
8735         u64 disk_bytenr;
8736         u64 disk_num_bytes;
8737         u64 extent_num_bytes;
8738         u64 extent_flags;
8739         u64 extent_gen;
8740         u32 item_size;
8741         unsigned long end;
8742         unsigned long ptr;
8743         int type;
8744         u64 ref_root;
8745         int found_dbackref = 0;
8746         int err = 0;
8747         int ret;
8748
8749         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8750         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8751         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8752
8753         /* Nothing to check for hole and inline data extents */
8754         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8755             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8756                 return 0;
8757
8758         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8759         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8760         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8761
8762         /* Check unaligned disk_num_bytes and num_bytes */
8763         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8764                 error(
8765 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8766                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8767                         root->sectorsize);
8768                 err |= BYTES_UNALIGNED;
8769         } else {
8770                 data_bytes_allocated += disk_num_bytes;
8771         }
8772         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8773                 error(
8774 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8775                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8776                         root->sectorsize);
8777                 err |= BYTES_UNALIGNED;
8778         } else {
8779                 data_bytes_referenced += extent_num_bytes;
8780         }
8781         owner = btrfs_header_owner(eb);
8782
8783         /* Check the extent item of the file extent in extent tree */
8784         btrfs_init_path(&path);
8785         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8786         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8787         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8788
8789         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8790         if (ret) {
8791                 err |= BACKREF_MISSING;
8792                 goto error;
8793         }
8794
8795         leaf = path.nodes[0];
8796         slot = path.slots[0];
8797         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8798
8799         extent_flags = btrfs_extent_flags(leaf, ei);
8800         extent_gen = btrfs_extent_generation(leaf, ei);
8801
8802         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8803                 error(
8804                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8805                     disk_bytenr, disk_num_bytes,
8806                     BTRFS_EXTENT_FLAG_DATA);
8807                 err |= BACKREF_MISMATCH;
8808         }
8809
8810         if (file_extent_gen < extent_gen) {
8811                 error(
8812 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8813                         disk_bytenr, disk_num_bytes, file_extent_gen,
8814                         extent_gen);
8815                 err |= BACKREF_MISMATCH;
8816         }
8817
8818         /* Check data backref inside that extent item */
8819         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8820         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8821         ptr = (unsigned long)iref;
8822         end = (unsigned long)ei + item_size;
8823         while (ptr < end) {
8824                 iref = (struct btrfs_extent_inline_ref *)ptr;
8825                 type = btrfs_extent_inline_ref_type(leaf, iref);
8826                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8827
8828                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8829                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8830                         if (ref_root == owner || ref_root == root->objectid)
8831                                 found_dbackref = 1;
8832                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8833                         found_dbackref = !check_tree_block_ref(root, NULL,
8834                                 btrfs_extent_inline_ref_offset(leaf, iref),
8835                                 0, owner);
8836                 }
8837
8838                 if (found_dbackref)
8839                         break;
8840                 ptr += btrfs_extent_inline_ref_size(type);
8841         }
8842
8843         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8844         if (!found_dbackref) {
8845                 btrfs_release_path(&path);
8846
8847                 btrfs_init_path(&path);
8848                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8849                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8850                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8851                                 fi_key.objectid, fi_key.offset);
8852
8853                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8854                                         &dbref_key, &path, 0, 0);
8855                 if (!ret)
8856                         found_dbackref = 1;
8857         }
8858
8859         if (!found_dbackref)
8860                 err |= BACKREF_MISSING;
8861 error:
8862         btrfs_release_path(&path);
8863         if (err & BACKREF_MISSING) {
8864                 error("data extent[%llu %llu] backref lost",
8865                       disk_bytenr, disk_num_bytes);
8866         }
8867         return err;
8868 }
8869
8870 /*
8871  * Get real tree block level for the case like shared block
8872  * Return >= 0 as tree level
8873  * Return <0 for error
8874  */
8875 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8876 {
8877         struct extent_buffer *eb;
8878         struct btrfs_path path;
8879         struct btrfs_key key;
8880         struct btrfs_extent_item *ei;
8881         u64 flags;
8882         u64 transid;
8883         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8884         u8 backref_level;
8885         u8 header_level;
8886         int ret;
8887
8888         /* Search extent tree for extent generation and level */
8889         key.objectid = bytenr;
8890         key.type = BTRFS_METADATA_ITEM_KEY;
8891         key.offset = (u64)-1;
8892
8893         btrfs_init_path(&path);
8894         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8895         if (ret < 0)
8896                 goto release_out;
8897         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8898         if (ret < 0)
8899                 goto release_out;
8900         if (ret > 0) {
8901                 ret = -ENOENT;
8902                 goto release_out;
8903         }
8904
8905         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8906         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8907                             struct btrfs_extent_item);
8908         flags = btrfs_extent_flags(path.nodes[0], ei);
8909         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8910                 ret = -ENOENT;
8911                 goto release_out;
8912         }
8913
8914         /* Get transid for later read_tree_block() check */
8915         transid = btrfs_extent_generation(path.nodes[0], ei);
8916
8917         /* Get backref level as one source */
8918         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8919                 backref_level = key.offset;
8920         } else {
8921                 struct btrfs_tree_block_info *info;
8922
8923                 info = (struct btrfs_tree_block_info *)(ei + 1);
8924                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8925         }
8926         btrfs_release_path(&path);
8927
8928         /* Get level from tree block as an alternative source */
8929         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8930         if (!extent_buffer_uptodate(eb)) {
8931                 free_extent_buffer(eb);
8932                 return -EIO;
8933         }
8934         header_level = btrfs_header_level(eb);
8935         free_extent_buffer(eb);
8936
8937         if (header_level != backref_level)
8938                 return -EIO;
8939         return header_level;
8940
8941 release_out:
8942         btrfs_release_path(&path);
8943         return ret;
8944 }
8945
8946 /*
8947  * Check if a tree block backref is valid (points to a valid tree block)
8948  * if level == -1, level will be resolved
8949  * Return >0 for any error found and print error message
8950  */
8951 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8952                                     u64 bytenr, int level)
8953 {
8954         struct btrfs_root *root;
8955         struct btrfs_key key;
8956         struct btrfs_path path;
8957         struct extent_buffer *eb;
8958         struct extent_buffer *node;
8959         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8960         int err = 0;
8961         int ret;
8962
8963         /* Query level for level == -1 special case */
8964         if (level == -1)
8965                 level = query_tree_block_level(fs_info, bytenr);
8966         if (level < 0) {
8967                 err |= REFERENCER_MISSING;
8968                 goto out;
8969         }
8970
8971         key.objectid = root_id;
8972         key.type = BTRFS_ROOT_ITEM_KEY;
8973         key.offset = (u64)-1;
8974
8975         root = btrfs_read_fs_root(fs_info, &key);
8976         if (IS_ERR(root)) {
8977                 err |= REFERENCER_MISSING;
8978                 goto out;
8979         }
8980
8981         /* Read out the tree block to get item/node key */
8982         eb = read_tree_block(root, bytenr, root->nodesize, 0);
8983         if (!extent_buffer_uptodate(eb)) {
8984                 err |= REFERENCER_MISSING;
8985                 free_extent_buffer(eb);
8986                 goto out;
8987         }
8988
8989         /* Empty tree, no need to check key */
8990         if (!btrfs_header_nritems(eb) && !level) {
8991                 free_extent_buffer(eb);
8992                 goto out;
8993         }
8994
8995         if (level)
8996                 btrfs_node_key_to_cpu(eb, &key, 0);
8997         else
8998                 btrfs_item_key_to_cpu(eb, &key, 0);
8999
9000         free_extent_buffer(eb);
9001
9002         btrfs_init_path(&path);
9003         /* Search with the first key, to ensure we can reach it */
9004         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9005         if (ret) {
9006                 err |= REFERENCER_MISSING;
9007                 goto release_out;
9008         }
9009
9010         node = path.nodes[level];
9011         if (btrfs_header_bytenr(node) != bytenr) {
9012                 error(
9013         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9014                         bytenr, nodesize, bytenr,
9015                         btrfs_header_bytenr(node));
9016                 err |= REFERENCER_MISMATCH;
9017         }
9018         if (btrfs_header_level(node) != level) {
9019                 error(
9020         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9021                         bytenr, nodesize, level,
9022                         btrfs_header_level(node));
9023                 err |= REFERENCER_MISMATCH;
9024         }
9025
9026 release_out:
9027         btrfs_release_path(&path);
9028 out:
9029         if (err & REFERENCER_MISSING) {
9030                 if (level < 0)
9031                         error("extent [%llu %d] lost referencer (owner: %llu)",
9032                                 bytenr, nodesize, root_id);
9033                 else
9034                         error(
9035                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9036                                 bytenr, nodesize, root_id, level);
9037         }
9038
9039         return err;
9040 }
9041
9042 /*
9043  * Check referencer for shared block backref
9044  * If level == -1, this function will resolve the level.
9045  */
9046 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9047                                      u64 parent, u64 bytenr, int level)
9048 {
9049         struct extent_buffer *eb;
9050         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9051         u32 nr;
9052         int found_parent = 0;
9053         int i;
9054
9055         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9056         if (!extent_buffer_uptodate(eb))
9057                 goto out;
9058
9059         if (level == -1)
9060                 level = query_tree_block_level(fs_info, bytenr);
9061         if (level < 0)
9062                 goto out;
9063
9064         if (level + 1 != btrfs_header_level(eb))
9065                 goto out;
9066
9067         nr = btrfs_header_nritems(eb);
9068         for (i = 0; i < nr; i++) {
9069                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9070                         found_parent = 1;
9071                         break;
9072                 }
9073         }
9074 out:
9075         free_extent_buffer(eb);
9076         if (!found_parent) {
9077                 error(
9078         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9079                         bytenr, nodesize, parent, level);
9080                 return REFERENCER_MISSING;
9081         }
9082         return 0;
9083 }
9084
9085 /*
9086  * Check referencer for normal (inlined) data ref
9087  * If len == 0, it will be resolved by searching in extent tree
9088  */
9089 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9090                                      u64 root_id, u64 objectid, u64 offset,
9091                                      u64 bytenr, u64 len, u32 count)
9092 {
9093         struct btrfs_root *root;
9094         struct btrfs_root *extent_root = fs_info->extent_root;
9095         struct btrfs_key key;
9096         struct btrfs_path path;
9097         struct extent_buffer *leaf;
9098         struct btrfs_file_extent_item *fi;
9099         u32 found_count = 0;
9100         int slot;
9101         int ret = 0;
9102
9103         if (!len) {
9104                 key.objectid = bytenr;
9105                 key.type = BTRFS_EXTENT_ITEM_KEY;
9106                 key.offset = (u64)-1;
9107
9108                 btrfs_init_path(&path);
9109                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9110                 if (ret < 0)
9111                         goto out;
9112                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9113                 if (ret)
9114                         goto out;
9115                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9116                 if (key.objectid != bytenr ||
9117                     key.type != BTRFS_EXTENT_ITEM_KEY)
9118                         goto out;
9119                 len = key.offset;
9120                 btrfs_release_path(&path);
9121         }
9122         key.objectid = root_id;
9123         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9124         key.offset = (u64)-1;
9125         btrfs_init_path(&path);
9126
9127         root = btrfs_read_fs_root(fs_info, &key);
9128         if (IS_ERR(root))
9129                 goto out;
9130
9131         key.objectid = objectid;
9132         key.type = BTRFS_EXTENT_DATA_KEY;
9133         /*
9134          * It can be nasty as data backref offset is
9135          * file offset - file extent offset, which is smaller or
9136          * equal to original backref offset.  The only special case is
9137          * overflow.  So we need to special check and do further search.
9138          */
9139         key.offset = offset & (1ULL << 63) ? 0 : offset;
9140
9141         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9142         if (ret < 0)
9143                 goto out;
9144
9145         /*
9146          * Search afterwards to get correct one
9147          * NOTE: As we must do a comprehensive check on the data backref to
9148          * make sure the dref count also matches, we must iterate all file
9149          * extents for that inode.
9150          */
9151         while (1) {
9152                 leaf = path.nodes[0];
9153                 slot = path.slots[0];
9154
9155                 btrfs_item_key_to_cpu(leaf, &key, slot);
9156                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9157                         break;
9158                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9159                 /*
9160                  * Except normal disk bytenr and disk num bytes, we still
9161                  * need to do extra check on dbackref offset as
9162                  * dbackref offset = file_offset - file_extent_offset
9163                  */
9164                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9165                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9166                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9167                     offset)
9168                         found_count++;
9169
9170                 ret = btrfs_next_item(root, &path);
9171                 if (ret)
9172                         break;
9173         }
9174 out:
9175         btrfs_release_path(&path);
9176         if (found_count != count) {
9177                 error(
9178 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9179                         bytenr, len, root_id, objectid, offset, count, found_count);
9180                 return REFERENCER_MISSING;
9181         }
9182         return 0;
9183 }
9184
9185 /*
9186  * Check if the referencer of a shared data backref exists
9187  */
9188 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9189                                      u64 parent, u64 bytenr)
9190 {
9191         struct extent_buffer *eb;
9192         struct btrfs_key key;
9193         struct btrfs_file_extent_item *fi;
9194         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9195         u32 nr;
9196         int found_parent = 0;
9197         int i;
9198
9199         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9200         if (!extent_buffer_uptodate(eb))
9201                 goto out;
9202
9203         nr = btrfs_header_nritems(eb);
9204         for (i = 0; i < nr; i++) {
9205                 btrfs_item_key_to_cpu(eb, &key, i);
9206                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9207                         continue;
9208
9209                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9210                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9211                         continue;
9212
9213                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9214                         found_parent = 1;
9215                         break;
9216                 }
9217         }
9218
9219 out:
9220         free_extent_buffer(eb);
9221         if (!found_parent) {
9222                 error("shared extent %llu referencer lost (parent: %llu)",
9223                         bytenr, parent);
9224                 return REFERENCER_MISSING;
9225         }
9226         return 0;
9227 }
9228
9229 /*
9230  * This function will check a given extent item, including its backref and
9231  * itself (like crossing stripe boundary and type)
9232  *
9233  * Since we don't use extent_record anymore, introduce new error bit
9234  */
9235 static int check_extent_item(struct btrfs_fs_info *fs_info,
9236                              struct extent_buffer *eb, int slot)
9237 {
9238         struct btrfs_extent_item *ei;
9239         struct btrfs_extent_inline_ref *iref;
9240         struct btrfs_extent_data_ref *dref;
9241         unsigned long end;
9242         unsigned long ptr;
9243         int type;
9244         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9245         u32 item_size = btrfs_item_size_nr(eb, slot);
9246         u64 flags;
9247         u64 offset;
9248         int metadata = 0;
9249         int level;
9250         struct btrfs_key key;
9251         int ret;
9252         int err = 0;
9253
9254         btrfs_item_key_to_cpu(eb, &key, slot);
9255         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9256                 bytes_used += key.offset;
9257         else
9258                 bytes_used += nodesize;
9259
9260         if (item_size < sizeof(*ei)) {
9261                 /*
9262                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9263                  * old thing when on disk format is still un-determined.
9264                  * No need to care about it anymore
9265                  */
9266                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9267                 return -ENOTTY;
9268         }
9269
9270         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9271         flags = btrfs_extent_flags(eb, ei);
9272
9273         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9274                 metadata = 1;
9275         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9276                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9277                       key.objectid, key.objectid + nodesize);
9278                 err |= CROSSING_STRIPE_BOUNDARY;
9279         }
9280
9281         ptr = (unsigned long)(ei + 1);
9282
9283         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9284                 /* Old EXTENT_ITEM metadata */
9285                 struct btrfs_tree_block_info *info;
9286
9287                 info = (struct btrfs_tree_block_info *)ptr;
9288                 level = btrfs_tree_block_level(eb, info);
9289                 ptr += sizeof(struct btrfs_tree_block_info);
9290         } else {
9291                 /* New METADATA_ITEM */
9292                 level = key.offset;
9293         }
9294         end = (unsigned long)ei + item_size;
9295
9296         if (ptr >= end) {
9297                 err |= ITEM_SIZE_MISMATCH;
9298                 goto out;
9299         }
9300
9301         /* Now check every backref in this extent item */
9302 next:
9303         iref = (struct btrfs_extent_inline_ref *)ptr;
9304         type = btrfs_extent_inline_ref_type(eb, iref);
9305         offset = btrfs_extent_inline_ref_offset(eb, iref);
9306         switch (type) {
9307         case BTRFS_TREE_BLOCK_REF_KEY:
9308                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9309                                                level);
9310                 err |= ret;
9311                 break;
9312         case BTRFS_SHARED_BLOCK_REF_KEY:
9313                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9314                                                  level);
9315                 err |= ret;
9316                 break;
9317         case BTRFS_EXTENT_DATA_REF_KEY:
9318                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9319                 ret = check_extent_data_backref(fs_info,
9320                                 btrfs_extent_data_ref_root(eb, dref),
9321                                 btrfs_extent_data_ref_objectid(eb, dref),
9322                                 btrfs_extent_data_ref_offset(eb, dref),
9323                                 key.objectid, key.offset,
9324                                 btrfs_extent_data_ref_count(eb, dref));
9325                 err |= ret;
9326                 break;
9327         case BTRFS_SHARED_DATA_REF_KEY:
9328                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9329                 err |= ret;
9330                 break;
9331         default:
9332                 error("extent[%llu %d %llu] has unknown ref type: %d",
9333                         key.objectid, key.type, key.offset, type);
9334                 err |= UNKNOWN_TYPE;
9335                 goto out;
9336         }
9337
9338         ptr += btrfs_extent_inline_ref_size(type);
9339         if (ptr < end)
9340                 goto next;
9341
9342 out:
9343         return err;
9344 }
9345
9346 /*
9347  * Check if a dev extent item is referred correctly by its chunk
9348  */
9349 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9350                                  struct extent_buffer *eb, int slot)
9351 {
9352         struct btrfs_root *chunk_root = fs_info->chunk_root;
9353         struct btrfs_dev_extent *ptr;
9354         struct btrfs_path path;
9355         struct btrfs_key chunk_key;
9356         struct btrfs_key devext_key;
9357         struct btrfs_chunk *chunk;
9358         struct extent_buffer *l;
9359         int num_stripes;
9360         u64 length;
9361         int i;
9362         int found_chunk = 0;
9363         int ret;
9364
9365         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9366         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9367         length = btrfs_dev_extent_length(eb, ptr);
9368
9369         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9370         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9371         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9372
9373         btrfs_init_path(&path);
9374         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9375         if (ret)
9376                 goto out;
9377
9378         l = path.nodes[0];
9379         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9380         if (btrfs_chunk_length(l, chunk) != length)
9381                 goto out;
9382
9383         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9384         for (i = 0; i < num_stripes; i++) {
9385                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9386                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9387
9388                 if (devid == devext_key.objectid &&
9389                     offset == devext_key.offset) {
9390                         found_chunk = 1;
9391                         break;
9392                 }
9393         }
9394 out:
9395         btrfs_release_path(&path);
9396         if (!found_chunk) {
9397                 error(
9398                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9399                         devext_key.objectid, devext_key.offset, length);
9400                 return REFERENCER_MISSING;
9401         }
9402         return 0;
9403 }
9404
9405 /*
9406  * Check if the used space is correct with the dev item
9407  */
9408 static int check_dev_item(struct btrfs_fs_info *fs_info,
9409                           struct extent_buffer *eb, int slot)
9410 {
9411         struct btrfs_root *dev_root = fs_info->dev_root;
9412         struct btrfs_dev_item *dev_item;
9413         struct btrfs_path path;
9414         struct btrfs_key key;
9415         struct btrfs_dev_extent *ptr;
9416         u64 dev_id;
9417         u64 used;
9418         u64 total = 0;
9419         int ret;
9420
9421         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9422         dev_id = btrfs_device_id(eb, dev_item);
9423         used = btrfs_device_bytes_used(eb, dev_item);
9424
9425         key.objectid = dev_id;
9426         key.type = BTRFS_DEV_EXTENT_KEY;
9427         key.offset = 0;
9428
9429         btrfs_init_path(&path);
9430         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9431         if (ret < 0) {
9432                 btrfs_item_key_to_cpu(eb, &key, slot);
9433                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9434                         key.objectid, key.type, key.offset);
9435                 btrfs_release_path(&path);
9436                 return REFERENCER_MISSING;
9437         }
9438
9439         /* Iterate dev_extents to calculate the used space of a device */
9440         while (1) {
9441                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9442
9443                 if (key.objectid > dev_id)
9444                         break;
9445                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9446                         goto next;
9447
9448                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9449                                      struct btrfs_dev_extent);
9450                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9451 next:
9452                 ret = btrfs_next_item(dev_root, &path);
9453                 if (ret)
9454                         break;
9455         }
9456         btrfs_release_path(&path);
9457
9458         if (used != total) {
9459                 btrfs_item_key_to_cpu(eb, &key, slot);
9460                 error(
9461 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9462                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9463                         BTRFS_DEV_EXTENT_KEY, dev_id);
9464                 return ACCOUNTING_MISMATCH;
9465         }
9466         return 0;
9467 }
9468
9469 /*
9470  * Check a block group item with its referener (chunk) and its used space
9471  * with extent/metadata item
9472  */
9473 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9474                                   struct extent_buffer *eb, int slot)
9475 {
9476         struct btrfs_root *extent_root = fs_info->extent_root;
9477         struct btrfs_root *chunk_root = fs_info->chunk_root;
9478         struct btrfs_block_group_item *bi;
9479         struct btrfs_block_group_item bg_item;
9480         struct btrfs_path path;
9481         struct btrfs_key bg_key;
9482         struct btrfs_key chunk_key;
9483         struct btrfs_key extent_key;
9484         struct btrfs_chunk *chunk;
9485         struct extent_buffer *leaf;
9486         struct btrfs_extent_item *ei;
9487         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9488         u64 flags;
9489         u64 bg_flags;
9490         u64 used;
9491         u64 total = 0;
9492         int ret;
9493         int err = 0;
9494
9495         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9496         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9497         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9498         used = btrfs_block_group_used(&bg_item);
9499         bg_flags = btrfs_block_group_flags(&bg_item);
9500
9501         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9502         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9503         chunk_key.offset = bg_key.objectid;
9504
9505         btrfs_init_path(&path);
9506         /* Search for the referencer chunk */
9507         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9508         if (ret) {
9509                 error(
9510                 "block group[%llu %llu] did not find the related chunk item",
9511                         bg_key.objectid, bg_key.offset);
9512                 err |= REFERENCER_MISSING;
9513         } else {
9514                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9515                                         struct btrfs_chunk);
9516                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9517                                                 bg_key.offset) {
9518                         error(
9519         "block group[%llu %llu] related chunk item length does not match",
9520                                 bg_key.objectid, bg_key.offset);
9521                         err |= REFERENCER_MISMATCH;
9522                 }
9523         }
9524         btrfs_release_path(&path);
9525
9526         /* Search from the block group bytenr */
9527         extent_key.objectid = bg_key.objectid;
9528         extent_key.type = 0;
9529         extent_key.offset = 0;
9530
9531         btrfs_init_path(&path);
9532         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9533         if (ret < 0)
9534                 goto out;
9535
9536         /* Iterate extent tree to account used space */
9537         while (1) {
9538                 leaf = path.nodes[0];
9539                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9540                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9541                         break;
9542
9543                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9544                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9545                         goto next;
9546                 if (extent_key.objectid < bg_key.objectid)
9547                         goto next;
9548
9549                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9550                         total += nodesize;
9551                 else
9552                         total += extent_key.offset;
9553
9554                 ei = btrfs_item_ptr(leaf, path.slots[0],
9555                                     struct btrfs_extent_item);
9556                 flags = btrfs_extent_flags(leaf, ei);
9557                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9558                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9559                                 error(
9560                         "bad extent[%llu, %llu) type mismatch with chunk",
9561                                         extent_key.objectid,
9562                                         extent_key.objectid + extent_key.offset);
9563                                 err |= CHUNK_TYPE_MISMATCH;
9564                         }
9565                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9566                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9567                                     BTRFS_BLOCK_GROUP_METADATA))) {
9568                                 error(
9569                         "bad extent[%llu, %llu) type mismatch with chunk",
9570                                         extent_key.objectid,
9571                                         extent_key.objectid + nodesize);
9572                                 err |= CHUNK_TYPE_MISMATCH;
9573                         }
9574                 }
9575 next:
9576                 ret = btrfs_next_item(extent_root, &path);
9577                 if (ret)
9578                         break;
9579         }
9580
9581 out:
9582         btrfs_release_path(&path);
9583
9584         if (total != used) {
9585                 error(
9586                 "block group[%llu %llu] used %llu but extent items used %llu",
9587                         bg_key.objectid, bg_key.offset, used, total);
9588                 err |= ACCOUNTING_MISMATCH;
9589         }
9590         return err;
9591 }
9592
9593 /*
9594  * Check a chunk item.
9595  * Including checking all referred dev_extents and block group
9596  */
9597 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9598                             struct extent_buffer *eb, int slot)
9599 {
9600         struct btrfs_root *extent_root = fs_info->extent_root;
9601         struct btrfs_root *dev_root = fs_info->dev_root;
9602         struct btrfs_path path;
9603         struct btrfs_key chunk_key;
9604         struct btrfs_key bg_key;
9605         struct btrfs_key devext_key;
9606         struct btrfs_chunk *chunk;
9607         struct extent_buffer *leaf;
9608         struct btrfs_block_group_item *bi;
9609         struct btrfs_block_group_item bg_item;
9610         struct btrfs_dev_extent *ptr;
9611         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9612         u64 length;
9613         u64 chunk_end;
9614         u64 type;
9615         u64 profile;
9616         int num_stripes;
9617         u64 offset;
9618         u64 objectid;
9619         int i;
9620         int ret;
9621         int err = 0;
9622
9623         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9624         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9625         length = btrfs_chunk_length(eb, chunk);
9626         chunk_end = chunk_key.offset + length;
9627         if (!IS_ALIGNED(length, sectorsize)) {
9628                 error("chunk[%llu %llu) not aligned to %u",
9629                         chunk_key.offset, chunk_end, sectorsize);
9630                 err |= BYTES_UNALIGNED;
9631                 goto out;
9632         }
9633
9634         type = btrfs_chunk_type(eb, chunk);
9635         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9636         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9637                 error("chunk[%llu %llu) has no chunk type",
9638                         chunk_key.offset, chunk_end);
9639                 err |= UNKNOWN_TYPE;
9640         }
9641         if (profile && (profile & (profile - 1))) {
9642                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9643                         chunk_key.offset, chunk_end, profile);
9644                 err |= UNKNOWN_TYPE;
9645         }
9646
9647         bg_key.objectid = chunk_key.offset;
9648         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9649         bg_key.offset = length;
9650
9651         btrfs_init_path(&path);
9652         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9653         if (ret) {
9654                 error(
9655                 "chunk[%llu %llu) did not find the related block group item",
9656                         chunk_key.offset, chunk_end);
9657                 err |= REFERENCER_MISSING;
9658         } else{
9659                 leaf = path.nodes[0];
9660                 bi = btrfs_item_ptr(leaf, path.slots[0],
9661                                     struct btrfs_block_group_item);
9662                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9663                                    sizeof(bg_item));
9664                 if (btrfs_block_group_flags(&bg_item) != type) {
9665                         error(
9666 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9667                                 chunk_key.offset, chunk_end, type,
9668                                 btrfs_block_group_flags(&bg_item));
9669                         err |= REFERENCER_MISSING;
9670                 }
9671         }
9672
9673         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9674         for (i = 0; i < num_stripes; i++) {
9675                 btrfs_release_path(&path);
9676                 btrfs_init_path(&path);
9677                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9678                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9679                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9680
9681                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9682                                         0, 0);
9683                 if (ret)
9684                         goto not_match_dev;
9685
9686                 leaf = path.nodes[0];
9687                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9688                                      struct btrfs_dev_extent);
9689                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9690                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9691                 if (objectid != chunk_key.objectid ||
9692                     offset != chunk_key.offset ||
9693                     btrfs_dev_extent_length(leaf, ptr) != length)
9694                         goto not_match_dev;
9695                 continue;
9696 not_match_dev:
9697                 err |= BACKREF_MISSING;
9698                 error(
9699                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9700                         chunk_key.objectid, chunk_end, i);
9701                 continue;
9702         }
9703         btrfs_release_path(&path);
9704 out:
9705         return err;
9706 }
9707
9708 /*
9709  * Main entry function to check known items and update related accounting info
9710  */
9711 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9712 {
9713         struct btrfs_fs_info *fs_info = root->fs_info;
9714         struct btrfs_key key;
9715         int slot = 0;
9716         int type;
9717         struct btrfs_extent_data_ref *dref;
9718         int ret;
9719         int err = 0;
9720
9721 next:
9722         btrfs_item_key_to_cpu(eb, &key, slot);
9723         type = btrfs_key_type(&key);
9724
9725         switch (type) {
9726         case BTRFS_EXTENT_DATA_KEY:
9727                 ret = check_extent_data_item(root, eb, slot);
9728                 err |= ret;
9729                 break;
9730         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9731                 ret = check_block_group_item(fs_info, eb, slot);
9732                 err |= ret;
9733                 break;
9734         case BTRFS_DEV_ITEM_KEY:
9735                 ret = check_dev_item(fs_info, eb, slot);
9736                 err |= ret;
9737                 break;
9738         case BTRFS_CHUNK_ITEM_KEY:
9739                 ret = check_chunk_item(fs_info, eb, slot);
9740                 err |= ret;
9741                 break;
9742         case BTRFS_DEV_EXTENT_KEY:
9743                 ret = check_dev_extent_item(fs_info, eb, slot);
9744                 err |= ret;
9745                 break;
9746         case BTRFS_EXTENT_ITEM_KEY:
9747         case BTRFS_METADATA_ITEM_KEY:
9748                 ret = check_extent_item(fs_info, eb, slot);
9749                 err |= ret;
9750                 break;
9751         case BTRFS_EXTENT_CSUM_KEY:
9752                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9753                 break;
9754         case BTRFS_TREE_BLOCK_REF_KEY:
9755                 ret = check_tree_block_backref(fs_info, key.offset,
9756                                                key.objectid, -1);
9757                 err |= ret;
9758                 break;
9759         case BTRFS_EXTENT_DATA_REF_KEY:
9760                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9761                 ret = check_extent_data_backref(fs_info,
9762                                 btrfs_extent_data_ref_root(eb, dref),
9763                                 btrfs_extent_data_ref_objectid(eb, dref),
9764                                 btrfs_extent_data_ref_offset(eb, dref),
9765                                 key.objectid, 0,
9766                                 btrfs_extent_data_ref_count(eb, dref));
9767                 err |= ret;
9768                 break;
9769         case BTRFS_SHARED_BLOCK_REF_KEY:
9770                 ret = check_shared_block_backref(fs_info, key.offset,
9771                                                  key.objectid, -1);
9772                 err |= ret;
9773                 break;
9774         case BTRFS_SHARED_DATA_REF_KEY:
9775                 ret = check_shared_data_backref(fs_info, key.offset,
9776                                                 key.objectid);
9777                 err |= ret;
9778                 break;
9779         default:
9780                 break;
9781         }
9782
9783         if (++slot < btrfs_header_nritems(eb))
9784                 goto next;
9785
9786         return err;
9787 }
9788
9789 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9790                            struct btrfs_root *root, int overwrite)
9791 {
9792         struct extent_buffer *c;
9793         struct extent_buffer *old = root->node;
9794         int level;
9795         int ret;
9796         struct btrfs_disk_key disk_key = {0,0,0};
9797
9798         level = 0;
9799
9800         if (overwrite) {
9801                 c = old;
9802                 extent_buffer_get(c);
9803                 goto init;
9804         }
9805         c = btrfs_alloc_free_block(trans, root,
9806                                    root->nodesize,
9807                                    root->root_key.objectid,
9808                                    &disk_key, level, 0, 0);
9809         if (IS_ERR(c)) {
9810                 c = old;
9811                 extent_buffer_get(c);
9812                 overwrite = 1;
9813         }
9814 init:
9815         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9816         btrfs_set_header_level(c, level);
9817         btrfs_set_header_bytenr(c, c->start);
9818         btrfs_set_header_generation(c, trans->transid);
9819         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9820         btrfs_set_header_owner(c, root->root_key.objectid);
9821
9822         write_extent_buffer(c, root->fs_info->fsid,
9823                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
9824
9825         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9826                             btrfs_header_chunk_tree_uuid(c),
9827                             BTRFS_UUID_SIZE);
9828
9829         btrfs_mark_buffer_dirty(c);
9830         /*
9831          * this case can happen in the following case:
9832          *
9833          * 1.overwrite previous root.
9834          *
9835          * 2.reinit reloc data root, this is because we skip pin
9836          * down reloc data tree before which means we can allocate
9837          * same block bytenr here.
9838          */
9839         if (old->start == c->start) {
9840                 btrfs_set_root_generation(&root->root_item,
9841                                           trans->transid);
9842                 root->root_item.level = btrfs_header_level(root->node);
9843                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9844                                         &root->root_key, &root->root_item);
9845                 if (ret) {
9846                         free_extent_buffer(c);
9847                         return ret;
9848                 }
9849         }
9850         free_extent_buffer(old);
9851         root->node = c;
9852         add_root_to_dirty_list(root);
9853         return 0;
9854 }
9855
9856 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9857                                 struct extent_buffer *eb, int tree_root)
9858 {
9859         struct extent_buffer *tmp;
9860         struct btrfs_root_item *ri;
9861         struct btrfs_key key;
9862         u64 bytenr;
9863         u32 nodesize;
9864         int level = btrfs_header_level(eb);
9865         int nritems;
9866         int ret;
9867         int i;
9868
9869         /*
9870          * If we have pinned this block before, don't pin it again.
9871          * This can not only avoid forever loop with broken filesystem
9872          * but also give us some speedups.
9873          */
9874         if (test_range_bit(&fs_info->pinned_extents, eb->start,
9875                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9876                 return 0;
9877
9878         btrfs_pin_extent(fs_info, eb->start, eb->len);
9879
9880         nodesize = btrfs_super_nodesize(fs_info->super_copy);
9881         nritems = btrfs_header_nritems(eb);
9882         for (i = 0; i < nritems; i++) {
9883                 if (level == 0) {
9884                         btrfs_item_key_to_cpu(eb, &key, i);
9885                         if (key.type != BTRFS_ROOT_ITEM_KEY)
9886                                 continue;
9887                         /* Skip the extent root and reloc roots */
9888                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9889                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9890                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9891                                 continue;
9892                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9893                         bytenr = btrfs_disk_root_bytenr(eb, ri);
9894
9895                         /*
9896                          * If at any point we start needing the real root we
9897                          * will have to build a stump root for the root we are
9898                          * in, but for now this doesn't actually use the root so
9899                          * just pass in extent_root.
9900                          */
9901                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9902                                               nodesize, 0);
9903                         if (!extent_buffer_uptodate(tmp)) {
9904                                 fprintf(stderr, "Error reading root block\n");
9905                                 return -EIO;
9906                         }
9907                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
9908                         free_extent_buffer(tmp);
9909                         if (ret)
9910                                 return ret;
9911                 } else {
9912                         bytenr = btrfs_node_blockptr(eb, i);
9913
9914                         /* If we aren't the tree root don't read the block */
9915                         if (level == 1 && !tree_root) {
9916                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
9917                                 continue;
9918                         }
9919
9920                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9921                                               nodesize, 0);
9922                         if (!extent_buffer_uptodate(tmp)) {
9923                                 fprintf(stderr, "Error reading tree block\n");
9924                                 return -EIO;
9925                         }
9926                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9927                         free_extent_buffer(tmp);
9928                         if (ret)
9929                                 return ret;
9930                 }
9931         }
9932
9933         return 0;
9934 }
9935
9936 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9937 {
9938         int ret;
9939
9940         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9941         if (ret)
9942                 return ret;
9943
9944         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
9945 }
9946
9947 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9948 {
9949         struct btrfs_block_group_cache *cache;
9950         struct btrfs_path *path;
9951         struct extent_buffer *leaf;
9952         struct btrfs_chunk *chunk;
9953         struct btrfs_key key;
9954         int ret;
9955         u64 start;
9956
9957         path = btrfs_alloc_path();
9958         if (!path)
9959                 return -ENOMEM;
9960
9961         key.objectid = 0;
9962         key.type = BTRFS_CHUNK_ITEM_KEY;
9963         key.offset = 0;
9964
9965         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9966         if (ret < 0) {
9967                 btrfs_free_path(path);
9968                 return ret;
9969         }
9970
9971         /*
9972          * We do this in case the block groups were screwed up and had alloc
9973          * bits that aren't actually set on the chunks.  This happens with
9974          * restored images every time and could happen in real life I guess.
9975          */
9976         fs_info->avail_data_alloc_bits = 0;
9977         fs_info->avail_metadata_alloc_bits = 0;
9978         fs_info->avail_system_alloc_bits = 0;
9979
9980         /* First we need to create the in-memory block groups */
9981         while (1) {
9982                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9983                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
9984                         if (ret < 0) {
9985                                 btrfs_free_path(path);
9986                                 return ret;
9987                         }
9988                         if (ret) {
9989                                 ret = 0;
9990                                 break;
9991                         }
9992                 }
9993                 leaf = path->nodes[0];
9994                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9995                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9996                         path->slots[0]++;
9997                         continue;
9998                 }
9999
10000                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10001                                        struct btrfs_chunk);
10002                 btrfs_add_block_group(fs_info, 0,
10003                                       btrfs_chunk_type(leaf, chunk),
10004                                       key.objectid, key.offset,
10005                                       btrfs_chunk_length(leaf, chunk));
10006                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10007                                  key.offset + btrfs_chunk_length(leaf, chunk),
10008                                  GFP_NOFS);
10009                 path->slots[0]++;
10010         }
10011         start = 0;
10012         while (1) {
10013                 cache = btrfs_lookup_first_block_group(fs_info, start);
10014                 if (!cache)
10015                         break;
10016                 cache->cached = 1;
10017                 start = cache->key.objectid + cache->key.offset;
10018         }
10019
10020         btrfs_free_path(path);
10021         return 0;
10022 }
10023
10024 static int reset_balance(struct btrfs_trans_handle *trans,
10025                          struct btrfs_fs_info *fs_info)
10026 {
10027         struct btrfs_root *root = fs_info->tree_root;
10028         struct btrfs_path *path;
10029         struct extent_buffer *leaf;
10030         struct btrfs_key key;
10031         int del_slot, del_nr = 0;
10032         int ret;
10033         int found = 0;
10034
10035         path = btrfs_alloc_path();
10036         if (!path)
10037                 return -ENOMEM;
10038
10039         key.objectid = BTRFS_BALANCE_OBJECTID;
10040         key.type = BTRFS_BALANCE_ITEM_KEY;
10041         key.offset = 0;
10042
10043         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10044         if (ret) {
10045                 if (ret > 0)
10046                         ret = 0;
10047                 if (!ret)
10048                         goto reinit_data_reloc;
10049                 else
10050                         goto out;
10051         }
10052
10053         ret = btrfs_del_item(trans, root, path);
10054         if (ret)
10055                 goto out;
10056         btrfs_release_path(path);
10057
10058         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10059         key.type = BTRFS_ROOT_ITEM_KEY;
10060         key.offset = 0;
10061
10062         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10063         if (ret < 0)
10064                 goto out;
10065         while (1) {
10066                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10067                         if (!found)
10068                                 break;
10069
10070                         if (del_nr) {
10071                                 ret = btrfs_del_items(trans, root, path,
10072                                                       del_slot, del_nr);
10073                                 del_nr = 0;
10074                                 if (ret)
10075                                         goto out;
10076                         }
10077                         key.offset++;
10078                         btrfs_release_path(path);
10079
10080                         found = 0;
10081                         ret = btrfs_search_slot(trans, root, &key, path,
10082                                                 -1, 1);
10083                         if (ret < 0)
10084                                 goto out;
10085                         continue;
10086                 }
10087                 found = 1;
10088                 leaf = path->nodes[0];
10089                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10090                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10091                         break;
10092                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10093                         path->slots[0]++;
10094                         continue;
10095                 }
10096                 if (!del_nr) {
10097                         del_slot = path->slots[0];
10098                         del_nr = 1;
10099                 } else {
10100                         del_nr++;
10101                 }
10102                 path->slots[0]++;
10103         }
10104
10105         if (del_nr) {
10106                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10107                 if (ret)
10108                         goto out;
10109         }
10110         btrfs_release_path(path);
10111
10112 reinit_data_reloc:
10113         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10114         key.type = BTRFS_ROOT_ITEM_KEY;
10115         key.offset = (u64)-1;
10116         root = btrfs_read_fs_root(fs_info, &key);
10117         if (IS_ERR(root)) {
10118                 fprintf(stderr, "Error reading data reloc tree\n");
10119                 ret = PTR_ERR(root);
10120                 goto out;
10121         }
10122         record_root_in_trans(trans, root);
10123         ret = btrfs_fsck_reinit_root(trans, root, 0);
10124         if (ret)
10125                 goto out;
10126         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10127 out:
10128         btrfs_free_path(path);
10129         return ret;
10130 }
10131
10132 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10133                               struct btrfs_fs_info *fs_info)
10134 {
10135         u64 start = 0;
10136         int ret;
10137
10138         /*
10139          * The only reason we don't do this is because right now we're just
10140          * walking the trees we find and pinning down their bytes, we don't look
10141          * at any of the leaves.  In order to do mixed groups we'd have to check
10142          * the leaves of any fs roots and pin down the bytes for any file
10143          * extents we find.  Not hard but why do it if we don't have to?
10144          */
10145         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10146                 fprintf(stderr, "We don't support re-initing the extent tree "
10147                         "for mixed block groups yet, please notify a btrfs "
10148                         "developer you want to do this so they can add this "
10149                         "functionality.\n");
10150                 return -EINVAL;
10151         }
10152
10153         /*
10154          * first we need to walk all of the trees except the extent tree and pin
10155          * down the bytes that are in use so we don't overwrite any existing
10156          * metadata.
10157          */
10158         ret = pin_metadata_blocks(fs_info);
10159         if (ret) {
10160                 fprintf(stderr, "error pinning down used bytes\n");
10161                 return ret;
10162         }
10163
10164         /*
10165          * Need to drop all the block groups since we're going to recreate all
10166          * of them again.
10167          */
10168         btrfs_free_block_groups(fs_info);
10169         ret = reset_block_groups(fs_info);
10170         if (ret) {
10171                 fprintf(stderr, "error resetting the block groups\n");
10172                 return ret;
10173         }
10174
10175         /* Ok we can allocate now, reinit the extent root */
10176         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10177         if (ret) {
10178                 fprintf(stderr, "extent root initialization failed\n");
10179                 /*
10180                  * When the transaction code is updated we should end the
10181                  * transaction, but for now progs only knows about commit so
10182                  * just return an error.
10183                  */
10184                 return ret;
10185         }
10186
10187         /*
10188          * Now we have all the in-memory block groups setup so we can make
10189          * allocations properly, and the metadata we care about is safe since we
10190          * pinned all of it above.
10191          */
10192         while (1) {
10193                 struct btrfs_block_group_cache *cache;
10194
10195                 cache = btrfs_lookup_first_block_group(fs_info, start);
10196                 if (!cache)
10197                         break;
10198                 start = cache->key.objectid + cache->key.offset;
10199                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10200                                         &cache->key, &cache->item,
10201                                         sizeof(cache->item));
10202                 if (ret) {
10203                         fprintf(stderr, "Error adding block group\n");
10204                         return ret;
10205                 }
10206                 btrfs_extent_post_op(trans, fs_info->extent_root);
10207         }
10208
10209         ret = reset_balance(trans, fs_info);
10210         if (ret)
10211                 fprintf(stderr, "error resetting the pending balance\n");
10212
10213         return ret;
10214 }
10215
10216 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10217 {
10218         struct btrfs_path *path;
10219         struct btrfs_trans_handle *trans;
10220         struct btrfs_key key;
10221         int ret;
10222
10223         printf("Recowing metadata block %llu\n", eb->start);
10224         key.objectid = btrfs_header_owner(eb);
10225         key.type = BTRFS_ROOT_ITEM_KEY;
10226         key.offset = (u64)-1;
10227
10228         root = btrfs_read_fs_root(root->fs_info, &key);
10229         if (IS_ERR(root)) {
10230                 fprintf(stderr, "Couldn't find owner root %llu\n",
10231                         key.objectid);
10232                 return PTR_ERR(root);
10233         }
10234
10235         path = btrfs_alloc_path();
10236         if (!path)
10237                 return -ENOMEM;
10238
10239         trans = btrfs_start_transaction(root, 1);
10240         if (IS_ERR(trans)) {
10241                 btrfs_free_path(path);
10242                 return PTR_ERR(trans);
10243         }
10244
10245         path->lowest_level = btrfs_header_level(eb);
10246         if (path->lowest_level)
10247                 btrfs_node_key_to_cpu(eb, &key, 0);
10248         else
10249                 btrfs_item_key_to_cpu(eb, &key, 0);
10250
10251         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10252         btrfs_commit_transaction(trans, root);
10253         btrfs_free_path(path);
10254         return ret;
10255 }
10256
10257 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10258 {
10259         struct btrfs_path *path;
10260         struct btrfs_trans_handle *trans;
10261         struct btrfs_key key;
10262         int ret;
10263
10264         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10265                bad->key.type, bad->key.offset);
10266         key.objectid = bad->root_id;
10267         key.type = BTRFS_ROOT_ITEM_KEY;
10268         key.offset = (u64)-1;
10269
10270         root = btrfs_read_fs_root(root->fs_info, &key);
10271         if (IS_ERR(root)) {
10272                 fprintf(stderr, "Couldn't find owner root %llu\n",
10273                         key.objectid);
10274                 return PTR_ERR(root);
10275         }
10276
10277         path = btrfs_alloc_path();
10278         if (!path)
10279                 return -ENOMEM;
10280
10281         trans = btrfs_start_transaction(root, 1);
10282         if (IS_ERR(trans)) {
10283                 btrfs_free_path(path);
10284                 return PTR_ERR(trans);
10285         }
10286
10287         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10288         if (ret) {
10289                 if (ret > 0)
10290                         ret = 0;
10291                 goto out;
10292         }
10293         ret = btrfs_del_item(trans, root, path);
10294 out:
10295         btrfs_commit_transaction(trans, root);
10296         btrfs_free_path(path);
10297         return ret;
10298 }
10299
10300 static int zero_log_tree(struct btrfs_root *root)
10301 {
10302         struct btrfs_trans_handle *trans;
10303         int ret;
10304
10305         trans = btrfs_start_transaction(root, 1);
10306         if (IS_ERR(trans)) {
10307                 ret = PTR_ERR(trans);
10308                 return ret;
10309         }
10310         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10311         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10312         ret = btrfs_commit_transaction(trans, root);
10313         return ret;
10314 }
10315
10316 static int populate_csum(struct btrfs_trans_handle *trans,
10317                          struct btrfs_root *csum_root, char *buf, u64 start,
10318                          u64 len)
10319 {
10320         u64 offset = 0;
10321         u64 sectorsize;
10322         int ret = 0;
10323
10324         while (offset < len) {
10325                 sectorsize = csum_root->sectorsize;
10326                 ret = read_extent_data(csum_root, buf, start + offset,
10327                                        &sectorsize, 0);
10328                 if (ret)
10329                         break;
10330                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10331                                             start + offset, buf, sectorsize);
10332                 if (ret)
10333                         break;
10334                 offset += sectorsize;
10335         }
10336         return ret;
10337 }
10338
10339 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10340                                       struct btrfs_root *csum_root,
10341                                       struct btrfs_root *cur_root)
10342 {
10343         struct btrfs_path *path;
10344         struct btrfs_key key;
10345         struct extent_buffer *node;
10346         struct btrfs_file_extent_item *fi;
10347         char *buf = NULL;
10348         u64 start = 0;
10349         u64 len = 0;
10350         int slot = 0;
10351         int ret = 0;
10352
10353         path = btrfs_alloc_path();
10354         if (!path)
10355                 return -ENOMEM;
10356         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10357         if (!buf) {
10358                 ret = -ENOMEM;
10359                 goto out;
10360         }
10361
10362         key.objectid = 0;
10363         key.offset = 0;
10364         key.type = 0;
10365
10366         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10367         if (ret < 0)
10368                 goto out;
10369         /* Iterate all regular file extents and fill its csum */
10370         while (1) {
10371                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10372
10373                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10374                         goto next;
10375                 node = path->nodes[0];
10376                 slot = path->slots[0];
10377                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10378                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10379                         goto next;
10380                 start = btrfs_file_extent_disk_bytenr(node, fi);
10381                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10382
10383                 ret = populate_csum(trans, csum_root, buf, start, len);
10384                 if (ret == -EEXIST)
10385                         ret = 0;
10386                 if (ret < 0)
10387                         goto out;
10388 next:
10389                 /*
10390                  * TODO: if next leaf is corrupted, jump to nearest next valid
10391                  * leaf.
10392                  */
10393                 ret = btrfs_next_item(cur_root, path);
10394                 if (ret < 0)
10395                         goto out;
10396                 if (ret > 0) {
10397                         ret = 0;
10398                         goto out;
10399                 }
10400         }
10401
10402 out:
10403         btrfs_free_path(path);
10404         free(buf);
10405         return ret;
10406 }
10407
10408 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10409                                   struct btrfs_root *csum_root)
10410 {
10411         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10412         struct btrfs_path *path;
10413         struct btrfs_root *tree_root = fs_info->tree_root;
10414         struct btrfs_root *cur_root;
10415         struct extent_buffer *node;
10416         struct btrfs_key key;
10417         int slot = 0;
10418         int ret = 0;
10419
10420         path = btrfs_alloc_path();
10421         if (!path)
10422                 return -ENOMEM;
10423
10424         key.objectid = BTRFS_FS_TREE_OBJECTID;
10425         key.offset = 0;
10426         key.type = BTRFS_ROOT_ITEM_KEY;
10427
10428         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10429         if (ret < 0)
10430                 goto out;
10431         if (ret > 0) {
10432                 ret = -ENOENT;
10433                 goto out;
10434         }
10435
10436         while (1) {
10437                 node = path->nodes[0];
10438                 slot = path->slots[0];
10439                 btrfs_item_key_to_cpu(node, &key, slot);
10440                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10441                         goto out;
10442                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10443                         goto next;
10444                 if (!is_fstree(key.objectid))
10445                         goto next;
10446                 key.offset = (u64)-1;
10447
10448                 cur_root = btrfs_read_fs_root(fs_info, &key);
10449                 if (IS_ERR(cur_root) || !cur_root) {
10450                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10451                                 key.objectid);
10452                         goto out;
10453                 }
10454                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10455                                 cur_root);
10456                 if (ret < 0)
10457                         goto out;
10458 next:
10459                 ret = btrfs_next_item(tree_root, path);
10460                 if (ret > 0) {
10461                         ret = 0;
10462                         goto out;
10463                 }
10464                 if (ret < 0)
10465                         goto out;
10466         }
10467
10468 out:
10469         btrfs_free_path(path);
10470         return ret;
10471 }
10472
10473 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10474                                       struct btrfs_root *csum_root)
10475 {
10476         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10477         struct btrfs_path *path;
10478         struct btrfs_extent_item *ei;
10479         struct extent_buffer *leaf;
10480         char *buf;
10481         struct btrfs_key key;
10482         int ret;
10483
10484         path = btrfs_alloc_path();
10485         if (!path)
10486                 return -ENOMEM;
10487
10488         key.objectid = 0;
10489         key.type = BTRFS_EXTENT_ITEM_KEY;
10490         key.offset = 0;
10491
10492         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10493         if (ret < 0) {
10494                 btrfs_free_path(path);
10495                 return ret;
10496         }
10497
10498         buf = malloc(csum_root->sectorsize);
10499         if (!buf) {
10500                 btrfs_free_path(path);
10501                 return -ENOMEM;
10502         }
10503
10504         while (1) {
10505                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10506                         ret = btrfs_next_leaf(extent_root, path);
10507                         if (ret < 0)
10508                                 break;
10509                         if (ret) {
10510                                 ret = 0;
10511                                 break;
10512                         }
10513                 }
10514                 leaf = path->nodes[0];
10515
10516                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10517                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10518                         path->slots[0]++;
10519                         continue;
10520                 }
10521
10522                 ei = btrfs_item_ptr(leaf, path->slots[0],
10523                                     struct btrfs_extent_item);
10524                 if (!(btrfs_extent_flags(leaf, ei) &
10525                       BTRFS_EXTENT_FLAG_DATA)) {
10526                         path->slots[0]++;
10527                         continue;
10528                 }
10529
10530                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10531                                     key.offset);
10532                 if (ret)
10533                         break;
10534                 path->slots[0]++;
10535         }
10536
10537         btrfs_free_path(path);
10538         free(buf);
10539         return ret;
10540 }
10541
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * cannot be used as the source.  A non-zero @search_fs_tree selects the
 * fs/subvolume trees as the source instead; zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
10558
10559 static void free_roots_info_cache(void)
10560 {
10561         if (!roots_info_cache)
10562                 return;
10563
10564         while (!cache_tree_empty(roots_info_cache)) {
10565                 struct cache_extent *entry;
10566                 struct root_item_info *rii;
10567
10568                 entry = first_cache_extent(roots_info_cache);
10569                 if (!entry)
10570                         break;
10571                 remove_cache_extent(roots_info_cache, entry);
10572                 rii = container_of(entry, struct root_item_info, cache_extent);
10573                 free(rii);
10574         }
10575
10576         free(roots_info_cache);
10577         roots_info_cache = NULL;
10578 }
10579
10580 static int build_roots_info_cache(struct btrfs_fs_info *info)
10581 {
10582         int ret = 0;
10583         struct btrfs_key key;
10584         struct extent_buffer *leaf;
10585         struct btrfs_path *path;
10586
10587         if (!roots_info_cache) {
10588                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10589                 if (!roots_info_cache)
10590                         return -ENOMEM;
10591                 cache_tree_init(roots_info_cache);
10592         }
10593
10594         path = btrfs_alloc_path();
10595         if (!path)
10596                 return -ENOMEM;
10597
10598         key.objectid = 0;
10599         key.type = BTRFS_EXTENT_ITEM_KEY;
10600         key.offset = 0;
10601
10602         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10603         if (ret < 0)
10604                 goto out;
10605         leaf = path->nodes[0];
10606
10607         while (1) {
10608                 struct btrfs_key found_key;
10609                 struct btrfs_extent_item *ei;
10610                 struct btrfs_extent_inline_ref *iref;
10611                 int slot = path->slots[0];
10612                 int type;
10613                 u64 flags;
10614                 u64 root_id;
10615                 u8 level;
10616                 struct cache_extent *entry;
10617                 struct root_item_info *rii;
10618
10619                 if (slot >= btrfs_header_nritems(leaf)) {
10620                         ret = btrfs_next_leaf(info->extent_root, path);
10621                         if (ret < 0) {
10622                                 break;
10623                         } else if (ret) {
10624                                 ret = 0;
10625                                 break;
10626                         }
10627                         leaf = path->nodes[0];
10628                         slot = path->slots[0];
10629                 }
10630
10631                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10632
10633                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10634                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10635                         goto next;
10636
10637                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10638                 flags = btrfs_extent_flags(leaf, ei);
10639
10640                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10641                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10642                         goto next;
10643
10644                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10645                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10646                         level = found_key.offset;
10647                 } else {
10648                         struct btrfs_tree_block_info *binfo;
10649
10650                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10651                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10652                         level = btrfs_tree_block_level(leaf, binfo);
10653                 }
10654
10655                 /*
10656                  * For a root extent, it must be of the following type and the
10657                  * first (and only one) iref in the item.
10658                  */
10659                 type = btrfs_extent_inline_ref_type(leaf, iref);
10660                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10661                         goto next;
10662
10663                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10664                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10665                 if (!entry) {
10666                         rii = malloc(sizeof(struct root_item_info));
10667                         if (!rii) {
10668                                 ret = -ENOMEM;
10669                                 goto out;
10670                         }
10671                         rii->cache_extent.start = root_id;
10672                         rii->cache_extent.size = 1;
10673                         rii->level = (u8)-1;
10674                         entry = &rii->cache_extent;
10675                         ret = insert_cache_extent(roots_info_cache, entry);
10676                         ASSERT(ret == 0);
10677                 } else {
10678                         rii = container_of(entry, struct root_item_info,
10679                                            cache_extent);
10680                 }
10681
10682                 ASSERT(rii->cache_extent.start == root_id);
10683                 ASSERT(rii->cache_extent.size == 1);
10684
10685                 if (level > rii->level || rii->level == (u8)-1) {
10686                         rii->level = level;
10687                         rii->bytenr = found_key.objectid;
10688                         rii->gen = btrfs_extent_generation(leaf, ei);
10689                         rii->node_count = 1;
10690                 } else if (level == rii->level) {
10691                         rii->node_count++;
10692                 }
10693 next:
10694                 path->slots[0]++;
10695         }
10696
10697 out:
10698         btrfs_free_path(path);
10699
10700         return ret;
10701 }
10702
10703 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10704                                   struct btrfs_path *path,
10705                                   const struct btrfs_key *root_key,
10706                                   const int read_only_mode)
10707 {
10708         const u64 root_id = root_key->objectid;
10709         struct cache_extent *entry;
10710         struct root_item_info *rii;
10711         struct btrfs_root_item ri;
10712         unsigned long offset;
10713
10714         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10715         if (!entry) {
10716                 fprintf(stderr,
10717                         "Error: could not find extent items for root %llu\n",
10718                         root_key->objectid);
10719                 return -ENOENT;
10720         }
10721
10722         rii = container_of(entry, struct root_item_info, cache_extent);
10723         ASSERT(rii->cache_extent.start == root_id);
10724         ASSERT(rii->cache_extent.size == 1);
10725
10726         if (rii->node_count != 1) {
10727                 fprintf(stderr,
10728                         "Error: could not find btree root extent for root %llu\n",
10729                         root_id);
10730                 return -ENOENT;
10731         }
10732
10733         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10734         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10735
10736         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10737             btrfs_root_level(&ri) != rii->level ||
10738             btrfs_root_generation(&ri) != rii->gen) {
10739
10740                 /*
10741                  * If we're in repair mode but our caller told us to not update
10742                  * the root item, i.e. just check if it needs to be updated, don't
10743                  * print this message, since the caller will call us again shortly
10744                  * for the same root item without read only mode (the caller will
10745                  * open a transaction first).
10746                  */
10747                 if (!(read_only_mode && repair))
10748                         fprintf(stderr,
10749                                 "%sroot item for root %llu,"
10750                                 " current bytenr %llu, current gen %llu, current level %u,"
10751                                 " new bytenr %llu, new gen %llu, new level %u\n",
10752                                 (read_only_mode ? "" : "fixing "),
10753                                 root_id,
10754                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10755                                 btrfs_root_level(&ri),
10756                                 rii->bytenr, rii->gen, rii->level);
10757
10758                 if (btrfs_root_generation(&ri) > rii->gen) {
10759                         fprintf(stderr,
10760                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10761                                 root_id, btrfs_root_generation(&ri), rii->gen);
10762                         return -EINVAL;
10763                 }
10764
10765                 if (!read_only_mode) {
10766                         btrfs_set_root_bytenr(&ri, rii->bytenr);
10767                         btrfs_set_root_level(&ri, rii->level);
10768                         btrfs_set_root_generation(&ri, rii->gen);
10769                         write_extent_buffer(path->nodes[0], &ri,
10770                                             offset, sizeof(ri));
10771                 }
10772
10773                 return 1;
10774         }
10775
10776         return 0;
10777 }
10778
10779 /*
10780  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10781  * caused read-only snapshots to be corrupted if they were created at a moment
10782  * when the source subvolume/snapshot had orphan items. The issue was that the
10783  * on-disk root items became incorrect, referring to the pre orphan cleanup root
10784  * node instead of the post orphan cleanup root node.
10785  * So this function, and its callees, just detects and fixes those cases. Even
10786  * though the regression was for read-only snapshots, this function applies to
10787  * any snapshot/subvolume root.
10788  * This must be run before any other repair code - not doing it so, makes other
10789  * repair code delete or modify backrefs in the extent tree for example, which
10790  * will result in an inconsistent fs after repairing the root items.
10791  */
10792 static int repair_root_items(struct btrfs_fs_info *info)
10793 {
10794         struct btrfs_path *path = NULL;
10795         struct btrfs_key key;
10796         struct extent_buffer *leaf;
10797         struct btrfs_trans_handle *trans = NULL;
10798         int ret = 0;
10799         int bad_roots = 0;
10800         int need_trans = 0;
10801
10802         ret = build_roots_info_cache(info);
10803         if (ret)
10804                 goto out;
10805
10806         path = btrfs_alloc_path();
10807         if (!path) {
10808                 ret = -ENOMEM;
10809                 goto out;
10810         }
10811
10812         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10813         key.type = BTRFS_ROOT_ITEM_KEY;
10814         key.offset = 0;
10815
10816 again:
10817         /*
10818          * Avoid opening and committing transactions if a leaf doesn't have
10819          * any root items that need to be fixed, so that we avoid rotating
10820          * backup roots unnecessarily.
10821          */
10822         if (need_trans) {
10823                 trans = btrfs_start_transaction(info->tree_root, 1);
10824                 if (IS_ERR(trans)) {
10825                         ret = PTR_ERR(trans);
10826                         goto out;
10827                 }
10828         }
10829
10830         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10831                                 0, trans ? 1 : 0);
10832         if (ret < 0)
10833                 goto out;
10834         leaf = path->nodes[0];
10835
10836         while (1) {
10837                 struct btrfs_key found_key;
10838
10839                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10840                         int no_more_keys = find_next_key(path, &key);
10841
10842                         btrfs_release_path(path);
10843                         if (trans) {
10844                                 ret = btrfs_commit_transaction(trans,
10845                                                                info->tree_root);
10846                                 trans = NULL;
10847                                 if (ret < 0)
10848                                         goto out;
10849                         }
10850                         need_trans = 0;
10851                         if (no_more_keys)
10852                                 break;
10853                         goto again;
10854                 }
10855
10856                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10857
10858                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10859                         goto next;
10860                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10861                         goto next;
10862
10863                 ret = maybe_repair_root_item(info, path, &found_key,
10864                                              trans ? 0 : 1);
10865                 if (ret < 0)
10866                         goto out;
10867                 if (ret) {
10868                         if (!trans && repair) {
10869                                 need_trans = 1;
10870                                 key = found_key;
10871                                 btrfs_release_path(path);
10872                                 goto again;
10873                         }
10874                         bad_roots++;
10875                 }
10876 next:
10877                 path->slots[0]++;
10878         }
10879         ret = 0;
10880 out:
10881         free_roots_info_cache();
10882         btrfs_free_path(path);
10883         if (trans)
10884                 btrfs_commit_transaction(trans, info->tree_root);
10885         if (ret < 0)
10886                 return ret;
10887
10888         return bad_roots;
10889 }
10890
/*
 * Help text for "btrfs check", as a NULL-terminated string table.  The first
 * entries are the synopsis and description, then an empty string, then the
 * option lines.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	/* terminator */
	NULL
};
10914
10915 int cmd_check(int argc, char **argv)
10916 {
10917         struct cache_tree root_cache;
10918         struct btrfs_root *root;
10919         struct btrfs_fs_info *info;
10920         u64 bytenr = 0;
10921         u64 subvolid = 0;
10922         u64 tree_root_bytenr = 0;
10923         u64 chunk_root_bytenr = 0;
10924         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10925         int ret;
10926         u64 num;
10927         int init_csum_tree = 0;
10928         int readonly = 0;
10929         int qgroup_report = 0;
10930         int qgroups_repaired = 0;
10931         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10932
10933         while(1) {
10934                 int c;
10935                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10936                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10937                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10938                 static const struct option long_options[] = {
10939                         { "super", required_argument, NULL, 's' },
10940                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10941                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10942                         { "init-csum-tree", no_argument, NULL,
10943                                 GETOPT_VAL_INIT_CSUM },
10944                         { "init-extent-tree", no_argument, NULL,
10945                                 GETOPT_VAL_INIT_EXTENT },
10946                         { "check-data-csum", no_argument, NULL,
10947                                 GETOPT_VAL_CHECK_CSUM },
10948                         { "backup", no_argument, NULL, 'b' },
10949                         { "subvol-extents", required_argument, NULL, 'E' },
10950                         { "qgroup-report", no_argument, NULL, 'Q' },
10951                         { "tree-root", required_argument, NULL, 'r' },
10952                         { "chunk-root", required_argument, NULL,
10953                                 GETOPT_VAL_CHUNK_TREE },
10954                         { "progress", no_argument, NULL, 'p' },
10955                         { NULL, 0, NULL, 0}
10956                 };
10957
10958                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10959                 if (c < 0)
10960                         break;
10961                 switch(c) {
10962                         case 'a': /* ignored */ break;
10963                         case 'b':
10964                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10965                                 break;
10966                         case 's':
10967                                 num = arg_strtou64(optarg);
10968                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10969                                         fprintf(stderr,
10970                                                 "ERROR: super mirror should be less than: %d\n",
10971                                                 BTRFS_SUPER_MIRROR_MAX);
10972                                         exit(1);
10973                                 }
10974                                 bytenr = btrfs_sb_offset(((int)num));
10975                                 printf("using SB copy %llu, bytenr %llu\n", num,
10976                                        (unsigned long long)bytenr);
10977                                 break;
10978                         case 'Q':
10979                                 qgroup_report = 1;
10980                                 break;
10981                         case 'E':
10982                                 subvolid = arg_strtou64(optarg);
10983                                 break;
10984                         case 'r':
10985                                 tree_root_bytenr = arg_strtou64(optarg);
10986                                 break;
10987                         case GETOPT_VAL_CHUNK_TREE:
10988                                 chunk_root_bytenr = arg_strtou64(optarg);
10989                                 break;
10990                         case 'p':
10991                                 ctx.progress_enabled = true;
10992                                 break;
10993                         case '?':
10994                         case 'h':
10995                                 usage(cmd_check_usage);
10996                         case GETOPT_VAL_REPAIR:
10997                                 printf("enabling repair mode\n");
10998                                 repair = 1;
10999                                 ctree_flags |= OPEN_CTREE_WRITES;
11000                                 break;
11001                         case GETOPT_VAL_READONLY:
11002                                 readonly = 1;
11003                                 break;
11004                         case GETOPT_VAL_INIT_CSUM:
11005                                 printf("Creating a new CRC tree\n");
11006                                 init_csum_tree = 1;
11007                                 repair = 1;
11008                                 ctree_flags |= OPEN_CTREE_WRITES;
11009                                 break;
11010                         case GETOPT_VAL_INIT_EXTENT:
11011                                 init_extent_tree = 1;
11012                                 ctree_flags |= (OPEN_CTREE_WRITES |
11013                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11014                                 repair = 1;
11015                                 break;
11016                         case GETOPT_VAL_CHECK_CSUM:
11017                                 check_data_csum = 1;
11018                                 break;
11019                 }
11020         }
11021
11022         if (check_argc_exact(argc - optind, 1))
11023                 usage(cmd_check_usage);
11024
11025         if (ctx.progress_enabled) {
11026                 ctx.tp = TASK_NOTHING;
11027                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11028         }
11029
11030         /* This check is the only reason for --readonly to exist */
11031         if (readonly && repair) {
11032                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11033                 exit(1);
11034         }
11035
11036         radix_tree_init();
11037         cache_tree_init(&root_cache);
11038
11039         if((ret = check_mounted(argv[optind])) < 0) {
11040                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11041                 goto err_out;
11042         } else if(ret) {
11043                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11044                 ret = -EBUSY;
11045                 goto err_out;
11046         }
11047
11048         /* only allow partial opening under repair mode */
11049         if (repair)
11050                 ctree_flags |= OPEN_CTREE_PARTIAL;
11051
11052         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11053                                   chunk_root_bytenr, ctree_flags);
11054         if (!info) {
11055                 fprintf(stderr, "Couldn't open file system\n");
11056                 ret = -EIO;
11057                 goto err_out;
11058         }
11059
11060         global_info = info;
11061         root = info->fs_root;
11062
11063         /*
11064          * repair mode will force us to commit transaction which
11065          * will make us fail to load log tree when mounting.
11066          */
11067         if (repair && btrfs_super_log_root(info->super_copy)) {
11068                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11069                 if (!ret) {
11070                         ret = 1;
11071                         goto close_out;
11072                 }
11073                 ret = zero_log_tree(root);
11074                 if (ret) {
11075                         fprintf(stderr, "fail to zero log tree\n");
11076                         goto close_out;
11077                 }
11078         }
11079
11080         uuid_unparse(info->super_copy->fsid, uuidbuf);
11081         if (qgroup_report) {
11082                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11083                        uuidbuf);
11084                 ret = qgroup_verify_all(info);
11085                 if (ret == 0)
11086                         report_qgroups(1);
11087                 goto close_out;
11088         }
11089         if (subvolid) {
11090                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11091                        subvolid, argv[optind], uuidbuf);
11092                 ret = print_extent_state(info, subvolid);
11093                 goto close_out;
11094         }
11095         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11096
11097         if (!extent_buffer_uptodate(info->tree_root->node) ||
11098             !extent_buffer_uptodate(info->dev_root->node) ||
11099             !extent_buffer_uptodate(info->chunk_root->node)) {
11100                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11101                 ret = -EIO;
11102                 goto close_out;
11103         }
11104
11105         if (init_extent_tree || init_csum_tree) {
11106                 struct btrfs_trans_handle *trans;
11107
11108                 trans = btrfs_start_transaction(info->extent_root, 0);
11109                 if (IS_ERR(trans)) {
11110                         fprintf(stderr, "Error starting transaction\n");
11111                         ret = PTR_ERR(trans);
11112                         goto close_out;
11113                 }
11114
11115                 if (init_extent_tree) {
11116                         printf("Creating a new extent tree\n");
11117                         ret = reinit_extent_tree(trans, info);
11118                         if (ret)
11119                                 goto close_out;
11120                 }
11121
11122                 if (init_csum_tree) {
11123                         fprintf(stderr, "Reinit crc root\n");
11124                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11125                         if (ret) {
11126                                 fprintf(stderr, "crc root initialization failed\n");
11127                                 ret = -EIO;
11128                                 goto close_out;
11129                         }
11130
11131                         ret = fill_csum_tree(trans, info->csum_root,
11132                                              init_extent_tree);
11133                         if (ret) {
11134                                 fprintf(stderr, "crc refilling failed\n");
11135                                 return -EIO;
11136                         }
11137                 }
11138                 /*
11139                  * Ok now we commit and run the normal fsck, which will add
11140                  * extent entries for all of the items it finds.
11141                  */
11142                 ret = btrfs_commit_transaction(trans, info->extent_root);
11143                 if (ret)
11144                         goto close_out;
11145         }
11146         if (!extent_buffer_uptodate(info->extent_root->node)) {
11147                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11148                 ret = -EIO;
11149                 goto close_out;
11150         }
11151         if (!extent_buffer_uptodate(info->csum_root->node)) {
11152                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11153                 ret = -EIO;
11154                 goto close_out;
11155         }
11156
11157         if (!ctx.progress_enabled)
11158                 fprintf(stderr, "checking extents\n");
11159         ret = check_chunks_and_extents(root);
11160         if (ret)
11161                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11162
11163         ret = repair_root_items(info);
11164         if (ret < 0)
11165                 goto close_out;
11166         if (repair) {
11167                 fprintf(stderr, "Fixed %d roots.\n", ret);
11168                 ret = 0;
11169         } else if (ret > 0) {
11170                 fprintf(stderr,
11171                        "Found %d roots with an outdated root item.\n",
11172                        ret);
11173                 fprintf(stderr,
11174                         "Please run a filesystem check with the option --repair to fix them.\n");
11175                 ret = 1;
11176                 goto close_out;
11177         }
11178
11179         if (!ctx.progress_enabled) {
11180                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11181                         fprintf(stderr, "checking free space tree\n");
11182                 else
11183                         fprintf(stderr, "checking free space cache\n");
11184         }
11185         ret = check_space_cache(root);
11186         if (ret)
11187                 goto out;
11188
11189         /*
11190          * We used to have to have these hole extents in between our real
11191          * extents so if we don't have this flag set we need to make sure there
11192          * are no gaps in the file extents for inodes, otherwise we can just
11193          * ignore it when this happens.
11194          */
11195         no_holes = btrfs_fs_incompat(root->fs_info,
11196                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11197         if (!ctx.progress_enabled)
11198                 fprintf(stderr, "checking fs roots\n");
11199         ret = check_fs_roots(root, &root_cache);
11200         if (ret)
11201                 goto out;
11202
11203         fprintf(stderr, "checking csums\n");
11204         ret = check_csums(root);
11205         if (ret)
11206                 goto out;
11207
11208         fprintf(stderr, "checking root refs\n");
11209         ret = check_root_refs(root, &root_cache);
11210         if (ret)
11211                 goto out;
11212
11213         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11214                 struct extent_buffer *eb;
11215
11216                 eb = list_first_entry(&root->fs_info->recow_ebs,
11217                                       struct extent_buffer, recow);
11218                 list_del_init(&eb->recow);
11219                 ret = recow_extent_buffer(root, eb);
11220                 if (ret)
11221                         break;
11222         }
11223
11224         while (!list_empty(&delete_items)) {
11225                 struct bad_item *bad;
11226
11227                 bad = list_first_entry(&delete_items, struct bad_item, list);
11228                 list_del_init(&bad->list);
11229                 if (repair)
11230                         ret = delete_bad_item(root, bad);
11231                 free(bad);
11232         }
11233
11234         if (info->quota_enabled) {
11235                 int err;
11236                 fprintf(stderr, "checking quota groups\n");
11237                 err = qgroup_verify_all(info);
11238                 if (err)
11239                         goto out;
11240                 report_qgroups(0);
11241                 err = repair_qgroups(info, &qgroups_repaired);
11242                 if (err)
11243                         goto out;
11244         }
11245
11246         if (!list_empty(&root->fs_info->recow_ebs)) {
11247                 fprintf(stderr, "Transid errors in file system\n");
11248                 ret = 1;
11249         }
11250 out:
11251         /* Don't override original ret */
11252         if (!ret && qgroups_repaired)
11253                 ret = qgroups_repaired;
11254
11255         if (found_old_backref) { /*
11256                  * there was a disk format change when mixed
11257                  * backref was in testing tree. The old format
11258                  * existed about one week.
11259                  */
11260                 printf("\n * Found old mixed backref format. "
11261                        "The old format is not supported! *"
11262                        "\n * Please mount the FS in readonly mode, "
11263                        "backup data and re-format the FS. *\n\n");
11264                 ret = 1;
11265         }
11266         printf("found %llu bytes used err is %d\n",
11267                (unsigned long long)bytes_used, ret);
11268         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11269         printf("total tree bytes: %llu\n",
11270                (unsigned long long)total_btree_bytes);
11271         printf("total fs tree bytes: %llu\n",
11272                (unsigned long long)total_fs_tree_bytes);
11273         printf("total extent tree bytes: %llu\n",
11274                (unsigned long long)total_extent_tree_bytes);
11275         printf("btree space waste bytes: %llu\n",
11276                (unsigned long long)btree_space_waste);
11277         printf("file data blocks allocated: %llu\n referenced %llu\n",
11278                 (unsigned long long)data_bytes_allocated,
11279                 (unsigned long long)data_bytes_referenced);
11280
11281         free_qgroup_counts();
11282         free_root_recs_tree(&root_cache);
11283 close_out:
11284         close_ctree(root);
11285 err_out:
11286         if (ctx.progress_enabled)
11287                 task_deinit(ctx.info);
11288
11289         return ret;
11290 }