btrfs-progs: use values directly for BLOCK_GROUP_ macros
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
77 struct extent_backref {
78         struct rb_node node;
79         unsigned int is_data:1;
80         unsigned int found_extent_tree:1;
81         unsigned int full_backref:1;
82         unsigned int found_ref:1;
83         unsigned int broken:1;
84 };
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
91 struct data_backref {
92         struct extent_backref node;
93         union {
94                 u64 parent;
95                 u64 root;
96         };
97         u64 owner;
98         u64 offset;
99         u64 disk_bytenr;
100         u64 bytes;
101         u64 ram_bytes;
102         u32 num_refs;
103         u32 found_ref;
104 };
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
161 /*
162  * Much like data_backref, just removed the undetermined members
163  * and change it to use list_head.
164  * During extent scan, it is stored in root->orphan_data_extent.
165  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
166  */
167 struct orphan_data_extent {
168         struct list_head list;
169         u64 root;
170         u64 objectid;
171         u64 offset;
172         u64 disk_bytenr;
173         u64 disk_len;
174 };
175
176 struct tree_backref {
177         struct extent_backref node;
178         union {
179                 u64 parent;
180                 u64 root;
181         };
182 };
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
232
233 struct extent_record {
234         struct list_head backrefs;
235         struct list_head dups;
236         struct rb_root backref_tree;
237         struct list_head list;
238         struct cache_extent cache;
239         struct btrfs_disk_key parent_key;
240         u64 start;
241         u64 max_size;
242         u64 nr;
243         u64 refs;
244         u64 extent_item_refs;
245         u64 generation;
246         u64 parent_generation;
247         u64 info_objectid;
248         u32 num_duplicates;
249         u8 info_level;
250         unsigned int flag_block_full_backref:2;
251         unsigned int found_rec:1;
252         unsigned int content_checked:1;
253         unsigned int owner_ref_checked:1;
254         unsigned int is_root:1;
255         unsigned int metadata:1;
256         unsigned int bad_full_backref:1;
257         unsigned int crossing_stripes:1;
258         unsigned int wrong_chunk_type:1;
259 };
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
266 struct inode_backref {
267         struct list_head list;
268         unsigned int found_dir_item:1;
269         unsigned int found_dir_index:1;
270         unsigned int found_inode_ref:1;
271         unsigned int filetype:8;
272         int errors;
273         unsigned int ref_type;
274         u64 dir;
275         u64 index;
276         u16 namelen;
277         char name[0];
278 };
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
285 struct root_item_record {
286         struct list_head list;
287         u64 objectid;
288         u64 bytenr;
289         u64 last_snapshot;
290         u8 level;
291         u8 drop_level;
292         int level_size;
293         struct btrfs_key drop_key;
294 };
295
296 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
298 #define REF_ERR_NO_INODE_REF            (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
301 #define REF_ERR_DUP_INODE_REF           (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF             (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
309
310 struct file_extent_hole {
311         struct rb_node node;
312         u64 start;
313         u64 len;
314 };
315
316 struct inode_record {
317         struct list_head backrefs;
318         unsigned int checked:1;
319         unsigned int merging:1;
320         unsigned int found_inode_item:1;
321         unsigned int found_dir_item:1;
322         unsigned int found_file_extent:1;
323         unsigned int found_csum_item:1;
324         unsigned int some_csum_missing:1;
325         unsigned int nodatasum:1;
326         int errors;
327
328         u64 ino;
329         u32 nlink;
330         u32 imode;
331         u64 isize;
332         u64 nbytes;
333
334         u32 found_link;
335         u64 found_size;
336         u64 extent_start;
337         u64 extent_end;
338         struct rb_root holes;
339         struct list_head orphan_extents;
340
341         u32 refs;
342 };
343
344 #define I_ERR_NO_INODE_ITEM             (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
359
360 struct root_backref {
361         struct list_head list;
362         unsigned int found_dir_item:1;
363         unsigned int found_dir_index:1;
364         unsigned int found_back_ref:1;
365         unsigned int found_forward_ref:1;
366         unsigned int reachable:1;
367         int errors;
368         u64 ref_root;
369         u64 dir;
370         u64 index;
371         u16 namelen;
372         char name[0];
373 };
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
380 struct root_record {
381         struct list_head backrefs;
382         struct cache_extent cache;
383         unsigned int found_root_item:1;
384         u64 objectid;
385         u32 found_ref;
386 };
387
388 struct ptr_node {
389         struct cache_extent cache;
390         void *data;
391 };
392
393 struct shared_node {
394         struct cache_extent cache;
395         struct cache_tree root_cache;
396         struct cache_tree inode_cache;
397         struct inode_record *current;
398         u32 refs;
399 };
400
401 struct block_info {
402         u64 start;
403         u32 size;
404 };
405
406 struct walk_control {
407         struct cache_tree shared;
408         struct shared_node *nodes[BTRFS_MAX_LEVEL];
409         int active_node;
410         int root_level;
411 };
412
413 struct bad_item {
414         struct btrfs_key key;
415         u64 root_id;
416         struct list_head list;
417 };
418
419 struct extent_entry {
420         u64 bytenr;
421         u64 bytes;
422         int count;
423         int broken;
424         struct list_head list;
425 };
426
427 struct root_item_info {
428         /* level of the root */
429         u8 level;
430         /* number of nodes at this level, must be 1 for a root */
431         int node_count;
432         u64 bytenr;
433         u64 gen;
434         struct cache_extent cache_extent;
435 };
436
437 static void *print_status_check(void *p)
438 {
439         struct task_ctx *priv = p;
440         const char work_indicator[] = { '.', 'o', 'O', 'o' };
441         uint32_t count = 0;
442         static char *task_position_string[] = {
443                 "checking extents",
444                 "checking free space cache",
445                 "checking fs roots",
446         };
447
448         task_period_start(priv->info, 1000 /* 1s */);
449
450         if (priv->tp == TASK_NOTHING)
451                 return NULL;
452
453         while (1) {
454                 printf("%s [%c]\r", task_position_string[priv->tp],
455                                 work_indicator[count % 4]);
456                 count++;
457                 fflush(stdout);
458                 task_period_wait(priv->info);
459         }
460         return NULL;
461 }
462
463 static int print_status_return(void *p)
464 {
465         printf("\n");
466         fflush(stdout);
467
468         return 0;
469 }
470
471 /* Compatible function to allow reuse of old codes */
472 static u64 first_extent_gap(struct rb_root *holes)
473 {
474         struct file_extent_hole *hole;
475
476         if (RB_EMPTY_ROOT(holes))
477                 return (u64)-1;
478
479         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
480         return hole->start;
481 }
482
483 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
484 {
485         struct file_extent_hole *hole1;
486         struct file_extent_hole *hole2;
487
488         hole1 = rb_entry(node1, struct file_extent_hole, node);
489         hole2 = rb_entry(node2, struct file_extent_hole, node);
490
491         if (hole1->start > hole2->start)
492                 return -1;
493         if (hole1->start < hole2->start)
494                 return 1;
495         /* Now hole1->start == hole2->start */
496         if (hole1->len >= hole2->len)
497                 /*
498                  * Hole 1 will be merge center
499                  * Same hole will be merged later
500                  */
501                 return -1;
502         /* Hole 2 will be merge center */
503         return 1;
504 }
505
506 /*
507  * Add a hole to the record
508  *
509  * This will do hole merge for copy_file_extent_holes(),
510  * which will ensure there won't be continuous holes.
511  */
512 static int add_file_extent_hole(struct rb_root *holes,
513                                 u64 start, u64 len)
514 {
515         struct file_extent_hole *hole;
516         struct file_extent_hole *prev = NULL;
517         struct file_extent_hole *next = NULL;
518
519         hole = malloc(sizeof(*hole));
520         if (!hole)
521                 return -ENOMEM;
522         hole->start = start;
523         hole->len = len;
524         /* Since compare will not return 0, no -EEXIST will happen */
525         rb_insert(holes, &hole->node, compare_hole);
526
527         /* simple merge with previous hole */
528         if (rb_prev(&hole->node))
529                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
530                                 node);
531         if (prev && prev->start + prev->len >= hole->start) {
532                 hole->len = hole->start + hole->len - prev->start;
533                 hole->start = prev->start;
534                 rb_erase(&prev->node, holes);
535                 free(prev);
536                 prev = NULL;
537         }
538
539         /* iterate merge with next holes */
540         while (1) {
541                 if (!rb_next(&hole->node))
542                         break;
543                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
544                                         node);
545                 if (hole->start + hole->len >= next->start) {
546                         if (hole->start + hole->len <= next->start + next->len)
547                                 hole->len = next->start + next->len -
548                                             hole->start;
549                         rb_erase(&next->node, holes);
550                         free(next);
551                         next = NULL;
552                 } else
553                         break;
554         }
555         return 0;
556 }
557
558 static int compare_hole_range(struct rb_node *node, void *data)
559 {
560         struct file_extent_hole *hole;
561         u64 start;
562
563         hole = (struct file_extent_hole *)data;
564         start = hole->start;
565
566         hole = rb_entry(node, struct file_extent_hole, node);
567         if (start < hole->start)
568                 return -1;
569         if (start >= hole->start && start < hole->start + hole->len)
570                 return 0;
571         return 1;
572 }
573
574 /*
575  * Delete a hole in the record
576  *
577  * This will do the hole split and is much restrict than add.
578  */
579 static int del_file_extent_hole(struct rb_root *holes,
580                                 u64 start, u64 len)
581 {
582         struct file_extent_hole *hole;
583         struct file_extent_hole tmp;
584         u64 prev_start = 0;
585         u64 prev_len = 0;
586         u64 next_start = 0;
587         u64 next_len = 0;
588         struct rb_node *node;
589         int have_prev = 0;
590         int have_next = 0;
591         int ret = 0;
592
593         tmp.start = start;
594         tmp.len = len;
595         node = rb_search(holes, &tmp, compare_hole_range, NULL);
596         if (!node)
597                 return -EEXIST;
598         hole = rb_entry(node, struct file_extent_hole, node);
599         if (start + len > hole->start + hole->len)
600                 return -EEXIST;
601
602         /*
603          * Now there will be no overlap, delete the hole and re-add the
604          * split(s) if they exists.
605          */
606         if (start > hole->start) {
607                 prev_start = hole->start;
608                 prev_len = start - hole->start;
609                 have_prev = 1;
610         }
611         if (hole->start + hole->len > start + len) {
612                 next_start = start + len;
613                 next_len = hole->start + hole->len - start - len;
614                 have_next = 1;
615         }
616         rb_erase(node, holes);
617         free(hole);
618         if (have_prev) {
619                 ret = add_file_extent_hole(holes, prev_start, prev_len);
620                 if (ret < 0)
621                         return ret;
622         }
623         if (have_next) {
624                 ret = add_file_extent_hole(holes, next_start, next_len);
625                 if (ret < 0)
626                         return ret;
627         }
628         return 0;
629 }
630
631 static int copy_file_extent_holes(struct rb_root *dst,
632                                   struct rb_root *src)
633 {
634         struct file_extent_hole *hole;
635         struct rb_node *node;
636         int ret = 0;
637
638         node = rb_first(src);
639         while (node) {
640                 hole = rb_entry(node, struct file_extent_hole, node);
641                 ret = add_file_extent_hole(dst, hole->start, hole->len);
642                 if (ret)
643                         break;
644                 node = rb_next(node);
645         }
646         return ret;
647 }
648
649 static void free_file_extent_holes(struct rb_root *holes)
650 {
651         struct rb_node *node;
652         struct file_extent_hole *hole;
653
654         node = rb_first(holes);
655         while (node) {
656                 hole = rb_entry(node, struct file_extent_hole, node);
657                 rb_erase(node, holes);
658                 free(hole);
659                 node = rb_first(holes);
660         }
661 }
662
663 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
664
665 static void record_root_in_trans(struct btrfs_trans_handle *trans,
666                                  struct btrfs_root *root)
667 {
668         if (root->last_trans != trans->transid) {
669                 root->track_dirty = 1;
670                 root->last_trans = trans->transid;
671                 root->commit_root = root->node;
672                 extent_buffer_get(root->node);
673         }
674 }
675
676 static u8 imode_to_type(u32 imode)
677 {
678 #define S_SHIFT 12
679         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
680                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
681                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
682                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
683                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
684                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
685                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
686                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
687         };
688
689         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
690 #undef S_SHIFT
691 }
692
693 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
694 {
695         struct device_record *rec1;
696         struct device_record *rec2;
697
698         rec1 = rb_entry(node1, struct device_record, node);
699         rec2 = rb_entry(node2, struct device_record, node);
700         if (rec1->devid > rec2->devid)
701                 return -1;
702         else if (rec1->devid < rec2->devid)
703                 return 1;
704         else
705                 return 0;
706 }
707
708 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
709 {
710         struct inode_record *rec;
711         struct inode_backref *backref;
712         struct inode_backref *orig;
713         struct inode_backref *tmp;
714         struct orphan_data_extent *src_orphan;
715         struct orphan_data_extent *dst_orphan;
716         size_t size;
717         int ret;
718
719         rec = malloc(sizeof(*rec));
720         if (!rec)
721                 return ERR_PTR(-ENOMEM);
722         memcpy(rec, orig_rec, sizeof(*rec));
723         rec->refs = 1;
724         INIT_LIST_HEAD(&rec->backrefs);
725         INIT_LIST_HEAD(&rec->orphan_extents);
726         rec->holes = RB_ROOT;
727
728         list_for_each_entry(orig, &orig_rec->backrefs, list) {
729                 size = sizeof(*orig) + orig->namelen + 1;
730                 backref = malloc(size);
731                 if (!backref) {
732                         ret = -ENOMEM;
733                         goto cleanup;
734                 }
735                 memcpy(backref, orig, size);
736                 list_add_tail(&backref->list, &rec->backrefs);
737         }
738         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
739                 dst_orphan = malloc(sizeof(*dst_orphan));
740                 if (!dst_orphan) {
741                         ret = -ENOMEM;
742                         goto cleanup;
743                 }
744                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
745                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
746         }
747         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
748         BUG_ON(ret < 0);
749
750         return rec;
751
752 cleanup:
753         if (!list_empty(&rec->backrefs))
754                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
755                         list_del(&orig->list);
756                         free(orig);
757                 }
758
759         if (!list_empty(&rec->orphan_extents))
760                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
761                         list_del(&orig->list);
762                         free(orig);
763                 }
764
765         free(rec);
766
767         return ERR_PTR(ret);
768 }
769
770 static void print_orphan_data_extents(struct list_head *orphan_extents,
771                                       u64 objectid)
772 {
773         struct orphan_data_extent *orphan;
774
775         if (list_empty(orphan_extents))
776                 return;
777         printf("The following data extent is lost in tree %llu:\n",
778                objectid);
779         list_for_each_entry(orphan, orphan_extents, list) {
780                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
781                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
782                        orphan->disk_len);
783         }
784 }
785
786 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
787 {
788         u64 root_objectid = root->root_key.objectid;
789         int errors = rec->errors;
790
791         if (!errors)
792                 return;
793         /* reloc root errors, we print its corresponding fs root objectid*/
794         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
795                 root_objectid = root->root_key.offset;
796                 fprintf(stderr, "reloc");
797         }
798         fprintf(stderr, "root %llu inode %llu errors %x",
799                 (unsigned long long) root_objectid,
800                 (unsigned long long) rec->ino, rec->errors);
801
802         if (errors & I_ERR_NO_INODE_ITEM)
803                 fprintf(stderr, ", no inode item");
804         if (errors & I_ERR_NO_ORPHAN_ITEM)
805                 fprintf(stderr, ", no orphan item");
806         if (errors & I_ERR_DUP_INODE_ITEM)
807                 fprintf(stderr, ", dup inode item");
808         if (errors & I_ERR_DUP_DIR_INDEX)
809                 fprintf(stderr, ", dup dir index");
810         if (errors & I_ERR_ODD_DIR_ITEM)
811                 fprintf(stderr, ", odd dir item");
812         if (errors & I_ERR_ODD_FILE_EXTENT)
813                 fprintf(stderr, ", odd file extent");
814         if (errors & I_ERR_BAD_FILE_EXTENT)
815                 fprintf(stderr, ", bad file extent");
816         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
817                 fprintf(stderr, ", file extent overlap");
818         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
819                 fprintf(stderr, ", file extent discount");
820         if (errors & I_ERR_DIR_ISIZE_WRONG)
821                 fprintf(stderr, ", dir isize wrong");
822         if (errors & I_ERR_FILE_NBYTES_WRONG)
823                 fprintf(stderr, ", nbytes wrong");
824         if (errors & I_ERR_ODD_CSUM_ITEM)
825                 fprintf(stderr, ", odd csum item");
826         if (errors & I_ERR_SOME_CSUM_MISSING)
827                 fprintf(stderr, ", some csum missing");
828         if (errors & I_ERR_LINK_COUNT_WRONG)
829                 fprintf(stderr, ", link count wrong");
830         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
831                 fprintf(stderr, ", orphan file extent");
832         fprintf(stderr, "\n");
833         /* Print the orphan extents if needed */
834         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
835                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
836
837         /* Print the holes if needed */
838         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
839                 struct file_extent_hole *hole;
840                 struct rb_node *node;
841                 int found = 0;
842
843                 node = rb_first(&rec->holes);
844                 fprintf(stderr, "Found file extent holes:\n");
845                 while (node) {
846                         found = 1;
847                         hole = rb_entry(node, struct file_extent_hole, node);
848                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
849                                 hole->start, hole->len);
850                         node = rb_next(node);
851                 }
852                 if (!found)
853                         fprintf(stderr, "\tstart: 0, len: %llu\n",
854                                 round_up(rec->isize, root->sectorsize));
855         }
856 }
857
858 static void print_ref_error(int errors)
859 {
860         if (errors & REF_ERR_NO_DIR_ITEM)
861                 fprintf(stderr, ", no dir item");
862         if (errors & REF_ERR_NO_DIR_INDEX)
863                 fprintf(stderr, ", no dir index");
864         if (errors & REF_ERR_NO_INODE_REF)
865                 fprintf(stderr, ", no inode ref");
866         if (errors & REF_ERR_DUP_DIR_ITEM)
867                 fprintf(stderr, ", dup dir item");
868         if (errors & REF_ERR_DUP_DIR_INDEX)
869                 fprintf(stderr, ", dup dir index");
870         if (errors & REF_ERR_DUP_INODE_REF)
871                 fprintf(stderr, ", dup inode ref");
872         if (errors & REF_ERR_INDEX_UNMATCH)
873                 fprintf(stderr, ", index mismatch");
874         if (errors & REF_ERR_FILETYPE_UNMATCH)
875                 fprintf(stderr, ", filetype mismatch");
876         if (errors & REF_ERR_NAME_TOO_LONG)
877                 fprintf(stderr, ", name too long");
878         if (errors & REF_ERR_NO_ROOT_REF)
879                 fprintf(stderr, ", no root ref");
880         if (errors & REF_ERR_NO_ROOT_BACKREF)
881                 fprintf(stderr, ", no root backref");
882         if (errors & REF_ERR_DUP_ROOT_REF)
883                 fprintf(stderr, ", dup root ref");
884         if (errors & REF_ERR_DUP_ROOT_BACKREF)
885                 fprintf(stderr, ", dup root backref");
886         fprintf(stderr, "\n");
887 }
888
889 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
890                                           u64 ino, int mod)
891 {
892         struct ptr_node *node;
893         struct cache_extent *cache;
894         struct inode_record *rec = NULL;
895         int ret;
896
897         cache = lookup_cache_extent(inode_cache, ino, 1);
898         if (cache) {
899                 node = container_of(cache, struct ptr_node, cache);
900                 rec = node->data;
901                 if (mod && rec->refs > 1) {
902                         node->data = clone_inode_rec(rec);
903                         if (IS_ERR(node->data))
904                                 return node->data;
905                         rec->refs--;
906                         rec = node->data;
907                 }
908         } else if (mod) {
909                 rec = calloc(1, sizeof(*rec));
910                 if (!rec)
911                         return ERR_PTR(-ENOMEM);
912                 rec->ino = ino;
913                 rec->extent_start = (u64)-1;
914                 rec->refs = 1;
915                 INIT_LIST_HEAD(&rec->backrefs);
916                 INIT_LIST_HEAD(&rec->orphan_extents);
917                 rec->holes = RB_ROOT;
918
919                 node = malloc(sizeof(*node));
920                 if (!node) {
921                         free(rec);
922                         return ERR_PTR(-ENOMEM);
923                 }
924                 node->cache.start = ino;
925                 node->cache.size = 1;
926                 node->data = rec;
927
928                 if (ino == BTRFS_FREE_INO_OBJECTID)
929                         rec->found_link = 1;
930
931                 ret = insert_cache_extent(inode_cache, &node->cache);
932                 if (ret)
933                         return ERR_PTR(-EEXIST);
934         }
935         return rec;
936 }
937
938 static void free_orphan_data_extents(struct list_head *orphan_extents)
939 {
940         struct orphan_data_extent *orphan;
941
942         while (!list_empty(orphan_extents)) {
943                 orphan = list_entry(orphan_extents->next,
944                                     struct orphan_data_extent, list);
945                 list_del(&orphan->list);
946                 free(orphan);
947         }
948 }
949
950 static void free_inode_rec(struct inode_record *rec)
951 {
952         struct inode_backref *backref;
953
954         if (--rec->refs > 0)
955                 return;
956
957         while (!list_empty(&rec->backrefs)) {
958                 backref = to_inode_backref(rec->backrefs.next);
959                 list_del(&backref->list);
960                 free(backref);
961         }
962         free_orphan_data_extents(&rec->orphan_extents);
963         free_file_extent_holes(&rec->holes);
964         free(rec);
965 }
966
967 static int can_free_inode_rec(struct inode_record *rec)
968 {
969         if (!rec->errors && rec->checked && rec->found_inode_item &&
970             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
971                 return 1;
972         return 0;
973 }
974
975 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
976                                  struct inode_record *rec)
977 {
978         struct cache_extent *cache;
979         struct inode_backref *tmp, *backref;
980         struct ptr_node *node;
981         unsigned char filetype;
982
983         if (!rec->found_inode_item)
984                 return;
985
986         filetype = imode_to_type(rec->imode);
987         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
988                 if (backref->found_dir_item && backref->found_dir_index) {
989                         if (backref->filetype != filetype)
990                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
991                         if (!backref->errors && backref->found_inode_ref &&
992                             rec->nlink == rec->found_link) {
993                                 list_del(&backref->list);
994                                 free(backref);
995                         }
996                 }
997         }
998
999         if (!rec->checked || rec->merging)
1000                 return;
1001
1002         if (S_ISDIR(rec->imode)) {
1003                 if (rec->found_size != rec->isize)
1004                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1005                 if (rec->found_file_extent)
1006                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1007         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1008                 if (rec->found_dir_item)
1009                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1010                 if (rec->found_size != rec->nbytes)
1011                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1012                 if (rec->nlink > 0 && !no_holes &&
1013                     (rec->extent_end < rec->isize ||
1014                      first_extent_gap(&rec->holes) < rec->isize))
1015                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1016         }
1017
1018         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1019                 if (rec->found_csum_item && rec->nodatasum)
1020                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1021                 if (rec->some_csum_missing && !rec->nodatasum)
1022                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1023         }
1024
1025         BUG_ON(rec->refs != 1);
1026         if (can_free_inode_rec(rec)) {
1027                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1028                 node = container_of(cache, struct ptr_node, cache);
1029                 BUG_ON(node->data != rec);
1030                 remove_cache_extent(inode_cache, &node->cache);
1031                 free(node);
1032                 free_inode_rec(rec);
1033         }
1034 }
1035
1036 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1037 {
1038         struct btrfs_path path;
1039         struct btrfs_key key;
1040         int ret;
1041
1042         key.objectid = BTRFS_ORPHAN_OBJECTID;
1043         key.type = BTRFS_ORPHAN_ITEM_KEY;
1044         key.offset = ino;
1045
1046         btrfs_init_path(&path);
1047         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1048         btrfs_release_path(&path);
1049         if (ret > 0)
1050                 ret = -ENOENT;
1051         return ret;
1052 }
1053
1054 static int process_inode_item(struct extent_buffer *eb,
1055                               int slot, struct btrfs_key *key,
1056                               struct shared_node *active_node)
1057 {
1058         struct inode_record *rec;
1059         struct btrfs_inode_item *item;
1060
1061         rec = active_node->current;
1062         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1063         if (rec->found_inode_item) {
1064                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1065                 return 1;
1066         }
1067         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1068         rec->nlink = btrfs_inode_nlink(eb, item);
1069         rec->isize = btrfs_inode_size(eb, item);
1070         rec->nbytes = btrfs_inode_nbytes(eb, item);
1071         rec->imode = btrfs_inode_mode(eb, item);
1072         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1073                 rec->nodatasum = 1;
1074         rec->found_inode_item = 1;
1075         if (rec->nlink == 0)
1076                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1077         maybe_free_inode_rec(&active_node->inode_cache, rec);
1078         return 0;
1079 }
1080
1081 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1082                                                 const char *name,
1083                                                 int namelen, u64 dir)
1084 {
1085         struct inode_backref *backref;
1086
1087         list_for_each_entry(backref, &rec->backrefs, list) {
1088                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1089                         break;
1090                 if (backref->dir != dir || backref->namelen != namelen)
1091                         continue;
1092                 if (memcmp(name, backref->name, namelen))
1093                         continue;
1094                 return backref;
1095         }
1096
1097         backref = malloc(sizeof(*backref) + namelen + 1);
1098         if (!backref)
1099                 return NULL;
1100         memset(backref, 0, sizeof(*backref));
1101         backref->dir = dir;
1102         backref->namelen = namelen;
1103         memcpy(backref->name, name, namelen);
1104         backref->name[namelen] = '\0';
1105         list_add_tail(&backref->list, &rec->backrefs);
1106         return backref;
1107 }
1108
1109 static int add_inode_backref(struct cache_tree *inode_cache,
1110                              u64 ino, u64 dir, u64 index,
1111                              const char *name, int namelen,
1112                              int filetype, int itemtype, int errors)
1113 {
1114         struct inode_record *rec;
1115         struct inode_backref *backref;
1116
1117         rec = get_inode_rec(inode_cache, ino, 1);
1118         BUG_ON(IS_ERR(rec));
1119         backref = get_inode_backref(rec, name, namelen, dir);
1120         BUG_ON(!backref);
1121         if (errors)
1122                 backref->errors |= errors;
1123         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1124                 if (backref->found_dir_index)
1125                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1126                 if (backref->found_inode_ref && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 if (backref->found_dir_item && backref->filetype != filetype)
1129                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1130
1131                 backref->index = index;
1132                 backref->filetype = filetype;
1133                 backref->found_dir_index = 1;
1134         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1135                 rec->found_link++;
1136                 if (backref->found_dir_item)
1137                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1138                 if (backref->found_dir_index && backref->filetype != filetype)
1139                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1140
1141                 backref->filetype = filetype;
1142                 backref->found_dir_item = 1;
1143         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1144                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1145                 if (backref->found_inode_ref)
1146                         backref->errors |= REF_ERR_DUP_INODE_REF;
1147                 if (backref->found_dir_index && backref->index != index)
1148                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1149                 else
1150                         backref->index = index;
1151
1152                 backref->ref_type = itemtype;
1153                 backref->found_inode_ref = 1;
1154         } else {
1155                 BUG_ON(1);
1156         }
1157
1158         maybe_free_inode_rec(inode_cache, rec);
1159         return 0;
1160 }
1161
1162 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1163                             struct cache_tree *dst_cache)
1164 {
1165         struct inode_backref *backref;
1166         u32 dir_count = 0;
1167         int ret = 0;
1168
1169         dst->merging = 1;
1170         list_for_each_entry(backref, &src->backrefs, list) {
1171                 if (backref->found_dir_index) {
1172                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1173                                         backref->index, backref->name,
1174                                         backref->namelen, backref->filetype,
1175                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1176                 }
1177                 if (backref->found_dir_item) {
1178                         dir_count++;
1179                         add_inode_backref(dst_cache, dst->ino,
1180                                         backref->dir, 0, backref->name,
1181                                         backref->namelen, backref->filetype,
1182                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1183                 }
1184                 if (backref->found_inode_ref) {
1185                         add_inode_backref(dst_cache, dst->ino,
1186                                         backref->dir, backref->index,
1187                                         backref->name, backref->namelen, 0,
1188                                         backref->ref_type, backref->errors);
1189                 }
1190         }
1191
1192         if (src->found_dir_item)
1193                 dst->found_dir_item = 1;
1194         if (src->found_file_extent)
1195                 dst->found_file_extent = 1;
1196         if (src->found_csum_item)
1197                 dst->found_csum_item = 1;
1198         if (src->some_csum_missing)
1199                 dst->some_csum_missing = 1;
1200         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1201                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1202                 if (ret < 0)
1203                         return ret;
1204         }
1205
1206         BUG_ON(src->found_link < dir_count);
1207         dst->found_link += src->found_link - dir_count;
1208         dst->found_size += src->found_size;
1209         if (src->extent_start != (u64)-1) {
1210                 if (dst->extent_start == (u64)-1) {
1211                         dst->extent_start = src->extent_start;
1212                         dst->extent_end = src->extent_end;
1213                 } else {
1214                         if (dst->extent_end > src->extent_start)
1215                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1216                         else if (dst->extent_end < src->extent_start) {
1217                                 ret = add_file_extent_hole(&dst->holes,
1218                                         dst->extent_end,
1219                                         src->extent_start - dst->extent_end);
1220                         }
1221                         if (dst->extent_end < src->extent_end)
1222                                 dst->extent_end = src->extent_end;
1223                 }
1224         }
1225
1226         dst->errors |= src->errors;
1227         if (src->found_inode_item) {
1228                 if (!dst->found_inode_item) {
1229                         dst->nlink = src->nlink;
1230                         dst->isize = src->isize;
1231                         dst->nbytes = src->nbytes;
1232                         dst->imode = src->imode;
1233                         dst->nodatasum = src->nodatasum;
1234                         dst->found_inode_item = 1;
1235                 } else {
1236                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1237                 }
1238         }
1239         dst->merging = 0;
1240
1241         return 0;
1242 }
1243
1244 static int splice_shared_node(struct shared_node *src_node,
1245                               struct shared_node *dst_node)
1246 {
1247         struct cache_extent *cache;
1248         struct ptr_node *node, *ins;
1249         struct cache_tree *src, *dst;
1250         struct inode_record *rec, *conflict;
1251         u64 current_ino = 0;
1252         int splice = 0;
1253         int ret;
1254
1255         if (--src_node->refs == 0)
1256                 splice = 1;
1257         if (src_node->current)
1258                 current_ino = src_node->current->ino;
1259
1260         src = &src_node->root_cache;
1261         dst = &dst_node->root_cache;
1262 again:
1263         cache = search_cache_extent(src, 0);
1264         while (cache) {
1265                 node = container_of(cache, struct ptr_node, cache);
1266                 rec = node->data;
1267                 cache = next_cache_extent(cache);
1268
1269                 if (splice) {
1270                         remove_cache_extent(src, &node->cache);
1271                         ins = node;
1272                 } else {
1273                         ins = malloc(sizeof(*ins));
1274                         BUG_ON(!ins);
1275                         ins->cache.start = node->cache.start;
1276                         ins->cache.size = node->cache.size;
1277                         ins->data = rec;
1278                         rec->refs++;
1279                 }
1280                 ret = insert_cache_extent(dst, &ins->cache);
1281                 if (ret == -EEXIST) {
1282                         conflict = get_inode_rec(dst, rec->ino, 1);
1283                         BUG_ON(IS_ERR(conflict));
1284                         merge_inode_recs(rec, conflict, dst);
1285                         if (rec->checked) {
1286                                 conflict->checked = 1;
1287                                 if (dst_node->current == conflict)
1288                                         dst_node->current = NULL;
1289                         }
1290                         maybe_free_inode_rec(dst, conflict);
1291                         free_inode_rec(rec);
1292                         free(ins);
1293                 } else {
1294                         BUG_ON(ret);
1295                 }
1296         }
1297
1298         if (src == &src_node->root_cache) {
1299                 src = &src_node->inode_cache;
1300                 dst = &dst_node->inode_cache;
1301                 goto again;
1302         }
1303
1304         if (current_ino > 0 && (!dst_node->current ||
1305             current_ino > dst_node->current->ino)) {
1306                 if (dst_node->current) {
1307                         dst_node->current->checked = 1;
1308                         maybe_free_inode_rec(dst, dst_node->current);
1309                 }
1310                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1311                 BUG_ON(IS_ERR(dst_node->current));
1312         }
1313         return 0;
1314 }
1315
1316 static void free_inode_ptr(struct cache_extent *cache)
1317 {
1318         struct ptr_node *node;
1319         struct inode_record *rec;
1320
1321         node = container_of(cache, struct ptr_node, cache);
1322         rec = node->data;
1323         free_inode_rec(rec);
1324         free(node);
1325 }
1326
1327 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1328
1329 static struct shared_node *find_shared_node(struct cache_tree *shared,
1330                                             u64 bytenr)
1331 {
1332         struct cache_extent *cache;
1333         struct shared_node *node;
1334
1335         cache = lookup_cache_extent(shared, bytenr, 1);
1336         if (cache) {
1337                 node = container_of(cache, struct shared_node, cache);
1338                 return node;
1339         }
1340         return NULL;
1341 }
1342
1343 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1344 {
1345         int ret;
1346         struct shared_node *node;
1347
1348         node = calloc(1, sizeof(*node));
1349         if (!node)
1350                 return -ENOMEM;
1351         node->cache.start = bytenr;
1352         node->cache.size = 1;
1353         cache_tree_init(&node->root_cache);
1354         cache_tree_init(&node->inode_cache);
1355         node->refs = refs;
1356
1357         ret = insert_cache_extent(shared, &node->cache);
1358
1359         return ret;
1360 }
1361
1362 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int ret;
1368
1369         if (level == wc->active_node)
1370                 return 0;
1371
1372         BUG_ON(wc->active_node <= level);
1373         node = find_shared_node(&wc->shared, bytenr);
1374         if (!node) {
1375                 ret = add_shared_node(&wc->shared, bytenr, refs);
1376                 BUG_ON(ret);
1377                 node = find_shared_node(&wc->shared, bytenr);
1378                 wc->nodes[level] = node;
1379                 wc->active_node = level;
1380                 return 0;
1381         }
1382
1383         if (wc->root_level == wc->active_node &&
1384             btrfs_root_refs(&root->root_item) == 0) {
1385                 if (--node->refs == 0) {
1386                         free_inode_recs_tree(&node->root_cache);
1387                         free_inode_recs_tree(&node->inode_cache);
1388                         remove_cache_extent(&wc->shared, &node->cache);
1389                         free(node);
1390                 }
1391                 return 1;
1392         }
1393
1394         dest = wc->nodes[wc->active_node];
1395         splice_shared_node(node, dest);
1396         if (node->refs == 0) {
1397                 remove_cache_extent(&wc->shared, &node->cache);
1398                 free(node);
1399         }
1400         return 1;
1401 }
1402
1403 static int leave_shared_node(struct btrfs_root *root,
1404                              struct walk_control *wc, int level)
1405 {
1406         struct shared_node *node;
1407         struct shared_node *dest;
1408         int i;
1409
1410         if (level == wc->root_level)
1411                 return 0;
1412
1413         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1414                 if (wc->nodes[i])
1415                         break;
1416         }
1417         BUG_ON(i >= BTRFS_MAX_LEVEL);
1418
1419         node = wc->nodes[wc->active_node];
1420         wc->nodes[wc->active_node] = NULL;
1421         wc->active_node = i;
1422
1423         dest = wc->nodes[wc->active_node];
1424         if (wc->active_node < wc->root_level ||
1425             btrfs_root_refs(&root->root_item) > 0) {
1426                 BUG_ON(node->refs <= 1);
1427                 splice_shared_node(node, dest);
1428         } else {
1429                 BUG_ON(node->refs < 2);
1430                 node->refs--;
1431         }
1432         return 0;
1433 }
1434
1435 /*
1436  * Returns:
1437  * < 0 - on error
1438  * 1   - if the root with id child_root_id is a child of root parent_root_id
1439  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1440  *       has other root(s) as parent(s)
1441  * 2   - if the root child_root_id doesn't have any parent roots
1442  */
1443 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1444                          u64 child_root_id)
1445 {
1446         struct btrfs_path path;
1447         struct btrfs_key key;
1448         struct extent_buffer *leaf;
1449         int has_parent = 0;
1450         int ret;
1451
1452         btrfs_init_path(&path);
1453
1454         key.objectid = parent_root_id;
1455         key.type = BTRFS_ROOT_REF_KEY;
1456         key.offset = child_root_id;
1457         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1458                                 0, 0);
1459         if (ret < 0)
1460                 return ret;
1461         btrfs_release_path(&path);
1462         if (!ret)
1463                 return 1;
1464
1465         key.objectid = child_root_id;
1466         key.type = BTRFS_ROOT_BACKREF_KEY;
1467         key.offset = 0;
1468         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1469                                 0, 0);
1470         if (ret < 0)
1471                 goto out;
1472
1473         while (1) {
1474                 leaf = path.nodes[0];
1475                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1476                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1477                         if (ret)
1478                                 break;
1479                         leaf = path.nodes[0];
1480                 }
1481
1482                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1483                 if (key.objectid != child_root_id ||
1484                     key.type != BTRFS_ROOT_BACKREF_KEY)
1485                         break;
1486
1487                 has_parent = 1;
1488
1489                 if (key.offset == parent_root_id) {
1490                         btrfs_release_path(&path);
1491                         return 1;
1492                 }
1493
1494                 path.slots[0]++;
1495         }
1496 out:
1497         btrfs_release_path(&path);
1498         if (ret < 0)
1499                 return ret;
1500         return has_parent ? 0 : 2;
1501 }
1502
1503 static int process_dir_item(struct btrfs_root *root,
1504                             struct extent_buffer *eb,
1505                             int slot, struct btrfs_key *key,
1506                             struct shared_node *active_node)
1507 {
1508         u32 total;
1509         u32 cur = 0;
1510         u32 len;
1511         u32 name_len;
1512         u32 data_len;
1513         int error;
1514         int nritems = 0;
1515         int filetype;
1516         struct btrfs_dir_item *di;
1517         struct inode_record *rec;
1518         struct cache_tree *root_cache;
1519         struct cache_tree *inode_cache;
1520         struct btrfs_key location;
1521         char namebuf[BTRFS_NAME_LEN];
1522
1523         root_cache = &active_node->root_cache;
1524         inode_cache = &active_node->inode_cache;
1525         rec = active_node->current;
1526         rec->found_dir_item = 1;
1527
1528         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1529         total = btrfs_item_size_nr(eb, slot);
1530         while (cur < total) {
1531                 nritems++;
1532                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1533                 name_len = btrfs_dir_name_len(eb, di);
1534                 data_len = btrfs_dir_data_len(eb, di);
1535                 filetype = btrfs_dir_type(eb, di);
1536
1537                 rec->found_size += name_len;
1538                 if (name_len <= BTRFS_NAME_LEN) {
1539                         len = name_len;
1540                         error = 0;
1541                 } else {
1542                         len = BTRFS_NAME_LEN;
1543                         error = REF_ERR_NAME_TOO_LONG;
1544                 }
1545                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1546
1547                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1548                         add_inode_backref(inode_cache, location.objectid,
1549                                           key->objectid, key->offset, namebuf,
1550                                           len, filetype, key->type, error);
1551                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1552                         add_inode_backref(root_cache, location.objectid,
1553                                           key->objectid, key->offset,
1554                                           namebuf, len, filetype,
1555                                           key->type, error);
1556                 } else {
1557                         fprintf(stderr, "invalid location in dir item %u\n",
1558                                 location.type);
1559                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1560                                           key->objectid, key->offset, namebuf,
1561                                           len, filetype, key->type, error);
1562                 }
1563
1564                 len = sizeof(*di) + name_len + data_len;
1565                 di = (struct btrfs_dir_item *)((char *)di + len);
1566                 cur += len;
1567         }
1568         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1569                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1570
1571         return 0;
1572 }
1573
1574 static int process_inode_ref(struct extent_buffer *eb,
1575                              int slot, struct btrfs_key *key,
1576                              struct shared_node *active_node)
1577 {
1578         u32 total;
1579         u32 cur = 0;
1580         u32 len;
1581         u32 name_len;
1582         u64 index;
1583         int error;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_inode_ref *ref;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         inode_cache = &active_node->inode_cache;
1589
1590         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1591         total = btrfs_item_size_nr(eb, slot);
1592         while (cur < total) {
1593                 name_len = btrfs_inode_ref_name_len(eb, ref);
1594                 index = btrfs_inode_ref_index(eb, ref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, key->offset,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*ref) + name_len;
1607                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611 }
1612
1613 static int process_inode_extref(struct extent_buffer *eb,
1614                                 int slot, struct btrfs_key *key,
1615                                 struct shared_node *active_node)
1616 {
1617         u32 total;
1618         u32 cur = 0;
1619         u32 len;
1620         u32 name_len;
1621         u64 index;
1622         u64 parent;
1623         int error;
1624         struct cache_tree *inode_cache;
1625         struct btrfs_inode_extref *extref;
1626         char namebuf[BTRFS_NAME_LEN];
1627
1628         inode_cache = &active_node->inode_cache;
1629
1630         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631         total = btrfs_item_size_nr(eb, slot);
1632         while (cur < total) {
1633                 name_len = btrfs_inode_extref_name_len(eb, extref);
1634                 index = btrfs_inode_extref_index(eb, extref);
1635                 parent = btrfs_inode_extref_parent(eb, extref);
1636                 if (name_len <= BTRFS_NAME_LEN) {
1637                         len = name_len;
1638                         error = 0;
1639                 } else {
1640                         len = BTRFS_NAME_LEN;
1641                         error = REF_ERR_NAME_TOO_LONG;
1642                 }
1643                 read_extent_buffer(eb, namebuf,
1644                                    (unsigned long)(extref + 1), len);
1645                 add_inode_backref(inode_cache, key->objectid, parent,
1646                                   index, namebuf, len, 0, key->type, error);
1647
1648                 len = sizeof(*extref) + name_len;
1649                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1650                 cur += len;
1651         }
1652         return 0;
1653
1654 }
1655
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657                             u64 len, u64 *found)
1658 {
1659         struct btrfs_key key;
1660         struct btrfs_path path;
1661         struct extent_buffer *leaf;
1662         int ret;
1663         size_t size;
1664         *found = 0;
1665         u64 csum_end;
1666         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667
1668         btrfs_init_path(&path);
1669
1670         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671         key.offset = start;
1672         key.type = BTRFS_EXTENT_CSUM_KEY;
1673
1674         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1675                                 &key, &path, 0, 0);
1676         if (ret < 0)
1677                 goto out;
1678         if (ret > 0 && path.slots[0] > 0) {
1679                 leaf = path.nodes[0];
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682                     key.type == BTRFS_EXTENT_CSUM_KEY)
1683                         path.slots[0]--;
1684         }
1685
1686         while (len > 0) {
1687                 leaf = path.nodes[0];
1688                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1690                         if (ret > 0)
1691                                 break;
1692                         else if (ret < 0)
1693                                 goto out;
1694                         leaf = path.nodes[0];
1695                 }
1696
1697                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699                     key.type != BTRFS_EXTENT_CSUM_KEY)
1700                         break;
1701
1702                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703                 if (key.offset >= start + len)
1704                         break;
1705
1706                 if (key.offset > start)
1707                         start = key.offset;
1708
1709                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1711                 if (csum_end > start) {
1712                         size = min(csum_end - start, len);
1713                         len -= size;
1714                         start += size;
1715                         *found += size;
1716                 }
1717
1718                 path.slots[0]++;
1719         }
1720 out:
1721         btrfs_release_path(&path);
1722         if (ret < 0)
1723                 return ret;
1724         return 0;
1725 }
1726
1727 static int process_file_extent(struct btrfs_root *root,
1728                                 struct extent_buffer *eb,
1729                                 int slot, struct btrfs_key *key,
1730                                 struct shared_node *active_node)
1731 {
1732         struct inode_record *rec;
1733         struct btrfs_file_extent_item *fi;
1734         u64 num_bytes = 0;
1735         u64 disk_bytenr = 0;
1736         u64 extent_offset = 0;
1737         u64 mask = root->sectorsize - 1;
1738         int extent_type;
1739         int ret;
1740
1741         rec = active_node->current;
1742         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743         rec->found_file_extent = 1;
1744
1745         if (rec->extent_start == (u64)-1) {
1746                 rec->extent_start = key->offset;
1747                 rec->extent_end = key->offset;
1748         }
1749
1750         if (rec->extent_end > key->offset)
1751                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752         else if (rec->extent_end < key->offset) {
1753                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754                                            key->offset - rec->extent_end);
1755                 if (ret < 0)
1756                         return ret;
1757         }
1758
1759         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760         extent_type = btrfs_file_extent_type(eb, fi);
1761
1762         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1764                 if (num_bytes == 0)
1765                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766                 rec->found_size += num_bytes;
1767                 num_bytes = (num_bytes + mask) & ~mask;
1768         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772                 extent_offset = btrfs_file_extent_offset(eb, fi);
1773                 if (num_bytes == 0 || (num_bytes & mask))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (num_bytes + extent_offset >
1776                     btrfs_file_extent_ram_bytes(eb, fi))
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779                     (btrfs_file_extent_compression(eb, fi) ||
1780                      btrfs_file_extent_encryption(eb, fi) ||
1781                      btrfs_file_extent_other_encoding(eb, fi)))
1782                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783                 if (disk_bytenr > 0)
1784                         rec->found_size += num_bytes;
1785         } else {
1786                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787         }
1788         rec->extent_end = key->offset + num_bytes;
1789
1790         /*
1791          * The data reloc tree will copy full extents into its inode and then
1792          * copy the corresponding csums.  Because the extent it copied could be
1793          * a preallocated extent that hasn't been written to yet there may be no
1794          * csums to copy, ergo we won't have csums for our file extent.  This is
1795          * ok so just don't bother checking csums if the inode belongs to the
1796          * data reloc tree.
1797          */
1798         if (disk_bytenr > 0 &&
1799             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1800                 u64 found;
1801                 if (btrfs_file_extent_compression(eb, fi))
1802                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1803                 else
1804                         disk_bytenr += extent_offset;
1805
1806                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1807                 if (ret < 0)
1808                         return ret;
1809                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1810                         if (found > 0)
1811                                 rec->found_csum_item = 1;
1812                         if (found < num_bytes)
1813                                 rec->some_csum_missing = 1;
1814                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1815                         if (found > 0)
1816                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1817                 }
1818         }
1819         return 0;
1820 }
1821
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823                             struct walk_control *wc)
1824 {
1825         struct btrfs_key key;
1826         u32 nritems;
1827         int i;
1828         int ret = 0;
1829         struct cache_tree *inode_cache;
1830         struct shared_node *active_node;
1831
1832         if (wc->root_level == wc->active_node &&
1833             btrfs_root_refs(&root->root_item) == 0)
1834                 return 0;
1835
1836         active_node = wc->nodes[wc->active_node];
1837         inode_cache = &active_node->inode_cache;
1838         nritems = btrfs_header_nritems(eb);
1839         for (i = 0; i < nritems; i++) {
1840                 btrfs_item_key_to_cpu(eb, &key, i);
1841
1842                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1843                         continue;
1844                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1845                         continue;
1846
1847                 if (active_node->current == NULL ||
1848                     active_node->current->ino < key.objectid) {
1849                         if (active_node->current) {
1850                                 active_node->current->checked = 1;
1851                                 maybe_free_inode_rec(inode_cache,
1852                                                      active_node->current);
1853                         }
1854                         active_node->current = get_inode_rec(inode_cache,
1855                                                              key.objectid, 1);
1856                         BUG_ON(IS_ERR(active_node->current));
1857                 }
1858                 switch (key.type) {
1859                 case BTRFS_DIR_ITEM_KEY:
1860                 case BTRFS_DIR_INDEX_KEY:
1861                         ret = process_dir_item(root, eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_REF_KEY:
1864                         ret = process_inode_ref(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_INODE_EXTREF_KEY:
1867                         ret = process_inode_extref(eb, i, &key, active_node);
1868                         break;
1869                 case BTRFS_INODE_ITEM_KEY:
1870                         ret = process_inode_item(eb, i, &key, active_node);
1871                         break;
1872                 case BTRFS_EXTENT_DATA_KEY:
1873                         ret = process_file_extent(root, eb, i, &key,
1874                                                   active_node);
1875                         break;
1876                 default:
1877                         break;
1878                 };
1879         }
1880         return ret;
1881 }
1882
1883 static void reada_walk_down(struct btrfs_root *root,
1884                             struct extent_buffer *node, int slot)
1885 {
1886         u64 bytenr;
1887         u64 ptr_gen;
1888         u32 nritems;
1889         u32 blocksize;
1890         int i;
1891         int level;
1892
1893         level = btrfs_header_level(node);
1894         if (level != 1)
1895                 return;
1896
1897         nritems = btrfs_header_nritems(node);
1898         blocksize = root->nodesize;
1899         for (i = slot; i < nritems; i++) {
1900                 bytenr = btrfs_node_blockptr(node, i);
1901                 ptr_gen = btrfs_node_ptr_generation(node, i);
1902                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1903         }
1904 }
1905
1906 /*
1907  * Check the child node/leaf by the following condition:
1908  * 1. the first item key of the node/leaf should be the same with the one
1909  *    in parent.
1910  * 2. block in parent node should match the child node/leaf.
1911  * 3. generation of parent node and child's header should be consistent.
1912  *
1913  * Or the child node/leaf pointed by the key in parent is not valid.
1914  *
1915  * We hope to check leaf owner too, but since subvol may share leaves,
1916  * which makes leaf owner check not so strong, key check should be
1917  * sufficient enough for that case.
1918  */
1919 static int check_child_node(struct btrfs_root *root,
1920                             struct extent_buffer *parent, int slot,
1921                             struct extent_buffer *child)
1922 {
1923         struct btrfs_key parent_key;
1924         struct btrfs_key child_key;
1925         int ret = 0;
1926
1927         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1928         if (btrfs_header_level(child) == 0)
1929                 btrfs_item_key_to_cpu(child, &child_key, 0);
1930         else
1931                 btrfs_node_key_to_cpu(child, &child_key, 0);
1932
1933         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1934                 ret = -EINVAL;
1935                 fprintf(stderr,
1936                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1937                         parent_key.objectid, parent_key.type, parent_key.offset,
1938                         child_key.objectid, child_key.type, child_key.offset);
1939         }
1940         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1941                 ret = -EINVAL;
1942                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1943                         btrfs_node_blockptr(parent, slot),
1944                         btrfs_header_bytenr(child));
1945         }
1946         if (btrfs_node_ptr_generation(parent, slot) !=
1947             btrfs_header_generation(child)) {
1948                 ret = -EINVAL;
1949                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1950                         btrfs_header_generation(child),
1951                         btrfs_node_ptr_generation(parent, slot));
1952         }
1953         return ret;
1954 }
1955
1956 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1957                           struct walk_control *wc, int *level)
1958 {
1959         enum btrfs_tree_block_status status;
1960         u64 bytenr;
1961         u64 ptr_gen;
1962         struct extent_buffer *next;
1963         struct extent_buffer *cur;
1964         u32 blocksize;
1965         int ret, err = 0;
1966         u64 refs;
1967
1968         WARN_ON(*level < 0);
1969         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1970         ret = btrfs_lookup_extent_info(NULL, root,
1971                                        path->nodes[*level]->start,
1972                                        *level, 1, &refs, NULL);
1973         if (ret < 0) {
1974                 err = ret;
1975                 goto out;
1976         }
1977
1978         if (refs > 1) {
1979                 ret = enter_shared_node(root, path->nodes[*level]->start,
1980                                         refs, wc, *level);
1981                 if (ret > 0) {
1982                         err = ret;
1983                         goto out;
1984                 }
1985         }
1986
1987         while (*level >= 0) {
1988                 WARN_ON(*level < 0);
1989                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1990                 cur = path->nodes[*level];
1991
1992                 if (btrfs_header_level(cur) != *level)
1993                         WARN_ON(1);
1994
1995                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1996                         break;
1997                 if (*level == 0) {
1998                         ret = process_one_leaf(root, cur, wc);
1999                         if (ret < 0)
2000                                 err = ret;
2001                         break;
2002                 }
2003                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2004                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2005                 blocksize = root->nodesize;
2006                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
2007                                                1, &refs, NULL);
2008                 if (ret < 0)
2009                         refs = 0;
2010
2011                 if (refs > 1) {
2012                         ret = enter_shared_node(root, bytenr, refs,
2013                                                 wc, *level - 1);
2014                         if (ret > 0) {
2015                                 path->slots[*level]++;
2016                                 continue;
2017                         }
2018                 }
2019
2020                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2021                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2022                         free_extent_buffer(next);
2023                         reada_walk_down(root, cur, path->slots[*level]);
2024                         next = read_tree_block(root, bytenr, blocksize,
2025                                                ptr_gen);
2026                         if (!extent_buffer_uptodate(next)) {
2027                                 struct btrfs_key node_key;
2028
2029                                 btrfs_node_key_to_cpu(path->nodes[*level],
2030                                                       &node_key,
2031                                                       path->slots[*level]);
2032                                 btrfs_add_corrupt_extent_record(root->fs_info,
2033                                                 &node_key,
2034                                                 path->nodes[*level]->start,
2035                                                 root->nodesize, *level);
2036                                 err = -EIO;
2037                                 goto out;
2038                         }
2039                 }
2040
2041                 ret = check_child_node(root, cur, path->slots[*level], next);
2042                 if (ret) {
2043                         err = ret;
2044                         goto out;
2045                 }
2046
2047                 if (btrfs_is_leaf(next))
2048                         status = btrfs_check_leaf(root, NULL, next);
2049                 else
2050                         status = btrfs_check_node(root, NULL, next);
2051                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2052                         free_extent_buffer(next);
2053                         err = -EIO;
2054                         goto out;
2055                 }
2056
2057                 *level = *level - 1;
2058                 free_extent_buffer(path->nodes[*level]);
2059                 path->nodes[*level] = next;
2060                 path->slots[*level] = 0;
2061         }
2062 out:
2063         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2064         return err;
2065 }
2066
2067 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2068                         struct walk_control *wc, int *level)
2069 {
2070         int i;
2071         struct extent_buffer *leaf;
2072
2073         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2074                 leaf = path->nodes[i];
2075                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2076                         path->slots[i]++;
2077                         *level = i;
2078                         return 0;
2079                 } else {
2080                         free_extent_buffer(path->nodes[*level]);
2081                         path->nodes[*level] = NULL;
2082                         BUG_ON(*level > wc->active_node);
2083                         if (*level == wc->active_node)
2084                                 leave_shared_node(root, wc, *level);
2085                         *level = i + 1;
2086                 }
2087         }
2088         return 1;
2089 }
2090
2091 static int check_root_dir(struct inode_record *rec)
2092 {
2093         struct inode_backref *backref;
2094         int ret = -1;
2095
2096         if (!rec->found_inode_item || rec->errors)
2097                 goto out;
2098         if (rec->nlink != 1 || rec->found_link != 0)
2099                 goto out;
2100         if (list_empty(&rec->backrefs))
2101                 goto out;
2102         backref = to_inode_backref(rec->backrefs.next);
2103         if (!backref->found_inode_ref)
2104                 goto out;
2105         if (backref->index != 0 || backref->namelen != 2 ||
2106             memcmp(backref->name, "..", 2))
2107                 goto out;
2108         if (backref->found_dir_index || backref->found_dir_item)
2109                 goto out;
2110         ret = 0;
2111 out:
2112         return ret;
2113 }
2114
2115 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2116                               struct btrfs_root *root, struct btrfs_path *path,
2117                               struct inode_record *rec)
2118 {
2119         struct btrfs_inode_item *ei;
2120         struct btrfs_key key;
2121         int ret;
2122
2123         key.objectid = rec->ino;
2124         key.type = BTRFS_INODE_ITEM_KEY;
2125         key.offset = (u64)-1;
2126
2127         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2128         if (ret < 0)
2129                 goto out;
2130         if (ret) {
2131                 if (!path->slots[0]) {
2132                         ret = -ENOENT;
2133                         goto out;
2134                 }
2135                 path->slots[0]--;
2136                 ret = 0;
2137         }
2138         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2139         if (key.objectid != rec->ino) {
2140                 ret = -ENOENT;
2141                 goto out;
2142         }
2143
2144         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2145                             struct btrfs_inode_item);
2146         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2147         btrfs_mark_buffer_dirty(path->nodes[0]);
2148         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2149         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2150                root->root_key.objectid);
2151 out:
2152         btrfs_release_path(path);
2153         return ret;
2154 }
2155
2156 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2157                                     struct btrfs_root *root,
2158                                     struct btrfs_path *path,
2159                                     struct inode_record *rec)
2160 {
2161         int ret;
2162
2163         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2164         btrfs_release_path(path);
2165         if (!ret)
2166                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2167         return ret;
2168 }
2169
2170 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2171                                struct btrfs_root *root,
2172                                struct btrfs_path *path,
2173                                struct inode_record *rec)
2174 {
2175         struct btrfs_inode_item *ei;
2176         struct btrfs_key key;
2177         int ret = 0;
2178
2179         key.objectid = rec->ino;
2180         key.type = BTRFS_INODE_ITEM_KEY;
2181         key.offset = 0;
2182
2183         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2184         if (ret) {
2185                 if (ret > 0)
2186                         ret = -ENOENT;
2187                 goto out;
2188         }
2189
2190         /* Since ret == 0, no need to check anything */
2191         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2192                             struct btrfs_inode_item);
2193         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2194         btrfs_mark_buffer_dirty(path->nodes[0]);
2195         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2196         printf("reset nbytes for ino %llu root %llu\n",
2197                rec->ino, root->root_key.objectid);
2198 out:
2199         btrfs_release_path(path);
2200         return ret;
2201 }
2202
2203 static int add_missing_dir_index(struct btrfs_root *root,
2204                                  struct cache_tree *inode_cache,
2205                                  struct inode_record *rec,
2206                                  struct inode_backref *backref)
2207 {
2208         struct btrfs_path *path;
2209         struct btrfs_trans_handle *trans;
2210         struct btrfs_dir_item *dir_item;
2211         struct extent_buffer *leaf;
2212         struct btrfs_key key;
2213         struct btrfs_disk_key disk_key;
2214         struct inode_record *dir_rec;
2215         unsigned long name_ptr;
2216         u32 data_size = sizeof(*dir_item) + backref->namelen;
2217         int ret;
2218
2219         path = btrfs_alloc_path();
2220         if (!path)
2221                 return -ENOMEM;
2222
2223         trans = btrfs_start_transaction(root, 1);
2224         if (IS_ERR(trans)) {
2225                 btrfs_free_path(path);
2226                 return PTR_ERR(trans);
2227         }
2228
2229         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2230                 (unsigned long long)rec->ino);
2231         key.objectid = backref->dir;
2232         key.type = BTRFS_DIR_INDEX_KEY;
2233         key.offset = backref->index;
2234
2235         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2236         BUG_ON(ret);
2237
2238         leaf = path->nodes[0];
2239         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2240
2241         disk_key.objectid = cpu_to_le64(rec->ino);
2242         disk_key.type = BTRFS_INODE_ITEM_KEY;
2243         disk_key.offset = 0;
2244
2245         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2246         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2247         btrfs_set_dir_data_len(leaf, dir_item, 0);
2248         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2249         name_ptr = (unsigned long)(dir_item + 1);
2250         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2251         btrfs_mark_buffer_dirty(leaf);
2252         btrfs_free_path(path);
2253         btrfs_commit_transaction(trans, root);
2254
2255         backref->found_dir_index = 1;
2256         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2257         BUG_ON(IS_ERR(dir_rec));
2258         if (!dir_rec)
2259                 return 0;
2260         dir_rec->found_size += backref->namelen;
2261         if (dir_rec->found_size == dir_rec->isize &&
2262             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2263                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2264         if (dir_rec->found_size != dir_rec->isize)
2265                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2266
2267         return 0;
2268 }
2269
2270 static int delete_dir_index(struct btrfs_root *root,
2271                             struct cache_tree *inode_cache,
2272                             struct inode_record *rec,
2273                             struct inode_backref *backref)
2274 {
2275         struct btrfs_trans_handle *trans;
2276         struct btrfs_dir_item *di;
2277         struct btrfs_path *path;
2278         int ret = 0;
2279
2280         path = btrfs_alloc_path();
2281         if (!path)
2282                 return -ENOMEM;
2283
2284         trans = btrfs_start_transaction(root, 1);
2285         if (IS_ERR(trans)) {
2286                 btrfs_free_path(path);
2287                 return PTR_ERR(trans);
2288         }
2289
2290
2291         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2292                 (unsigned long long)backref->dir,
2293                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2294                 (unsigned long long)root->objectid);
2295
2296         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2297                                     backref->name, backref->namelen,
2298                                     backref->index, -1);
2299         if (IS_ERR(di)) {
2300                 ret = PTR_ERR(di);
2301                 btrfs_free_path(path);
2302                 btrfs_commit_transaction(trans, root);
2303                 if (ret == -ENOENT)
2304                         return 0;
2305                 return ret;
2306         }
2307
2308         if (!di)
2309                 ret = btrfs_del_item(trans, root, path);
2310         else
2311                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2312         BUG_ON(ret);
2313         btrfs_free_path(path);
2314         btrfs_commit_transaction(trans, root);
2315         return ret;
2316 }
2317
2318 static int create_inode_item(struct btrfs_root *root,
2319                              struct inode_record *rec,
2320                              struct inode_backref *backref, int root_dir)
2321 {
2322         struct btrfs_trans_handle *trans;
2323         struct btrfs_inode_item inode_item;
2324         time_t now = time(NULL);
2325         int ret;
2326
2327         trans = btrfs_start_transaction(root, 1);
2328         if (IS_ERR(trans)) {
2329                 ret = PTR_ERR(trans);
2330                 return ret;
2331         }
2332
2333         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2334                 "be incomplete, please check permissions and content after "
2335                 "the fsck completes.\n", (unsigned long long)root->objectid,
2336                 (unsigned long long)rec->ino);
2337
2338         memset(&inode_item, 0, sizeof(inode_item));
2339         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2340         if (root_dir)
2341                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2342         else
2343                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2344         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2345         if (rec->found_dir_item) {
2346                 if (rec->found_file_extent)
2347                         fprintf(stderr, "root %llu inode %llu has both a dir "
2348                                 "item and extents, unsure if it is a dir or a "
2349                                 "regular file so setting it as a directory\n",
2350                                 (unsigned long long)root->objectid,
2351                                 (unsigned long long)rec->ino);
2352                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2353                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2354         } else if (!rec->found_dir_item) {
2355                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2356                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2357         }
2358         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2359         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2360         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2361         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2362         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2363         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2364         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2365         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2366
2367         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2368         BUG_ON(ret);
2369         btrfs_commit_transaction(trans, root);
2370         return 0;
2371 }
2372
2373 static int repair_inode_backrefs(struct btrfs_root *root,
2374                                  struct inode_record *rec,
2375                                  struct cache_tree *inode_cache,
2376                                  int delete)
2377 {
2378         struct inode_backref *tmp, *backref;
2379         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2380         int ret = 0;
2381         int repaired = 0;
2382
2383         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2384                 if (!delete && rec->ino == root_dirid) {
2385                         if (!rec->found_inode_item) {
2386                                 ret = create_inode_item(root, rec, backref, 1);
2387                                 if (ret)
2388                                         break;
2389                                 repaired++;
2390                         }
2391                 }
2392
2393                 /* Index 0 for root dir's are special, don't mess with it */
2394                 if (rec->ino == root_dirid && backref->index == 0)
2395                         continue;
2396
2397                 if (delete &&
2398                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2399                      (backref->found_dir_index && backref->found_inode_ref &&
2400                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2401                         ret = delete_dir_index(root, inode_cache, rec, backref);
2402                         if (ret)
2403                                 break;
2404                         repaired++;
2405                         list_del(&backref->list);
2406                         free(backref);
2407                 }
2408
2409                 if (!delete && !backref->found_dir_index &&
2410                     backref->found_dir_item && backref->found_inode_ref) {
2411                         ret = add_missing_dir_index(root, inode_cache, rec,
2412                                                     backref);
2413                         if (ret)
2414                                 break;
2415                         repaired++;
2416                         if (backref->found_dir_item &&
2417                             backref->found_dir_index &&
2418                             backref->found_dir_index) {
2419                                 if (!backref->errors &&
2420                                     backref->found_inode_ref) {
2421                                         list_del(&backref->list);
2422                                         free(backref);
2423                                 }
2424                         }
2425                 }
2426
2427                 if (!delete && (!backref->found_dir_index &&
2428                                 !backref->found_dir_item &&
2429                                 backref->found_inode_ref)) {
2430                         struct btrfs_trans_handle *trans;
2431                         struct btrfs_key location;
2432
2433                         ret = check_dir_conflict(root, backref->name,
2434                                                  backref->namelen,
2435                                                  backref->dir,
2436                                                  backref->index);
2437                         if (ret) {
2438                                 /*
2439                                  * let nlink fixing routine to handle it,
2440                                  * which can do it better.
2441                                  */
2442                                 ret = 0;
2443                                 break;
2444                         }
2445                         location.objectid = rec->ino;
2446                         location.type = BTRFS_INODE_ITEM_KEY;
2447                         location.offset = 0;
2448
2449                         trans = btrfs_start_transaction(root, 1);
2450                         if (IS_ERR(trans)) {
2451                                 ret = PTR_ERR(trans);
2452                                 break;
2453                         }
2454                         fprintf(stderr, "adding missing dir index/item pair "
2455                                 "for inode %llu\n",
2456                                 (unsigned long long)rec->ino);
2457                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2458                                                     backref->namelen,
2459                                                     backref->dir, &location,
2460                                                     imode_to_type(rec->imode),
2461                                                     backref->index);
2462                         BUG_ON(ret);
2463                         btrfs_commit_transaction(trans, root);
2464                         repaired++;
2465                 }
2466
2467                 if (!delete && (backref->found_inode_ref &&
2468                                 backref->found_dir_index &&
2469                                 backref->found_dir_item &&
2470                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2471                                 !rec->found_inode_item)) {
2472                         ret = create_inode_item(root, rec, backref, 0);
2473                         if (ret)
2474                                 break;
2475                         repaired++;
2476                 }
2477
2478         }
2479         return ret ? ret : repaired;
2480 }
2481
2482 /*
2483  * To determine the file type for nlink/inode_item repair
2484  *
2485  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2486  * Return -ENOENT if file type is not found.
2487  */
2488 static int find_file_type(struct inode_record *rec, u8 *type)
2489 {
2490         struct inode_backref *backref;
2491
2492         /* For inode item recovered case */
2493         if (rec->found_inode_item) {
2494                 *type = imode_to_type(rec->imode);
2495                 return 0;
2496         }
2497
2498         list_for_each_entry(backref, &rec->backrefs, list) {
2499                 if (backref->found_dir_index || backref->found_dir_item) {
2500                         *type = backref->filetype;
2501                         return 0;
2502                 }
2503         }
2504         return -ENOENT;
2505 }
2506
2507 /*
2508  * To determine the file name for nlink repair
2509  *
2510  * Return 0 if file name is found, set name and namelen.
2511  * Return -ENOENT if file name is not found.
2512  */
2513 static int find_file_name(struct inode_record *rec,
2514                           char *name, int *namelen)
2515 {
2516         struct inode_backref *backref;
2517
2518         list_for_each_entry(backref, &rec->backrefs, list) {
2519                 if (backref->found_dir_index || backref->found_dir_item ||
2520                     backref->found_inode_ref) {
2521                         memcpy(name, backref->name, backref->namelen);
2522                         *namelen = backref->namelen;
2523                         return 0;
2524                 }
2525         }
2526         return -ENOENT;
2527 }
2528
2529 /* Reset the nlink of the inode to the correct one */
2530 static int reset_nlink(struct btrfs_trans_handle *trans,
2531                        struct btrfs_root *root,
2532                        struct btrfs_path *path,
2533                        struct inode_record *rec)
2534 {
2535         struct inode_backref *backref;
2536         struct inode_backref *tmp;
2537         struct btrfs_key key;
2538         struct btrfs_inode_item *inode_item;
2539         int ret = 0;
2540
2541         /* We don't believe this either, reset it and iterate backref */
2542         rec->found_link = 0;
2543
2544         /* Remove all backref including the valid ones */
2545         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2546                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2547                                    backref->index, backref->name,
2548                                    backref->namelen, 0);
2549                 if (ret < 0)
2550                         goto out;
2551
2552                 /* remove invalid backref, so it won't be added back */
2553                 if (!(backref->found_dir_index &&
2554                       backref->found_dir_item &&
2555                       backref->found_inode_ref)) {
2556                         list_del(&backref->list);
2557                         free(backref);
2558                 } else {
2559                         rec->found_link++;
2560                 }
2561         }
2562
2563         /* Set nlink to 0 */
2564         key.objectid = rec->ino;
2565         key.type = BTRFS_INODE_ITEM_KEY;
2566         key.offset = 0;
2567         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2568         if (ret < 0)
2569                 goto out;
2570         if (ret > 0) {
2571                 ret = -ENOENT;
2572                 goto out;
2573         }
2574         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2575                                     struct btrfs_inode_item);
2576         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2577         btrfs_mark_buffer_dirty(path->nodes[0]);
2578         btrfs_release_path(path);
2579
2580         /*
2581          * Add back valid inode_ref/dir_item/dir_index,
2582          * add_link() will handle the nlink inc, so new nlink must be correct
2583          */
2584         list_for_each_entry(backref, &rec->backrefs, list) {
2585                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2586                                      backref->name, backref->namelen,
2587                                      backref->filetype, &backref->index, 1);
2588                 if (ret < 0)
2589                         goto out;
2590         }
2591 out:
2592         btrfs_release_path(path);
2593         return ret;
2594 }
2595
2596 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2597                                struct btrfs_root *root,
2598                                struct btrfs_path *path,
2599                                struct inode_record *rec)
2600 {
2601         char *dir_name = "lost+found";
2602         char namebuf[BTRFS_NAME_LEN] = {0};
2603         u64 lost_found_ino;
2604         u32 mode = 0700;
2605         u8 type = 0;
2606         int namelen = 0;
2607         int name_recovered = 0;
2608         int type_recovered = 0;
2609         int ret = 0;
2610
2611         /*
2612          * Get file name and type first before these invalid inode ref
2613          * are deleted by remove_all_invalid_backref()
2614          */
2615         name_recovered = !find_file_name(rec, namebuf, &namelen);
2616         type_recovered = !find_file_type(rec, &type);
2617
2618         if (!name_recovered) {
2619                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2620                        rec->ino, rec->ino);
2621                 namelen = count_digits(rec->ino);
2622                 sprintf(namebuf, "%llu", rec->ino);
2623                 name_recovered = 1;
2624         }
2625         if (!type_recovered) {
2626                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2627                        rec->ino);
2628                 type = BTRFS_FT_REG_FILE;
2629                 type_recovered = 1;
2630         }
2631
2632         ret = reset_nlink(trans, root, path, rec);
2633         if (ret < 0) {
2634                 fprintf(stderr,
2635                         "Failed to reset nlink for inode %llu: %s\n",
2636                         rec->ino, strerror(-ret));
2637                 goto out;
2638         }
2639
2640         if (rec->found_link == 0) {
2641                 lost_found_ino = root->highest_inode;
2642                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2643                         ret = -EOVERFLOW;
2644                         goto out;
2645                 }
2646                 lost_found_ino++;
2647                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2648                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2649                                   mode);
2650                 if (ret < 0) {
2651                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2652                                 dir_name, strerror(-ret));
2653                         goto out;
2654                 }
2655                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2656                                      namebuf, namelen, type, NULL, 1);
2657                 /*
2658                  * Add ".INO" suffix several times to handle case where
2659                  * "FILENAME.INO" is already taken by another file.
2660                  */
2661                 while (ret == -EEXIST) {
2662                         /*
2663                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2664                          */
2665                         if (namelen + count_digits(rec->ino) + 1 >
2666                             BTRFS_NAME_LEN) {
2667                                 ret = -EFBIG;
2668                                 goto out;
2669                         }
2670                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2671                                  ".%llu", rec->ino);
2672                         namelen += count_digits(rec->ino) + 1;
2673                         ret = btrfs_add_link(trans, root, rec->ino,
2674                                              lost_found_ino, namebuf,
2675                                              namelen, type, NULL, 1);
2676                 }
2677                 if (ret < 0) {
2678                         fprintf(stderr,
2679                                 "Failed to link the inode %llu to %s dir: %s\n",
2680                                 rec->ino, dir_name, strerror(-ret));
2681                         goto out;
2682                 }
2683                 /*
2684                  * Just increase the found_link, don't actually add the
2685                  * backref. This will make things easier and this inode
2686                  * record will be freed after the repair is done.
2687                  * So fsck will not report problem about this inode.
2688                  */
2689                 rec->found_link++;
2690                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2691                        namelen, namebuf, dir_name);
2692         }
2693         printf("Fixed the nlink of inode %llu\n", rec->ino);
2694 out:
2695         /*
2696          * Clear the flag anyway, or we will loop forever for the same inode
2697          * as it will not be removed from the bad inode list and the dead loop
2698          * happens.
2699          */
2700         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2701         btrfs_release_path(path);
2702         return ret;
2703 }
2704
2705 /*
2706  * Check if there is any normal(reg or prealloc) file extent for given
2707  * ino.
2708  * This is used to determine the file type when neither its dir_index/item or
2709  * inode_item exists.
2710  *
2711  * This will *NOT* report error, if any error happens, just consider it does
2712  * not have any normal file extent.
2713  */
2714 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2715 {
2716         struct btrfs_path *path;
2717         struct btrfs_key key;
2718         struct btrfs_key found_key;
2719         struct btrfs_file_extent_item *fi;
2720         u8 type;
2721         int ret = 0;
2722
2723         path = btrfs_alloc_path();
2724         if (!path)
2725                 goto out;
2726         key.objectid = ino;
2727         key.type = BTRFS_EXTENT_DATA_KEY;
2728         key.offset = 0;
2729
2730         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2731         if (ret < 0) {
2732                 ret = 0;
2733                 goto out;
2734         }
2735         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2736                 ret = btrfs_next_leaf(root, path);
2737                 if (ret) {
2738                         ret = 0;
2739                         goto out;
2740                 }
2741         }
2742         while (1) {
2743                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2744                                       path->slots[0]);
2745                 if (found_key.objectid != ino ||
2746                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2747                         break;
2748                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2749                                     struct btrfs_file_extent_item);
2750                 type = btrfs_file_extent_type(path->nodes[0], fi);
2751                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2752                         ret = 1;
2753                         goto out;
2754                 }
2755         }
2756 out:
2757         btrfs_free_path(path);
2758         return ret;
2759 }
2760
2761 static u32 btrfs_type_to_imode(u8 type)
2762 {
2763         static u32 imode_by_btrfs_type[] = {
2764                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2765                 [BTRFS_FT_DIR]          = S_IFDIR,
2766                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2767                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2768                 [BTRFS_FT_FIFO]         = S_IFIFO,
2769                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2770                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2771         };
2772
2773         return imode_by_btrfs_type[(type)];
2774 }
2775
2776 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2777                                 struct btrfs_root *root,
2778                                 struct btrfs_path *path,
2779                                 struct inode_record *rec)
2780 {
2781         u8 filetype;
2782         u32 mode = 0700;
2783         int type_recovered = 0;
2784         int ret = 0;
2785
2786         printf("Trying to rebuild inode:%llu\n", rec->ino);
2787
2788         type_recovered = !find_file_type(rec, &filetype);
2789
2790         /*
2791          * Try to determine inode type if type not found.
2792          *
2793          * For found regular file extent, it must be FILE.
2794          * For found dir_item/index, it must be DIR.
2795          *
2796          * For undetermined one, use FILE as fallback.
2797          *
2798          * TODO:
2799          * 1. If found backref(inode_index/item is already handled) to it,
2800          *    it must be DIR.
2801          *    Need new inode-inode ref structure to allow search for that.
2802          */
2803         if (!type_recovered) {
2804                 if (rec->found_file_extent &&
2805                     find_normal_file_extent(root, rec->ino)) {
2806                         type_recovered = 1;
2807                         filetype = BTRFS_FT_REG_FILE;
2808                 } else if (rec->found_dir_item) {
2809                         type_recovered = 1;
2810                         filetype = BTRFS_FT_DIR;
2811                 } else if (!list_empty(&rec->orphan_extents)) {
2812                         type_recovered = 1;
2813                         filetype = BTRFS_FT_REG_FILE;
2814                 } else{
2815                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2816                                rec->ino);
2817                         type_recovered = 1;
2818                         filetype = BTRFS_FT_REG_FILE;
2819                 }
2820         }
2821
2822         ret = btrfs_new_inode(trans, root, rec->ino,
2823                               mode | btrfs_type_to_imode(filetype));
2824         if (ret < 0)
2825                 goto out;
2826
2827         /*
2828          * Here inode rebuild is done, we only rebuild the inode item,
2829          * don't repair the nlink(like move to lost+found).
2830          * That is the job of nlink repair.
2831          *
2832          * We just fill the record and return
2833          */
2834         rec->found_dir_item = 1;
2835         rec->imode = mode | btrfs_type_to_imode(filetype);
2836         rec->nlink = 0;
2837         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2838         /* Ensure the inode_nlinks repair function will be called */
2839         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2840 out:
2841         return ret;
2842 }
2843
2844 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2845                                       struct btrfs_root *root,
2846                                       struct btrfs_path *path,
2847                                       struct inode_record *rec)
2848 {
2849         struct orphan_data_extent *orphan;
2850         struct orphan_data_extent *tmp;
2851         int ret = 0;
2852
2853         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2854                 /*
2855                  * Check for conflicting file extents
2856                  *
2857                  * Here we don't know whether the extents is compressed or not,
2858                  * so we can only assume it not compressed nor data offset,
2859                  * and use its disk_len as extent length.
2860                  */
2861                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2862                                        orphan->offset, orphan->disk_len, 0);
2863                 btrfs_release_path(path);
2864                 if (ret < 0)
2865                         goto out;
2866                 if (!ret) {
2867                         fprintf(stderr,
2868                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2869                                 orphan->disk_bytenr, orphan->disk_len);
2870                         ret = btrfs_free_extent(trans,
2871                                         root->fs_info->extent_root,
2872                                         orphan->disk_bytenr, orphan->disk_len,
2873                                         0, root->objectid, orphan->objectid,
2874                                         orphan->offset);
2875                         if (ret < 0)
2876                                 goto out;
2877                 }
2878                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2879                                 orphan->offset, orphan->disk_bytenr,
2880                                 orphan->disk_len, orphan->disk_len);
2881                 if (ret < 0)
2882                         goto out;
2883
2884                 /* Update file size info */
2885                 rec->found_size += orphan->disk_len;
2886                 if (rec->found_size == rec->nbytes)
2887                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2888
2889                 /* Update the file extent hole info too */
2890                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2891                                            orphan->disk_len);
2892                 if (ret < 0)
2893                         goto out;
2894                 if (RB_EMPTY_ROOT(&rec->holes))
2895                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2896
2897                 list_del(&orphan->list);
2898                 free(orphan);
2899         }
2900         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2901 out:
2902         return ret;
2903 }
2904
2905 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2906                                         struct btrfs_root *root,
2907                                         struct btrfs_path *path,
2908                                         struct inode_record *rec)
2909 {
2910         struct rb_node *node;
2911         struct file_extent_hole *hole;
2912         int found = 0;
2913         int ret = 0;
2914
2915         node = rb_first(&rec->holes);
2916
2917         while (node) {
2918                 found = 1;
2919                 hole = rb_entry(node, struct file_extent_hole, node);
2920                 ret = btrfs_punch_hole(trans, root, rec->ino,
2921                                        hole->start, hole->len);
2922                 if (ret < 0)
2923                         goto out;
2924                 ret = del_file_extent_hole(&rec->holes, hole->start,
2925                                            hole->len);
2926                 if (ret < 0)
2927                         goto out;
2928                 if (RB_EMPTY_ROOT(&rec->holes))
2929                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2930                 node = rb_first(&rec->holes);
2931         }
2932         /* special case for a file losing all its file extent */
2933         if (!found) {
2934                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2935                                        round_up(rec->isize, root->sectorsize));
2936                 if (ret < 0)
2937                         goto out;
2938         }
2939         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2940                rec->ino, root->objectid);
2941 out:
2942         return ret;
2943 }
2944
2945 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2946 {
2947         struct btrfs_trans_handle *trans;
2948         struct btrfs_path *path;
2949         int ret = 0;
2950
2951         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2952                              I_ERR_NO_ORPHAN_ITEM |
2953                              I_ERR_LINK_COUNT_WRONG |
2954                              I_ERR_NO_INODE_ITEM |
2955                              I_ERR_FILE_EXTENT_ORPHAN |
2956                              I_ERR_FILE_EXTENT_DISCOUNT|
2957                              I_ERR_FILE_NBYTES_WRONG)))
2958                 return rec->errors;
2959
2960         path = btrfs_alloc_path();
2961         if (!path)
2962                 return -ENOMEM;
2963
2964         /*
2965          * For nlink repair, it may create a dir and add link, so
2966          * 2 for parent(256)'s dir_index and dir_item
2967          * 2 for lost+found dir's inode_item and inode_ref
2968          * 1 for the new inode_ref of the file
2969          * 2 for lost+found dir's dir_index and dir_item for the file
2970          */
2971         trans = btrfs_start_transaction(root, 7);
2972         if (IS_ERR(trans)) {
2973                 btrfs_free_path(path);
2974                 return PTR_ERR(trans);
2975         }
2976
2977         if (rec->errors & I_ERR_NO_INODE_ITEM)
2978                 ret = repair_inode_no_item(trans, root, path, rec);
2979         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2980                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2981         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2982                 ret = repair_inode_discount_extent(trans, root, path, rec);
2983         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2984                 ret = repair_inode_isize(trans, root, path, rec);
2985         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2986                 ret = repair_inode_orphan_item(trans, root, path, rec);
2987         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2988                 ret = repair_inode_nlinks(trans, root, path, rec);
2989         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2990                 ret = repair_inode_nbytes(trans, root, path, rec);
2991         btrfs_commit_transaction(trans, root);
2992         btrfs_free_path(path);
2993         return ret;
2994 }
2995
2996 static int check_inode_recs(struct btrfs_root *root,
2997                             struct cache_tree *inode_cache)
2998 {
2999         struct cache_extent *cache;
3000         struct ptr_node *node;
3001         struct inode_record *rec;
3002         struct inode_backref *backref;
3003         int stage = 0;
3004         int ret = 0;
3005         int err = 0;
3006         u64 error = 0;
3007         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3008
3009         if (btrfs_root_refs(&root->root_item) == 0) {
3010                 if (!cache_tree_empty(inode_cache))
3011                         fprintf(stderr, "warning line %d\n", __LINE__);
3012                 return 0;
3013         }
3014
3015         /*
3016          * We need to record the highest inode number for later 'lost+found'
3017          * dir creation.
3018          * We must select an ino not used/referred by any existing inode, or
3019          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3020          * this may cause 'lost+found' dir has wrong nlinks.
3021          */
3022         cache = last_cache_extent(inode_cache);
3023         if (cache) {
3024                 node = container_of(cache, struct ptr_node, cache);
3025                 rec = node->data;
3026                 if (rec->ino > root->highest_inode)
3027                         root->highest_inode = rec->ino;
3028         }
3029
3030         /*
3031          * We need to repair backrefs first because we could change some of the
3032          * errors in the inode recs.
3033          *
3034          * We also need to go through and delete invalid backrefs first and then
3035          * add the correct ones second.  We do this because we may get EEXIST
3036          * when adding back the correct index because we hadn't yet deleted the
3037          * invalid index.
3038          *
3039          * For example, if we were missing a dir index then the directories
3040          * isize would be wrong, so if we fixed the isize to what we thought it
3041          * would be and then fixed the backref we'd still have a invalid fs, so
3042          * we need to add back the dir index and then check to see if the isize
3043          * is still wrong.
3044          */
3045         while (stage < 3) {
3046                 stage++;
3047                 if (stage == 3 && !err)
3048                         break;
3049
3050                 cache = search_cache_extent(inode_cache, 0);
3051                 while (repair && cache) {
3052                         node = container_of(cache, struct ptr_node, cache);
3053                         rec = node->data;
3054                         cache = next_cache_extent(cache);
3055
3056                         /* Need to free everything up and rescan */
3057                         if (stage == 3) {
3058                                 remove_cache_extent(inode_cache, &node->cache);
3059                                 free(node);
3060                                 free_inode_rec(rec);
3061                                 continue;
3062                         }
3063
3064                         if (list_empty(&rec->backrefs))
3065                                 continue;
3066
3067                         ret = repair_inode_backrefs(root, rec, inode_cache,
3068                                                     stage == 1);
3069                         if (ret < 0) {
3070                                 err = ret;
3071                                 stage = 2;
3072                                 break;
3073                         } if (ret > 0) {
3074                                 err = -EAGAIN;
3075                         }
3076                 }
3077         }
3078         if (err)
3079                 return err;
3080
3081         rec = get_inode_rec(inode_cache, root_dirid, 0);
3082         BUG_ON(IS_ERR(rec));
3083         if (rec) {
3084                 ret = check_root_dir(rec);
3085                 if (ret) {
3086                         fprintf(stderr, "root %llu root dir %llu error\n",
3087                                 (unsigned long long)root->root_key.objectid,
3088                                 (unsigned long long)root_dirid);
3089                         print_inode_error(root, rec);
3090                         error++;
3091                 }
3092         } else {
3093                 if (repair) {
3094                         struct btrfs_trans_handle *trans;
3095
3096                         trans = btrfs_start_transaction(root, 1);
3097                         if (IS_ERR(trans)) {
3098                                 err = PTR_ERR(trans);
3099                                 return err;
3100                         }
3101
3102                         fprintf(stderr,
3103                                 "root %llu missing its root dir, recreating\n",
3104                                 (unsigned long long)root->objectid);
3105
3106                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3107                         BUG_ON(ret);
3108
3109                         btrfs_commit_transaction(trans, root);
3110                         return -EAGAIN;
3111                 }
3112
3113                 fprintf(stderr, "root %llu root dir %llu not found\n",
3114                         (unsigned long long)root->root_key.objectid,
3115                         (unsigned long long)root_dirid);
3116         }
3117
3118         while (1) {
3119                 cache = search_cache_extent(inode_cache, 0);
3120                 if (!cache)
3121                         break;
3122                 node = container_of(cache, struct ptr_node, cache);
3123                 rec = node->data;
3124                 remove_cache_extent(inode_cache, &node->cache);
3125                 free(node);
3126                 if (rec->ino == root_dirid ||
3127                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3128                         free_inode_rec(rec);
3129                         continue;
3130                 }
3131
3132                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3133                         ret = check_orphan_item(root, rec->ino);
3134                         if (ret == 0)
3135                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3136                         if (can_free_inode_rec(rec)) {
3137                                 free_inode_rec(rec);
3138                                 continue;
3139                         }
3140                 }
3141
3142                 if (!rec->found_inode_item)
3143                         rec->errors |= I_ERR_NO_INODE_ITEM;
3144                 if (rec->found_link != rec->nlink)
3145                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3146                 if (repair) {
3147                         ret = try_repair_inode(root, rec);
3148                         if (ret == 0 && can_free_inode_rec(rec)) {
3149                                 free_inode_rec(rec);
3150                                 continue;
3151                         }
3152                         ret = 0;
3153                 }
3154
3155                 if (!(repair && ret == 0))
3156                         error++;
3157                 print_inode_error(root, rec);
3158                 list_for_each_entry(backref, &rec->backrefs, list) {
3159                         if (!backref->found_dir_item)
3160                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3161                         if (!backref->found_dir_index)
3162                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3163                         if (!backref->found_inode_ref)
3164                                 backref->errors |= REF_ERR_NO_INODE_REF;
3165                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3166                                 " namelen %u name %s filetype %d errors %x",
3167                                 (unsigned long long)backref->dir,
3168                                 (unsigned long long)backref->index,
3169                                 backref->namelen, backref->name,
3170                                 backref->filetype, backref->errors);
3171                         print_ref_error(backref->errors);
3172                 }
3173                 free_inode_rec(rec);
3174         }
3175         return (error > 0) ? -1 : 0;
3176 }
3177
3178 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3179                                         u64 objectid)
3180 {
3181         struct cache_extent *cache;
3182         struct root_record *rec = NULL;
3183         int ret;
3184
3185         cache = lookup_cache_extent(root_cache, objectid, 1);
3186         if (cache) {
3187                 rec = container_of(cache, struct root_record, cache);
3188         } else {
3189                 rec = calloc(1, sizeof(*rec));
3190                 if (!rec)
3191                         return ERR_PTR(-ENOMEM);
3192                 rec->objectid = objectid;
3193                 INIT_LIST_HEAD(&rec->backrefs);
3194                 rec->cache.start = objectid;
3195                 rec->cache.size = 1;
3196
3197                 ret = insert_cache_extent(root_cache, &rec->cache);
3198                 if (ret)
3199                         return ERR_PTR(-EEXIST);
3200         }
3201         return rec;
3202 }
3203
3204 static struct root_backref *get_root_backref(struct root_record *rec,
3205                                              u64 ref_root, u64 dir, u64 index,
3206                                              const char *name, int namelen)
3207 {
3208         struct root_backref *backref;
3209
3210         list_for_each_entry(backref, &rec->backrefs, list) {
3211                 if (backref->ref_root != ref_root || backref->dir != dir ||
3212                     backref->namelen != namelen)
3213                         continue;
3214                 if (memcmp(name, backref->name, namelen))
3215                         continue;
3216                 return backref;
3217         }
3218
3219         backref = calloc(1, sizeof(*backref) + namelen + 1);
3220         if (!backref)
3221                 return NULL;
3222         backref->ref_root = ref_root;
3223         backref->dir = dir;
3224         backref->index = index;
3225         backref->namelen = namelen;
3226         memcpy(backref->name, name, namelen);
3227         backref->name[namelen] = '\0';
3228         list_add_tail(&backref->list, &rec->backrefs);
3229         return backref;
3230 }
3231
3232 static void free_root_record(struct cache_extent *cache)
3233 {
3234         struct root_record *rec;
3235         struct root_backref *backref;
3236
3237         rec = container_of(cache, struct root_record, cache);
3238         while (!list_empty(&rec->backrefs)) {
3239                 backref = to_root_backref(rec->backrefs.next);
3240                 list_del(&backref->list);
3241                 free(backref);
3242         }
3243
3244         kfree(rec);
3245 }
3246
3247 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3248
3249 static int add_root_backref(struct cache_tree *root_cache,
3250                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3251                             const char *name, int namelen,
3252                             int item_type, int errors)
3253 {
3254         struct root_record *rec;
3255         struct root_backref *backref;
3256
3257         rec = get_root_rec(root_cache, root_id);
3258         BUG_ON(IS_ERR(rec));
3259         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3260         BUG_ON(!backref);
3261
3262         backref->errors |= errors;
3263
3264         if (item_type != BTRFS_DIR_ITEM_KEY) {
3265                 if (backref->found_dir_index || backref->found_back_ref ||
3266                     backref->found_forward_ref) {
3267                         if (backref->index != index)
3268                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3269                 } else {
3270                         backref->index = index;
3271                 }
3272         }
3273
3274         if (item_type == BTRFS_DIR_ITEM_KEY) {
3275                 if (backref->found_forward_ref)
3276                         rec->found_ref++;
3277                 backref->found_dir_item = 1;
3278         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3279                 backref->found_dir_index = 1;
3280         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3281                 if (backref->found_forward_ref)
3282                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3283                 else if (backref->found_dir_item)
3284                         rec->found_ref++;
3285                 backref->found_forward_ref = 1;
3286         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3287                 if (backref->found_back_ref)
3288                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3289                 backref->found_back_ref = 1;
3290         } else {
3291                 BUG_ON(1);
3292         }
3293
3294         if (backref->found_forward_ref && backref->found_dir_item)
3295                 backref->reachable = 1;
3296         return 0;
3297 }
3298
3299 static int merge_root_recs(struct btrfs_root *root,
3300                            struct cache_tree *src_cache,
3301                            struct cache_tree *dst_cache)
3302 {
3303         struct cache_extent *cache;
3304         struct ptr_node *node;
3305         struct inode_record *rec;
3306         struct inode_backref *backref;
3307         int ret = 0;
3308
3309         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3310                 free_inode_recs_tree(src_cache);
3311                 return 0;
3312         }
3313
3314         while (1) {
3315                 cache = search_cache_extent(src_cache, 0);
3316                 if (!cache)
3317                         break;
3318                 node = container_of(cache, struct ptr_node, cache);
3319                 rec = node->data;
3320                 remove_cache_extent(src_cache, &node->cache);
3321                 free(node);
3322
3323                 ret = is_child_root(root, root->objectid, rec->ino);
3324                 if (ret < 0)
3325                         break;
3326                 else if (ret == 0)
3327                         goto skip;
3328
3329                 list_for_each_entry(backref, &rec->backrefs, list) {
3330                         BUG_ON(backref->found_inode_ref);
3331                         if (backref->found_dir_item)
3332                                 add_root_backref(dst_cache, rec->ino,
3333                                         root->root_key.objectid, backref->dir,
3334                                         backref->index, backref->name,
3335                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3336                                         backref->errors);
3337                         if (backref->found_dir_index)
3338                                 add_root_backref(dst_cache, rec->ino,
3339                                         root->root_key.objectid, backref->dir,
3340                                         backref->index, backref->name,
3341                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3342                                         backref->errors);
3343                 }
3344 skip:
3345                 free_inode_rec(rec);
3346         }
3347         if (ret < 0)
3348                 return ret;
3349         return 0;
3350 }
3351
3352 static int check_root_refs(struct btrfs_root *root,
3353                            struct cache_tree *root_cache)
3354 {
3355         struct root_record *rec;
3356         struct root_record *ref_root;
3357         struct root_backref *backref;
3358         struct cache_extent *cache;
3359         int loop = 1;
3360         int ret;
3361         int error;
3362         int errors = 0;
3363
3364         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3365         BUG_ON(IS_ERR(rec));
3366         rec->found_ref = 1;
3367
3368         /* fixme: this can not detect circular references */
3369         while (loop) {
3370                 loop = 0;
3371                 cache = search_cache_extent(root_cache, 0);
3372                 while (1) {
3373                         if (!cache)
3374                                 break;
3375                         rec = container_of(cache, struct root_record, cache);
3376                         cache = next_cache_extent(cache);
3377
3378                         if (rec->found_ref == 0)
3379                                 continue;
3380
3381                         list_for_each_entry(backref, &rec->backrefs, list) {
3382                                 if (!backref->reachable)
3383                                         continue;
3384
3385                                 ref_root = get_root_rec(root_cache,
3386                                                         backref->ref_root);
3387                                 BUG_ON(IS_ERR(ref_root));
3388                                 if (ref_root->found_ref > 0)
3389                                         continue;
3390
3391                                 backref->reachable = 0;
3392                                 rec->found_ref--;
3393                                 if (rec->found_ref == 0)
3394                                         loop = 1;
3395                         }
3396                 }
3397         }
3398
3399         cache = search_cache_extent(root_cache, 0);
3400         while (1) {
3401                 if (!cache)
3402                         break;
3403                 rec = container_of(cache, struct root_record, cache);
3404                 cache = next_cache_extent(cache);
3405
3406                 if (rec->found_ref == 0 &&
3407                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3408                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3409                         ret = check_orphan_item(root->fs_info->tree_root,
3410                                                 rec->objectid);
3411                         if (ret == 0)
3412                                 continue;
3413
3414                         /*
3415                          * If we don't have a root item then we likely just have
3416                          * a dir item in a snapshot for this root but no actual
3417                          * ref key or anything so it's meaningless.
3418                          */
3419                         if (!rec->found_root_item)
3420                                 continue;
3421                         errors++;
3422                         fprintf(stderr, "fs tree %llu not referenced\n",
3423                                 (unsigned long long)rec->objectid);
3424                 }
3425
3426                 error = 0;
3427                 if (rec->found_ref > 0 && !rec->found_root_item)
3428                         error = 1;
3429                 list_for_each_entry(backref, &rec->backrefs, list) {
3430                         if (!backref->found_dir_item)
3431                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3432                         if (!backref->found_dir_index)
3433                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3434                         if (!backref->found_back_ref)
3435                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3436                         if (!backref->found_forward_ref)
3437                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3438                         if (backref->reachable && backref->errors)
3439                                 error = 1;
3440                 }
3441                 if (!error)
3442                         continue;
3443
3444                 errors++;
3445                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3446                         (unsigned long long)rec->objectid, rec->found_ref,
3447                          rec->found_root_item ? "" : "not found");
3448
3449                 list_for_each_entry(backref, &rec->backrefs, list) {
3450                         if (!backref->reachable)
3451                                 continue;
3452                         if (!backref->errors && rec->found_root_item)
3453                                 continue;
3454                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3455                                 " index %llu namelen %u name %s errors %x\n",
3456                                 (unsigned long long)backref->ref_root,
3457                                 (unsigned long long)backref->dir,
3458                                 (unsigned long long)backref->index,
3459                                 backref->namelen, backref->name,
3460                                 backref->errors);
3461                         print_ref_error(backref->errors);
3462                 }
3463         }
3464         return errors > 0 ? 1 : 0;
3465 }
3466
3467 static int process_root_ref(struct extent_buffer *eb, int slot,
3468                             struct btrfs_key *key,
3469                             struct cache_tree *root_cache)
3470 {
3471         u64 dirid;
3472         u64 index;
3473         u32 len;
3474         u32 name_len;
3475         struct btrfs_root_ref *ref;
3476         char namebuf[BTRFS_NAME_LEN];
3477         int error;
3478
3479         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3480
3481         dirid = btrfs_root_ref_dirid(eb, ref);
3482         index = btrfs_root_ref_sequence(eb, ref);
3483         name_len = btrfs_root_ref_name_len(eb, ref);
3484
3485         if (name_len <= BTRFS_NAME_LEN) {
3486                 len = name_len;
3487                 error = 0;
3488         } else {
3489                 len = BTRFS_NAME_LEN;
3490                 error = REF_ERR_NAME_TOO_LONG;
3491         }
3492         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3493
3494         if (key->type == BTRFS_ROOT_REF_KEY) {
3495                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3496                                  index, namebuf, len, key->type, error);
3497         } else {
3498                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3499                                  index, namebuf, len, key->type, error);
3500         }
3501         return 0;
3502 }
3503
3504 static void free_corrupt_block(struct cache_extent *cache)
3505 {
3506         struct btrfs_corrupt_block *corrupt;
3507
3508         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3509         free(corrupt);
3510 }
3511
3512 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3513
3514 /*
3515  * Repair the btree of the given root.
3516  *
3517  * The fix is to remove the node key in corrupt_blocks cache_tree.
3518  * and rebalance the tree.
3519  * After the fix, the btree should be writeable.
3520  */
3521 static int repair_btree(struct btrfs_root *root,
3522                         struct cache_tree *corrupt_blocks)
3523 {
3524         struct btrfs_trans_handle *trans;
3525         struct btrfs_path *path;
3526         struct btrfs_corrupt_block *corrupt;
3527         struct cache_extent *cache;
3528         struct btrfs_key key;
3529         u64 offset;
3530         int level;
3531         int ret = 0;
3532
3533         if (cache_tree_empty(corrupt_blocks))
3534                 return 0;
3535
3536         path = btrfs_alloc_path();
3537         if (!path)
3538                 return -ENOMEM;
3539
3540         trans = btrfs_start_transaction(root, 1);
3541         if (IS_ERR(trans)) {
3542                 ret = PTR_ERR(trans);
3543                 fprintf(stderr, "Error starting transaction: %s\n",
3544                         strerror(-ret));
3545                 goto out_free_path;
3546         }
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 level = corrupt->level;
3552                 path->lowest_level = level;
3553                 key.objectid = corrupt->key.objectid;
3554                 key.type = corrupt->key.type;
3555                 key.offset = corrupt->key.offset;
3556
3557                 /*
3558                  * Here we don't want to do any tree balance, since it may
3559                  * cause a balance with corrupted brother leaf/node,
3560                  * so ins_len set to 0 here.
3561                  * Balance will be done after all corrupt node/leaf is deleted.
3562                  */
3563                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3564                 if (ret < 0)
3565                         goto out;
3566                 offset = btrfs_node_blockptr(path->nodes[level],
3567                                              path->slots[level]);
3568
3569                 /* Remove the ptr */
3570                 ret = btrfs_del_ptr(trans, root, path, level,
3571                                     path->slots[level]);
3572                 if (ret < 0)
3573                         goto out;
3574                 /*
3575                  * Remove the corresponding extent
3576                  * return value is not concerned.
3577                  */
3578                 btrfs_release_path(path);
3579                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3580                                         0, root->root_key.objectid,
3581                                         level - 1, 0);
3582                 cache = next_cache_extent(cache);
3583         }
3584
3585         /* Balance the btree using btrfs_search_slot() */
3586         cache = first_cache_extent(corrupt_blocks);
3587         while (cache) {
3588                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3589                                        cache);
3590                 memcpy(&key, &corrupt->key, sizeof(key));
3591                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3592                 if (ret < 0)
3593                         goto out;
3594                 /* return will always >0 since it won't find the item */
3595                 ret = 0;
3596                 btrfs_release_path(path);
3597                 cache = next_cache_extent(cache);
3598         }
3599 out:
3600         btrfs_commit_transaction(trans, root);
3601 out_free_path:
3602         btrfs_free_path(path);
3603         return ret;
3604 }
3605
3606 static int check_fs_root(struct btrfs_root *root,
3607                          struct cache_tree *root_cache,
3608                          struct walk_control *wc)
3609 {
3610         int ret = 0;
3611         int err = 0;
3612         int wret;
3613         int level;
3614         struct btrfs_path path;
3615         struct shared_node root_node;
3616         struct root_record *rec;
3617         struct btrfs_root_item *root_item = &root->root_item;
3618         struct cache_tree corrupt_blocks;
3619         struct orphan_data_extent *orphan;
3620         struct orphan_data_extent *tmp;
3621         enum btrfs_tree_block_status status;
3622
3623         /*
3624          * Reuse the corrupt_block cache tree to record corrupted tree block
3625          *
3626          * Unlike the usage in extent tree check, here we do it in a per
3627          * fs/subvol tree base.
3628          */
3629         cache_tree_init(&corrupt_blocks);
3630         root->fs_info->corrupt_blocks = &corrupt_blocks;
3631
3632         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3633                 rec = get_root_rec(root_cache, root->root_key.objectid);
3634                 BUG_ON(IS_ERR(rec));
3635                 if (btrfs_root_refs(root_item) > 0)
3636                         rec->found_root_item = 1;
3637         }
3638
3639         btrfs_init_path(&path);
3640         memset(&root_node, 0, sizeof(root_node));
3641         cache_tree_init(&root_node.root_cache);
3642         cache_tree_init(&root_node.inode_cache);
3643
3644         /* Move the orphan extent record to corresponding inode_record */
3645         list_for_each_entry_safe(orphan, tmp,
3646                                  &root->orphan_data_extents, list) {
3647                 struct inode_record *inode;
3648
3649                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3650                                       1);
3651                 BUG_ON(IS_ERR(inode));
3652                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3653                 list_move(&orphan->list, &inode->orphan_extents);
3654         }
3655
3656         level = btrfs_header_level(root->node);
3657         memset(wc->nodes, 0, sizeof(wc->nodes));
3658         wc->nodes[level] = &root_node;
3659         wc->active_node = level;
3660         wc->root_level = level;
3661
3662         /* We may not have checked the root block, lets do that now */
3663         if (btrfs_is_leaf(root->node))
3664                 status = btrfs_check_leaf(root, NULL, root->node);
3665         else
3666                 status = btrfs_check_node(root, NULL, root->node);
3667         if (status != BTRFS_TREE_BLOCK_CLEAN)
3668                 return -EIO;
3669
3670         if (btrfs_root_refs(root_item) > 0 ||
3671             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3672                 path.nodes[level] = root->node;
3673                 extent_buffer_get(root->node);
3674                 path.slots[level] = 0;
3675         } else {
3676                 struct btrfs_key key;
3677                 struct btrfs_disk_key found_key;
3678
3679                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3680                 level = root_item->drop_level;
3681                 path.lowest_level = level;
3682                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3683                 if (wret < 0)
3684                         goto skip_walking;
3685                 btrfs_node_key(path.nodes[level], &found_key,
3686                                 path.slots[level]);
3687                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3688                                         sizeof(found_key)));
3689         }
3690
3691         while (1) {
3692                 wret = walk_down_tree(root, &path, wc, &level);
3693                 if (wret < 0)
3694                         ret = wret;
3695                 if (wret != 0)
3696                         break;
3697
3698                 wret = walk_up_tree(root, &path, wc, &level);
3699                 if (wret < 0)
3700                         ret = wret;
3701                 if (wret != 0)
3702                         break;
3703         }
3704 skip_walking:
3705         btrfs_release_path(&path);
3706
3707         if (!cache_tree_empty(&corrupt_blocks)) {
3708                 struct cache_extent *cache;
3709                 struct btrfs_corrupt_block *corrupt;
3710
3711                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3712                        root->root_key.objectid);
3713                 cache = first_cache_extent(&corrupt_blocks);
3714                 while (cache) {
3715                         corrupt = container_of(cache,
3716                                                struct btrfs_corrupt_block,
3717                                                cache);
3718                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3719                                cache->start, corrupt->level,
3720                                corrupt->key.objectid, corrupt->key.type,
3721                                corrupt->key.offset);
3722                         cache = next_cache_extent(cache);
3723                 }
3724                 if (repair) {
3725                         printf("Try to repair the btree for root %llu\n",
3726                                root->root_key.objectid);
3727                         ret = repair_btree(root, &corrupt_blocks);
3728                         if (ret < 0)
3729                                 fprintf(stderr, "Failed to repair btree: %s\n",
3730                                         strerror(-ret));
3731                         if (!ret)
3732                                 printf("Btree for root %llu is fixed\n",
3733                                        root->root_key.objectid);
3734                 }
3735         }
3736
3737         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3738         if (err < 0)
3739                 ret = err;
3740
3741         if (root_node.current) {
3742                 root_node.current->checked = 1;
3743                 maybe_free_inode_rec(&root_node.inode_cache,
3744                                 root_node.current);
3745         }
3746
3747         err = check_inode_recs(root, &root_node.inode_cache);
3748         if (!ret)
3749                 ret = err;
3750
3751         free_corrupt_blocks_tree(&corrupt_blocks);
3752         root->fs_info->corrupt_blocks = NULL;
3753         free_orphan_data_extents(&root->orphan_data_extents);
3754         return ret;
3755 }
3756
3757 static int fs_root_objectid(u64 objectid)
3758 {
3759         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3760             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3761                 return 1;
3762         return is_fstree(objectid);
3763 }
3764
3765 static int check_fs_roots(struct btrfs_root *root,
3766                           struct cache_tree *root_cache)
3767 {
3768         struct btrfs_path path;
3769         struct btrfs_key key;
3770         struct walk_control wc;
3771         struct extent_buffer *leaf, *tree_node;
3772         struct btrfs_root *tmp_root;
3773         struct btrfs_root *tree_root = root->fs_info->tree_root;
3774         int ret;
3775         int err = 0;
3776
3777         if (ctx.progress_enabled) {
3778                 ctx.tp = TASK_FS_ROOTS;
3779                 task_start(ctx.info);
3780         }
3781
3782         /*
3783          * Just in case we made any changes to the extent tree that weren't
3784          * reflected into the free space cache yet.
3785          */
3786         if (repair)
3787                 reset_cached_block_groups(root->fs_info);
3788         memset(&wc, 0, sizeof(wc));
3789         cache_tree_init(&wc.shared);
3790         btrfs_init_path(&path);
3791
3792 again:
3793         key.offset = 0;
3794         key.objectid = 0;
3795         key.type = BTRFS_ROOT_ITEM_KEY;
3796         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3797         if (ret < 0) {
3798                 err = 1;
3799                 goto out;
3800         }
3801         tree_node = tree_root->node;
3802         while (1) {
3803                 if (tree_node != tree_root->node) {
3804                         free_root_recs_tree(root_cache);
3805                         btrfs_release_path(&path);
3806                         goto again;
3807                 }
3808                 leaf = path.nodes[0];
3809                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3810                         ret = btrfs_next_leaf(tree_root, &path);
3811                         if (ret) {
3812                                 if (ret < 0)
3813                                         err = 1;
3814                                 break;
3815                         }
3816                         leaf = path.nodes[0];
3817                 }
3818                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3819                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3820                     fs_root_objectid(key.objectid)) {
3821                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3822                                 tmp_root = btrfs_read_fs_root_no_cache(
3823                                                 root->fs_info, &key);
3824                         } else {
3825                                 key.offset = (u64)-1;
3826                                 tmp_root = btrfs_read_fs_root(
3827                                                 root->fs_info, &key);
3828                         }
3829                         if (IS_ERR(tmp_root)) {
3830                                 err = 1;
3831                                 goto next;
3832                         }
3833                         ret = check_fs_root(tmp_root, root_cache, &wc);
3834                         if (ret == -EAGAIN) {
3835                                 free_root_recs_tree(root_cache);
3836                                 btrfs_release_path(&path);
3837                                 goto again;
3838                         }
3839                         if (ret)
3840                                 err = 1;
3841                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3842                                 btrfs_free_fs_root(tmp_root);
3843                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3844                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3845                         process_root_ref(leaf, path.slots[0], &key,
3846                                          root_cache);
3847                 }
3848 next:
3849                 path.slots[0]++;
3850         }
3851 out:
3852         btrfs_release_path(&path);
3853         if (err)
3854                 free_extent_cache_tree(&wc.shared);
3855         if (!cache_tree_empty(&wc.shared))
3856                 fprintf(stderr, "warning line %d\n", __LINE__);
3857
3858         task_stop(ctx.info);
3859
3860         return err;
3861 }
3862
3863 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3864 {
3865         struct rb_node *n;
3866         struct extent_backref *back;
3867         struct tree_backref *tback;
3868         struct data_backref *dback;
3869         u64 found = 0;
3870         int err = 0;
3871
3872         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3873                 back = rb_node_to_extent_backref(n);
3874                 if (!back->found_extent_tree) {
3875                         err = 1;
3876                         if (!print_errs)
3877                                 goto out;
3878                         if (back->is_data) {
3879                                 dback = to_data_backref(back);
3880                                 fprintf(stderr, "Backref %llu %s %llu"
3881                                         " owner %llu offset %llu num_refs %lu"
3882                                         " not found in extent tree\n",
3883                                         (unsigned long long)rec->start,
3884                                         back->full_backref ?
3885                                         "parent" : "root",
3886                                         back->full_backref ?
3887                                         (unsigned long long)dback->parent:
3888                                         (unsigned long long)dback->root,
3889                                         (unsigned long long)dback->owner,
3890                                         (unsigned long long)dback->offset,
3891                                         (unsigned long)dback->num_refs);
3892                         } else {
3893                                 tback = to_tree_backref(back);
3894                                 fprintf(stderr, "Backref %llu parent %llu"
3895                                         " root %llu not found in extent tree\n",
3896                                         (unsigned long long)rec->start,
3897                                         (unsigned long long)tback->parent,
3898                                         (unsigned long long)tback->root);
3899                         }
3900                 }
3901                 if (!back->is_data && !back->found_ref) {
3902                         err = 1;
3903                         if (!print_errs)
3904                                 goto out;
3905                         tback = to_tree_backref(back);
3906                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3907                                 (unsigned long long)rec->start,
3908                                 back->full_backref ? "parent" : "root",
3909                                 back->full_backref ?
3910                                 (unsigned long long)tback->parent :
3911                                 (unsigned long long)tback->root, back);
3912                 }
3913                 if (back->is_data) {
3914                         dback = to_data_backref(back);
3915                         if (dback->found_ref != dback->num_refs) {
3916                                 err = 1;
3917                                 if (!print_errs)
3918                                         goto out;
3919                                 fprintf(stderr, "Incorrect local backref count"
3920                                         " on %llu %s %llu owner %llu"
3921                                         " offset %llu found %u wanted %u back %p\n",
3922                                         (unsigned long long)rec->start,
3923                                         back->full_backref ?
3924                                         "parent" : "root",
3925                                         back->full_backref ?
3926                                         (unsigned long long)dback->parent:
3927                                         (unsigned long long)dback->root,
3928                                         (unsigned long long)dback->owner,
3929                                         (unsigned long long)dback->offset,
3930                                         dback->found_ref, dback->num_refs, back);
3931                         }
3932                         if (dback->disk_bytenr != rec->start) {
3933                                 err = 1;
3934                                 if (!print_errs)
3935                                         goto out;
3936                                 fprintf(stderr, "Backref disk bytenr does not"
3937                                         " match extent record, bytenr=%llu, "
3938                                         "ref bytenr=%llu\n",
3939                                         (unsigned long long)rec->start,
3940                                         (unsigned long long)dback->disk_bytenr);
3941                         }
3942
3943                         if (dback->bytes != rec->nr) {
3944                                 err = 1;
3945                                 if (!print_errs)
3946                                         goto out;
3947                                 fprintf(stderr, "Backref bytes do not match "
3948                                         "extent backref, bytenr=%llu, ref "
3949                                         "bytes=%llu, backref bytes=%llu\n",
3950                                         (unsigned long long)rec->start,
3951                                         (unsigned long long)rec->nr,
3952                                         (unsigned long long)dback->bytes);
3953                         }
3954                 }
3955                 if (!back->is_data) {
3956                         found += 1;
3957                 } else {
3958                         dback = to_data_backref(back);
3959                         found += dback->found_ref;
3960                 }
3961         }
3962         if (found != rec->refs) {
3963                 err = 1;
3964                 if (!print_errs)
3965                         goto out;
3966                 fprintf(stderr, "Incorrect global backref count "
3967                         "on %llu found %llu wanted %llu\n",
3968                         (unsigned long long)rec->start,
3969                         (unsigned long long)found,
3970                         (unsigned long long)rec->refs);
3971         }
3972 out:
3973         return err;
3974 }
3975
3976 static void __free_one_backref(struct rb_node *node)
3977 {
3978         struct extent_backref *back = rb_node_to_extent_backref(node);
3979
3980         free(back);
3981 }
3982
3983 static void free_all_extent_backrefs(struct extent_record *rec)
3984 {
3985         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3986 }
3987
3988 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3989                                      struct cache_tree *extent_cache)
3990 {
3991         struct cache_extent *cache;
3992         struct extent_record *rec;
3993
3994         while (1) {
3995                 cache = first_cache_extent(extent_cache);
3996                 if (!cache)
3997                         break;
3998                 rec = container_of(cache, struct extent_record, cache);
3999                 remove_cache_extent(extent_cache, cache);
4000                 free_all_extent_backrefs(rec);
4001                 free(rec);
4002         }
4003 }
4004
4005 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4006                                  struct extent_record *rec)
4007 {
4008         if (rec->content_checked && rec->owner_ref_checked &&
4009             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4010             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4011             !rec->bad_full_backref && !rec->crossing_stripes &&
4012             !rec->wrong_chunk_type) {
4013                 remove_cache_extent(extent_cache, &rec->cache);
4014                 free_all_extent_backrefs(rec);
4015                 list_del_init(&rec->list);
4016                 free(rec);
4017         }
4018         return 0;
4019 }
4020
4021 static int check_owner_ref(struct btrfs_root *root,
4022                             struct extent_record *rec,
4023                             struct extent_buffer *buf)
4024 {
4025         struct extent_backref *node, *tmp;
4026         struct tree_backref *back;
4027         struct btrfs_root *ref_root;
4028         struct btrfs_key key;
4029         struct btrfs_path path;
4030         struct extent_buffer *parent;
4031         int level;
4032         int found = 0;
4033         int ret;
4034
4035         rbtree_postorder_for_each_entry_safe(node, tmp,
4036                                              &rec->backref_tree, node) {
4037                 if (node->is_data)
4038                         continue;
4039                 if (!node->found_ref)
4040                         continue;
4041                 if (node->full_backref)
4042                         continue;
4043                 back = to_tree_backref(node);
4044                 if (btrfs_header_owner(buf) == back->root)
4045                         return 0;
4046         }
4047         BUG_ON(rec->is_root);
4048
4049         /* try to find the block by search corresponding fs tree */
4050         key.objectid = btrfs_header_owner(buf);
4051         key.type = BTRFS_ROOT_ITEM_KEY;
4052         key.offset = (u64)-1;
4053
4054         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4055         if (IS_ERR(ref_root))
4056                 return 1;
4057
4058         level = btrfs_header_level(buf);
4059         if (level == 0)
4060                 btrfs_item_key_to_cpu(buf, &key, 0);
4061         else
4062                 btrfs_node_key_to_cpu(buf, &key, 0);
4063
4064         btrfs_init_path(&path);
4065         path.lowest_level = level + 1;
4066         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4067         if (ret < 0)
4068                 return 0;
4069
4070         parent = path.nodes[level + 1];
4071         if (parent && buf->start == btrfs_node_blockptr(parent,
4072                                                         path.slots[level + 1]))
4073                 found = 1;
4074
4075         btrfs_release_path(&path);
4076         return found ? 0 : 1;
4077 }
4078
4079 static int is_extent_tree_record(struct extent_record *rec)
4080 {
4081         struct extent_backref *ref, *tmp;
4082         struct tree_backref *back;
4083         int is_extent = 0;
4084
4085         rbtree_postorder_for_each_entry_safe(ref, tmp,
4086                                              &rec->backref_tree, node) {
4087                 if (ref->is_data)
4088                         return 0;
4089                 back = to_tree_backref(ref);
4090                 if (ref->full_backref)
4091                         return 0;
4092                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4093                         is_extent = 1;
4094         }
4095         return is_extent;
4096 }
4097
4098
4099 static int record_bad_block_io(struct btrfs_fs_info *info,
4100                                struct cache_tree *extent_cache,
4101                                u64 start, u64 len)
4102 {
4103         struct extent_record *rec;
4104         struct cache_extent *cache;
4105         struct btrfs_key key;
4106
4107         cache = lookup_cache_extent(extent_cache, start, len);
4108         if (!cache)
4109                 return 0;
4110
4111         rec = container_of(cache, struct extent_record, cache);
4112         if (!is_extent_tree_record(rec))
4113                 return 0;
4114
4115         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4116         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4117 }
4118
4119 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4120                        struct extent_buffer *buf, int slot)
4121 {
4122         if (btrfs_header_level(buf)) {
4123                 struct btrfs_key_ptr ptr1, ptr2;
4124
4125                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4126                                    sizeof(struct btrfs_key_ptr));
4127                 read_extent_buffer(buf, &ptr2,
4128                                    btrfs_node_key_ptr_offset(slot + 1),
4129                                    sizeof(struct btrfs_key_ptr));
4130                 write_extent_buffer(buf, &ptr1,
4131                                     btrfs_node_key_ptr_offset(slot + 1),
4132                                     sizeof(struct btrfs_key_ptr));
4133                 write_extent_buffer(buf, &ptr2,
4134                                     btrfs_node_key_ptr_offset(slot),
4135                                     sizeof(struct btrfs_key_ptr));
4136                 if (slot == 0) {
4137                         struct btrfs_disk_key key;
4138                         btrfs_node_key(buf, &key, 0);
4139                         btrfs_fixup_low_keys(root, path, &key,
4140                                              btrfs_header_level(buf) + 1);
4141                 }
4142         } else {
4143                 struct btrfs_item *item1, *item2;
4144                 struct btrfs_key k1, k2;
4145                 char *item1_data, *item2_data;
4146                 u32 item1_offset, item2_offset, item1_size, item2_size;
4147
4148                 item1 = btrfs_item_nr(slot);
4149                 item2 = btrfs_item_nr(slot + 1);
4150                 btrfs_item_key_to_cpu(buf, &k1, slot);
4151                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4152                 item1_offset = btrfs_item_offset(buf, item1);
4153                 item2_offset = btrfs_item_offset(buf, item2);
4154                 item1_size = btrfs_item_size(buf, item1);
4155                 item2_size = btrfs_item_size(buf, item2);
4156
4157                 item1_data = malloc(item1_size);
4158                 if (!item1_data)
4159                         return -ENOMEM;
4160                 item2_data = malloc(item2_size);
4161                 if (!item2_data) {
4162                         free(item1_data);
4163                         return -ENOMEM;
4164                 }
4165
4166                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4167                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4168
4169                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4170                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4171                 free(item1_data);
4172                 free(item2_data);
4173
4174                 btrfs_set_item_offset(buf, item1, item2_offset);
4175                 btrfs_set_item_offset(buf, item2, item1_offset);
4176                 btrfs_set_item_size(buf, item1, item2_size);
4177                 btrfs_set_item_size(buf, item2, item1_size);
4178
4179                 path->slots[0] = slot;
4180                 btrfs_set_item_key_unsafe(root, path, &k2);
4181                 path->slots[0] = slot + 1;
4182                 btrfs_set_item_key_unsafe(root, path, &k1);
4183         }
4184         return 0;
4185 }
4186
4187 static int fix_key_order(struct btrfs_trans_handle *trans,
4188                          struct btrfs_root *root,
4189                          struct btrfs_path *path)
4190 {
4191         struct extent_buffer *buf;
4192         struct btrfs_key k1, k2;
4193         int i;
4194         int level = path->lowest_level;
4195         int ret = -EIO;
4196
4197         buf = path->nodes[level];
4198         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4199                 if (level) {
4200                         btrfs_node_key_to_cpu(buf, &k1, i);
4201                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4202                 } else {
4203                         btrfs_item_key_to_cpu(buf, &k1, i);
4204                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4205                 }
4206                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4207                         continue;
4208                 ret = swap_values(root, path, buf, i);
4209                 if (ret)
4210                         break;
4211                 btrfs_mark_buffer_dirty(buf);
4212                 i = 0;
4213         }
4214         return ret;
4215 }
4216
4217 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4218                              struct btrfs_root *root,
4219                              struct btrfs_path *path,
4220                              struct extent_buffer *buf, int slot)
4221 {
4222         struct btrfs_key key;
4223         int nritems = btrfs_header_nritems(buf);
4224
4225         btrfs_item_key_to_cpu(buf, &key, slot);
4226
4227         /* These are all the keys we can deal with missing. */
4228         if (key.type != BTRFS_DIR_INDEX_KEY &&
4229             key.type != BTRFS_EXTENT_ITEM_KEY &&
4230             key.type != BTRFS_METADATA_ITEM_KEY &&
4231             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4232             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4233                 return -1;
4234
4235         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4236                (unsigned long long)key.objectid, key.type,
4237                (unsigned long long)key.offset, slot, buf->start);
4238         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4239                               btrfs_item_nr_offset(slot + 1),
4240                               sizeof(struct btrfs_item) *
4241                               (nritems - slot - 1));
4242         btrfs_set_header_nritems(buf, nritems - 1);
4243         if (slot == 0) {
4244                 struct btrfs_disk_key disk_key;
4245
4246                 btrfs_item_key(buf, &disk_key, 0);
4247                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4248         }
4249         btrfs_mark_buffer_dirty(buf);
4250         return 0;
4251 }
4252
4253 static int fix_item_offset(struct btrfs_trans_handle *trans,
4254                            struct btrfs_root *root,
4255                            struct btrfs_path *path)
4256 {
4257         struct extent_buffer *buf;
4258         int i;
4259         int ret = 0;
4260
4261         /* We should only get this for leaves */
4262         BUG_ON(path->lowest_level);
4263         buf = path->nodes[0];
4264 again:
4265         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4266                 unsigned int shift = 0, offset;
4267
4268                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4269                     BTRFS_LEAF_DATA_SIZE(root)) {
4270                         if (btrfs_item_end_nr(buf, i) >
4271                             BTRFS_LEAF_DATA_SIZE(root)) {
4272                                 ret = delete_bogus_item(trans, root, path,
4273                                                         buf, i);
4274                                 if (!ret)
4275                                         goto again;
4276                                 fprintf(stderr, "item is off the end of the "
4277                                         "leaf, can't fix\n");
4278                                 ret = -EIO;
4279                                 break;
4280                         }
4281                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4282                                 btrfs_item_end_nr(buf, i);
4283                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4284                            btrfs_item_offset_nr(buf, i - 1)) {
4285                         if (btrfs_item_end_nr(buf, i) >
4286                             btrfs_item_offset_nr(buf, i - 1)) {
4287                                 ret = delete_bogus_item(trans, root, path,
4288                                                         buf, i);
4289                                 if (!ret)
4290                                         goto again;
4291                                 fprintf(stderr, "items overlap, can't fix\n");
4292                                 ret = -EIO;
4293                                 break;
4294                         }
4295                         shift = btrfs_item_offset_nr(buf, i - 1) -
4296                                 btrfs_item_end_nr(buf, i);
4297                 }
4298                 if (!shift)
4299                         continue;
4300
4301                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4302                        i, shift, (unsigned long long)buf->start);
4303                 offset = btrfs_item_offset_nr(buf, i);
4304                 memmove_extent_buffer(buf,
4305                                       btrfs_leaf_data(buf) + offset + shift,
4306                                       btrfs_leaf_data(buf) + offset,
4307                                       btrfs_item_size_nr(buf, i));
4308                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4309                                       offset + shift);
4310                 btrfs_mark_buffer_dirty(buf);
4311         }
4312
4313         /*
4314          * We may have moved things, in which case we want to exit so we don't
4315          * write those changes out.  Once we have proper abort functionality in
4316          * progs this can be changed to something nicer.
4317          */
4318         BUG_ON(ret);
4319         return ret;
4320 }
4321
4322 /*
4323  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4324  * then just return -EIO.
4325  */
4326 static int try_to_fix_bad_block(struct btrfs_root *root,
4327                                 struct extent_buffer *buf,
4328                                 enum btrfs_tree_block_status status)
4329 {
4330         struct btrfs_trans_handle *trans;
4331         struct ulist *roots;
4332         struct ulist_node *node;
4333         struct btrfs_root *search_root;
4334         struct btrfs_path *path;
4335         struct ulist_iterator iter;
4336         struct btrfs_key root_key, key;
4337         int ret;
4338
4339         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4340             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4341                 return -EIO;
4342
4343         path = btrfs_alloc_path();
4344         if (!path)
4345                 return -EIO;
4346
4347         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4348                                    0, &roots);
4349         if (ret) {
4350                 btrfs_free_path(path);
4351                 return -EIO;
4352         }
4353
4354         ULIST_ITER_INIT(&iter);
4355         while ((node = ulist_next(roots, &iter))) {
4356                 root_key.objectid = node->val;
4357                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4358                 root_key.offset = (u64)-1;
4359
4360                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4361                 if (IS_ERR(root)) {
4362                         ret = -EIO;
4363                         break;
4364                 }
4365
4366
4367                 trans = btrfs_start_transaction(search_root, 0);
4368                 if (IS_ERR(trans)) {
4369                         ret = PTR_ERR(trans);
4370                         break;
4371                 }
4372
4373                 path->lowest_level = btrfs_header_level(buf);
4374                 path->skip_check_block = 1;
4375                 if (path->lowest_level)
4376                         btrfs_node_key_to_cpu(buf, &key, 0);
4377                 else
4378                         btrfs_item_key_to_cpu(buf, &key, 0);
4379                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4380                 if (ret) {
4381                         ret = -EIO;
4382                         btrfs_commit_transaction(trans, search_root);
4383                         break;
4384                 }
4385                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4386                         ret = fix_key_order(trans, search_root, path);
4387                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4388                         ret = fix_item_offset(trans, search_root, path);
4389                 if (ret) {
4390                         btrfs_commit_transaction(trans, search_root);
4391                         break;
4392                 }
4393                 btrfs_release_path(path);
4394                 btrfs_commit_transaction(trans, search_root);
4395         }
4396         ulist_free(roots);
4397         btrfs_free_path(path);
4398         return ret;
4399 }
4400
4401 static int check_block(struct btrfs_root *root,
4402                        struct cache_tree *extent_cache,
4403                        struct extent_buffer *buf, u64 flags)
4404 {
4405         struct extent_record *rec;
4406         struct cache_extent *cache;
4407         struct btrfs_key key;
4408         enum btrfs_tree_block_status status;
4409         int ret = 0;
4410         int level;
4411
4412         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4413         if (!cache)
4414                 return 1;
4415         rec = container_of(cache, struct extent_record, cache);
4416         rec->generation = btrfs_header_generation(buf);
4417
4418         level = btrfs_header_level(buf);
4419         if (btrfs_header_nritems(buf) > 0) {
4420
4421                 if (level == 0)
4422                         btrfs_item_key_to_cpu(buf, &key, 0);
4423                 else
4424                         btrfs_node_key_to_cpu(buf, &key, 0);
4425
4426                 rec->info_objectid = key.objectid;
4427         }
4428         rec->info_level = level;
4429
4430         if (btrfs_is_leaf(buf))
4431                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4432         else
4433                 status = btrfs_check_node(root, &rec->parent_key, buf);
4434
4435         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4436                 if (repair)
4437                         status = try_to_fix_bad_block(root, buf, status);
4438                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4439                         ret = -EIO;
4440                         fprintf(stderr, "bad block %llu\n",
4441                                 (unsigned long long)buf->start);
4442                 } else {
4443                         /*
4444                          * Signal to callers we need to start the scan over
4445                          * again since we'll have cowed blocks.
4446                          */
4447                         ret = -EAGAIN;
4448                 }
4449         } else {
4450                 rec->content_checked = 1;
4451                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4452                         rec->owner_ref_checked = 1;
4453                 else {
4454                         ret = check_owner_ref(root, rec, buf);
4455                         if (!ret)
4456                                 rec->owner_ref_checked = 1;
4457                 }
4458         }
4459         if (!ret)
4460                 maybe_free_extent_rec(extent_cache, rec);
4461         return ret;
4462 }
4463
4464
4465 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4466                                                 u64 parent, u64 root)
4467 {
4468         struct rb_node *node;
4469         struct tree_backref *back = NULL;
4470         struct tree_backref match = {
4471                 .node = {
4472                         .is_data = 0,
4473                 },
4474         };
4475
4476         if (parent) {
4477                 match.parent = parent;
4478                 match.node.full_backref = 1;
4479         } else {
4480                 match.root = root;
4481         }
4482
4483         node = rb_search(&rec->backref_tree, &match.node.node,
4484                          (rb_compare_keys)compare_extent_backref, NULL);
4485         if (node)
4486                 back = to_tree_backref(rb_node_to_extent_backref(node));
4487
4488         return back;
4489 }
4490
4491 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4492                                                 u64 parent, u64 root)
4493 {
4494         struct tree_backref *ref = malloc(sizeof(*ref));
4495
4496         if (!ref)
4497                 return NULL;
4498         memset(&ref->node, 0, sizeof(ref->node));
4499         if (parent > 0) {
4500                 ref->parent = parent;
4501                 ref->node.full_backref = 1;
4502         } else {
4503                 ref->root = root;
4504                 ref->node.full_backref = 0;
4505         }
4506         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4507
4508         return ref;
4509 }
4510
4511 static struct data_backref *find_data_backref(struct extent_record *rec,
4512                                                 u64 parent, u64 root,
4513                                                 u64 owner, u64 offset,
4514                                                 int found_ref,
4515                                                 u64 disk_bytenr, u64 bytes)
4516 {
4517         struct rb_node *node;
4518         struct data_backref *back = NULL;
4519         struct data_backref match = {
4520                 .node = {
4521                         .is_data = 1,
4522                 },
4523                 .owner = owner,
4524                 .offset = offset,
4525                 .bytes = bytes,
4526                 .found_ref = found_ref,
4527                 .disk_bytenr = disk_bytenr,
4528         };
4529
4530         if (parent) {
4531                 match.parent = parent;
4532                 match.node.full_backref = 1;
4533         } else {
4534                 match.root = root;
4535         }
4536
4537         node = rb_search(&rec->backref_tree, &match.node.node,
4538                          (rb_compare_keys)compare_extent_backref, NULL);
4539         if (node)
4540                 back = to_data_backref(rb_node_to_extent_backref(node));
4541
4542         return back;
4543 }
4544
4545 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4546                                                 u64 parent, u64 root,
4547                                                 u64 owner, u64 offset,
4548                                                 u64 max_size)
4549 {
4550         struct data_backref *ref = malloc(sizeof(*ref));
4551
4552         if (!ref)
4553                 return NULL;
4554         memset(&ref->node, 0, sizeof(ref->node));
4555         ref->node.is_data = 1;
4556
4557         if (parent > 0) {
4558                 ref->parent = parent;
4559                 ref->owner = 0;
4560                 ref->offset = 0;
4561                 ref->node.full_backref = 1;
4562         } else {
4563                 ref->root = root;
4564                 ref->owner = owner;
4565                 ref->offset = offset;
4566                 ref->node.full_backref = 0;
4567         }
4568         ref->bytes = max_size;
4569         ref->found_ref = 0;
4570         ref->num_refs = 0;
4571         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4572         if (max_size > rec->max_size)
4573                 rec->max_size = max_size;
4574         return ref;
4575 }
4576
4577 /* Check if the type of extent matches with its chunk */
4578 static void check_extent_type(struct extent_record *rec)
4579 {
4580         struct btrfs_block_group_cache *bg_cache;
4581
4582         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4583         if (!bg_cache)
4584                 return;
4585
4586         /* data extent, check chunk directly*/
4587         if (!rec->metadata) {
4588                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4589                         rec->wrong_chunk_type = 1;
4590                 return;
4591         }
4592
4593         /* metadata extent, check the obvious case first */
4594         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4595                                  BTRFS_BLOCK_GROUP_METADATA))) {
4596                 rec->wrong_chunk_type = 1;
4597                 return;
4598         }
4599
4600         /*
4601          * Check SYSTEM extent, as it's also marked as metadata, we can only
4602          * make sure it's a SYSTEM extent by its backref
4603          */
4604         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4605                 struct extent_backref *node;
4606                 struct tree_backref *tback;
4607                 u64 bg_type;
4608
4609                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4610                 if (node->is_data) {
4611                         /* tree block shouldn't have data backref */
4612                         rec->wrong_chunk_type = 1;
4613                         return;
4614                 }
4615                 tback = container_of(node, struct tree_backref, node);
4616
4617                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4618                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4619                 else
4620                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4621                 if (!(bg_cache->flags & bg_type))
4622                         rec->wrong_chunk_type = 1;
4623         }
4624 }
4625
4626 /*
4627  * Allocate a new extent record, fill default values from @tmpl and insert int
4628  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4629  * the cache, otherwise it fails.
4630  */
4631 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4632                 struct extent_record *tmpl)
4633 {
4634         struct extent_record *rec;
4635         int ret = 0;
4636
4637         rec = malloc(sizeof(*rec));
4638         if (!rec)
4639                 return -ENOMEM;
4640         rec->start = tmpl->start;
4641         rec->max_size = tmpl->max_size;
4642         rec->nr = max(tmpl->nr, tmpl->max_size);
4643         rec->found_rec = tmpl->found_rec;
4644         rec->content_checked = tmpl->content_checked;
4645         rec->owner_ref_checked = tmpl->owner_ref_checked;
4646         rec->num_duplicates = 0;
4647         rec->metadata = tmpl->metadata;
4648         rec->flag_block_full_backref = FLAG_UNSET;
4649         rec->bad_full_backref = 0;
4650         rec->crossing_stripes = 0;
4651         rec->wrong_chunk_type = 0;
4652         rec->is_root = tmpl->is_root;
4653         rec->refs = tmpl->refs;
4654         rec->extent_item_refs = tmpl->extent_item_refs;
4655         rec->parent_generation = tmpl->parent_generation;
4656         INIT_LIST_HEAD(&rec->backrefs);
4657         INIT_LIST_HEAD(&rec->dups);
4658         INIT_LIST_HEAD(&rec->list);
4659         rec->backref_tree = RB_ROOT;
4660         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4661         rec->cache.start = tmpl->start;
4662         rec->cache.size = tmpl->nr;
4663         ret = insert_cache_extent(extent_cache, &rec->cache);
4664         BUG_ON(ret);
4665         bytes_used += rec->nr;
4666
4667         if (tmpl->metadata)
4668                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4669                                 global_info->tree_root->nodesize);
4670         check_extent_type(rec);
4671         return ret;
4672 }
4673
4674 /*
4675  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4676  * some are hints:
4677  * - refs              - if found, increase refs
4678  * - is_root           - if found, set
4679  * - content_checked   - if found, set
4680  * - owner_ref_checked - if found, set
4681  *
4682  * If not found, create a new one, initialize and insert.
4683  */
4684 static int add_extent_rec(struct cache_tree *extent_cache,
4685                 struct extent_record *tmpl)
4686 {
4687         struct extent_record *rec;
4688         struct cache_extent *cache;
4689         int ret = 0;
4690         int dup = 0;
4691
4692         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4693         if (cache) {
4694                 rec = container_of(cache, struct extent_record, cache);
4695                 if (tmpl->refs)
4696                         rec->refs++;
4697                 if (rec->nr == 1)
4698                         rec->nr = max(tmpl->nr, tmpl->max_size);
4699
4700                 /*
4701                  * We need to make sure to reset nr to whatever the extent
4702                  * record says was the real size, this way we can compare it to
4703                  * the backrefs.
4704                  */
4705                 if (tmpl->found_rec) {
4706                         if (tmpl->start != rec->start || rec->found_rec) {
4707                                 struct extent_record *tmp;
4708
4709                                 dup = 1;
4710                                 if (list_empty(&rec->list))
4711                                         list_add_tail(&rec->list,
4712                                                       &duplicate_extents);
4713
4714                                 /*
4715                                  * We have to do this song and dance in case we
4716                                  * find an extent record that falls inside of
4717                                  * our current extent record but does not have
4718                                  * the same objectid.
4719                                  */
4720                                 tmp = malloc(sizeof(*tmp));
4721                                 if (!tmp)
4722                                         return -ENOMEM;
4723                                 tmp->start = tmpl->start;
4724                                 tmp->max_size = tmpl->max_size;
4725                                 tmp->nr = tmpl->nr;
4726                                 tmp->found_rec = 1;
4727                                 tmp->metadata = tmpl->metadata;
4728                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4729                                 INIT_LIST_HEAD(&tmp->list);
4730                                 list_add_tail(&tmp->list, &rec->dups);
4731                                 rec->num_duplicates++;
4732                         } else {
4733                                 rec->nr = tmpl->nr;
4734                                 rec->found_rec = 1;
4735                         }
4736                 }
4737
4738                 if (tmpl->extent_item_refs && !dup) {
4739                         if (rec->extent_item_refs) {
4740                                 fprintf(stderr, "block %llu rec "
4741                                         "extent_item_refs %llu, passed %llu\n",
4742                                         (unsigned long long)tmpl->start,
4743                                         (unsigned long long)
4744                                                         rec->extent_item_refs,
4745                                         (unsigned long long)tmpl->extent_item_refs);
4746                         }
4747                         rec->extent_item_refs = tmpl->extent_item_refs;
4748                 }
4749                 if (tmpl->is_root)
4750                         rec->is_root = 1;
4751                 if (tmpl->content_checked)
4752                         rec->content_checked = 1;
4753                 if (tmpl->owner_ref_checked)
4754                         rec->owner_ref_checked = 1;
4755                 memcpy(&rec->parent_key, &tmpl->parent_key,
4756                                 sizeof(tmpl->parent_key));
4757                 if (tmpl->parent_generation)
4758                         rec->parent_generation = tmpl->parent_generation;
4759                 if (rec->max_size < tmpl->max_size)
4760                         rec->max_size = tmpl->max_size;
4761
4762                 /*
4763                  * A metadata extent can't cross stripe_len boundary, otherwise
4764                  * kernel scrub won't be able to handle it.
4765                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4766                  * it.
4767                  */
4768                 if (tmpl->metadata)
4769                         rec->crossing_stripes = check_crossing_stripes(
4770                                 rec->start, global_info->tree_root->nodesize);
4771                 check_extent_type(rec);
4772                 maybe_free_extent_rec(extent_cache, rec);
4773                 return ret;
4774         }
4775
4776         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4777
4778         return ret;
4779 }
4780
4781 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4782                             u64 parent, u64 root, int found_ref)
4783 {
4784         struct extent_record *rec;
4785         struct tree_backref *back;
4786         struct cache_extent *cache;
4787
4788         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4789         if (!cache) {
4790                 struct extent_record tmpl;
4791
4792                 memset(&tmpl, 0, sizeof(tmpl));
4793                 tmpl.start = bytenr;
4794                 tmpl.nr = 1;
4795                 tmpl.metadata = 1;
4796
4797                 add_extent_rec_nolookup(extent_cache, &tmpl);
4798
4799                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4800                 if (!cache)
4801                         abort();
4802         }
4803
4804         rec = container_of(cache, struct extent_record, cache);
4805         if (rec->start != bytenr) {
4806                 abort();
4807         }
4808
4809         back = find_tree_backref(rec, parent, root);
4810         if (!back) {
4811                 back = alloc_tree_backref(rec, parent, root);
4812                 BUG_ON(!back);
4813         }
4814
4815         if (found_ref) {
4816                 if (back->node.found_ref) {
4817                         fprintf(stderr, "Extent back ref already exists "
4818                                 "for %llu parent %llu root %llu \n",
4819                                 (unsigned long long)bytenr,
4820                                 (unsigned long long)parent,
4821                                 (unsigned long long)root);
4822                 }
4823                 back->node.found_ref = 1;
4824         } else {
4825                 if (back->node.found_extent_tree) {
4826                         fprintf(stderr, "Extent back ref already exists "
4827                                 "for %llu parent %llu root %llu \n",
4828                                 (unsigned long long)bytenr,
4829                                 (unsigned long long)parent,
4830                                 (unsigned long long)root);
4831                 }
4832                 back->node.found_extent_tree = 1;
4833         }
4834         check_extent_type(rec);
4835         maybe_free_extent_rec(extent_cache, rec);
4836         return 0;
4837 }
4838
4839 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4840                             u64 parent, u64 root, u64 owner, u64 offset,
4841                             u32 num_refs, int found_ref, u64 max_size)
4842 {
4843         struct extent_record *rec;
4844         struct data_backref *back;
4845         struct cache_extent *cache;
4846
4847         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4848         if (!cache) {
4849                 struct extent_record tmpl;
4850
4851                 memset(&tmpl, 0, sizeof(tmpl));
4852                 tmpl.start = bytenr;
4853                 tmpl.nr = 1;
4854                 tmpl.max_size = max_size;
4855
4856                 add_extent_rec_nolookup(extent_cache, &tmpl);
4857
4858                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4859                 if (!cache)
4860                         abort();
4861         }
4862
4863         rec = container_of(cache, struct extent_record, cache);
4864         if (rec->max_size < max_size)
4865                 rec->max_size = max_size;
4866
4867         /*
4868          * If found_ref is set then max_size is the real size and must match the
4869          * existing refs.  So if we have already found a ref then we need to
4870          * make sure that this ref matches the existing one, otherwise we need
4871          * to add a new backref so we can notice that the backrefs don't match
4872          * and we need to figure out who is telling the truth.  This is to
4873          * account for that awful fsync bug I introduced where we'd end up with
4874          * a btrfs_file_extent_item that would have its length include multiple
4875          * prealloc extents or point inside of a prealloc extent.
4876          */
4877         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4878                                  bytenr, max_size);
4879         if (!back) {
4880                 back = alloc_data_backref(rec, parent, root, owner, offset,
4881                                           max_size);
4882                 BUG_ON(!back);
4883         }
4884
4885         if (found_ref) {
4886                 BUG_ON(num_refs != 1);
4887                 if (back->node.found_ref)
4888                         BUG_ON(back->bytes != max_size);
4889                 back->node.found_ref = 1;
4890                 back->found_ref += 1;
4891                 back->bytes = max_size;
4892                 back->disk_bytenr = bytenr;
4893                 rec->refs += 1;
4894                 rec->content_checked = 1;
4895                 rec->owner_ref_checked = 1;
4896         } else {
4897                 if (back->node.found_extent_tree) {
4898                         fprintf(stderr, "Extent back ref already exists "
4899                                 "for %llu parent %llu root %llu "
4900                                 "owner %llu offset %llu num_refs %lu\n",
4901                                 (unsigned long long)bytenr,
4902                                 (unsigned long long)parent,
4903                                 (unsigned long long)root,
4904                                 (unsigned long long)owner,
4905                                 (unsigned long long)offset,
4906                                 (unsigned long)num_refs);
4907                 }
4908                 back->num_refs = num_refs;
4909                 back->node.found_extent_tree = 1;
4910         }
4911         maybe_free_extent_rec(extent_cache, rec);
4912         return 0;
4913 }
4914
4915 static int add_pending(struct cache_tree *pending,
4916                        struct cache_tree *seen, u64 bytenr, u32 size)
4917 {
4918         int ret;
4919         ret = add_cache_extent(seen, bytenr, size);
4920         if (ret)
4921                 return ret;
4922         add_cache_extent(pending, bytenr, size);
4923         return 0;
4924 }
4925
4926 static int pick_next_pending(struct cache_tree *pending,
4927                         struct cache_tree *reada,
4928                         struct cache_tree *nodes,
4929                         u64 last, struct block_info *bits, int bits_nr,
4930                         int *reada_bits)
4931 {
4932         unsigned long node_start = last;
4933         struct cache_extent *cache;
4934         int ret;
4935
4936         cache = search_cache_extent(reada, 0);
4937         if (cache) {
4938                 bits[0].start = cache->start;
4939                 bits[0].size = cache->size;
4940                 *reada_bits = 1;
4941                 return 1;
4942         }
4943         *reada_bits = 0;
4944         if (node_start > 32768)
4945                 node_start -= 32768;
4946
4947         cache = search_cache_extent(nodes, node_start);
4948         if (!cache)
4949                 cache = search_cache_extent(nodes, 0);
4950
4951         if (!cache) {
4952                  cache = search_cache_extent(pending, 0);
4953                  if (!cache)
4954                          return 0;
4955                  ret = 0;
4956                  do {
4957                          bits[ret].start = cache->start;
4958                          bits[ret].size = cache->size;
4959                          cache = next_cache_extent(cache);
4960                          ret++;
4961                  } while (cache && ret < bits_nr);
4962                  return ret;
4963         }
4964
4965         ret = 0;
4966         do {
4967                 bits[ret].start = cache->start;
4968                 bits[ret].size = cache->size;
4969                 cache = next_cache_extent(cache);
4970                 ret++;
4971         } while (cache && ret < bits_nr);
4972
4973         if (bits_nr - ret > 8) {
4974                 u64 lookup = bits[0].start + bits[0].size;
4975                 struct cache_extent *next;
4976                 next = search_cache_extent(pending, lookup);
4977                 while(next) {
4978                         if (next->start - lookup > 32768)
4979                                 break;
4980                         bits[ret].start = next->start;
4981                         bits[ret].size = next->size;
4982                         lookup = next->start + next->size;
4983                         ret++;
4984                         if (ret == bits_nr)
4985                                 break;
4986                         next = next_cache_extent(next);
4987                         if (!next)
4988                                 break;
4989                 }
4990         }
4991         return ret;
4992 }
4993
4994 static void free_chunk_record(struct cache_extent *cache)
4995 {
4996         struct chunk_record *rec;
4997
4998         rec = container_of(cache, struct chunk_record, cache);
4999         list_del_init(&rec->list);
5000         list_del_init(&rec->dextents);
5001         free(rec);
5002 }
5003
5004 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5005 {
5006         cache_tree_free_extents(chunk_cache, free_chunk_record);
5007 }
5008
5009 static void free_device_record(struct rb_node *node)
5010 {
5011         struct device_record *rec;
5012
5013         rec = container_of(node, struct device_record, node);
5014         free(rec);
5015 }
5016
5017 FREE_RB_BASED_TREE(device_cache, free_device_record);
5018
5019 int insert_block_group_record(struct block_group_tree *tree,
5020                               struct block_group_record *bg_rec)
5021 {
5022         int ret;
5023
5024         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5025         if (ret)
5026                 return ret;
5027
5028         list_add_tail(&bg_rec->list, &tree->block_groups);
5029         return 0;
5030 }
5031
5032 static void free_block_group_record(struct cache_extent *cache)
5033 {
5034         struct block_group_record *rec;
5035
5036         rec = container_of(cache, struct block_group_record, cache);
5037         list_del_init(&rec->list);
5038         free(rec);
5039 }
5040
5041 void free_block_group_tree(struct block_group_tree *tree)
5042 {
5043         cache_tree_free_extents(&tree->tree, free_block_group_record);
5044 }
5045
5046 int insert_device_extent_record(struct device_extent_tree *tree,
5047                                 struct device_extent_record *de_rec)
5048 {
5049         int ret;
5050
5051         /*
5052          * Device extent is a bit different from the other extents, because
5053          * the extents which belong to the different devices may have the
5054          * same start and size, so we need use the special extent cache
5055          * search/insert functions.
5056          */
5057         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5058         if (ret)
5059                 return ret;
5060
5061         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5062         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5063         return 0;
5064 }
5065
5066 static void free_device_extent_record(struct cache_extent *cache)
5067 {
5068         struct device_extent_record *rec;
5069
5070         rec = container_of(cache, struct device_extent_record, cache);
5071         if (!list_empty(&rec->chunk_list))
5072                 list_del_init(&rec->chunk_list);
5073         if (!list_empty(&rec->device_list))
5074                 list_del_init(&rec->device_list);
5075         free(rec);
5076 }
5077
5078 void free_device_extent_tree(struct device_extent_tree *tree)
5079 {
5080         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5081 }
5082
5083 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5084 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5085                                  struct extent_buffer *leaf, int slot)
5086 {
5087         struct btrfs_extent_ref_v0 *ref0;
5088         struct btrfs_key key;
5089
5090         btrfs_item_key_to_cpu(leaf, &key, slot);
5091         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5092         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5093                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5094         } else {
5095                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5096                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5097         }
5098         return 0;
5099 }
5100 #endif
5101
5102 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5103                                             struct btrfs_key *key,
5104                                             int slot)
5105 {
5106         struct btrfs_chunk *ptr;
5107         struct chunk_record *rec;
5108         int num_stripes, i;
5109
5110         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5111         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5112
5113         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5114         if (!rec) {
5115                 fprintf(stderr, "memory allocation failed\n");
5116                 exit(-1);
5117         }
5118
5119         INIT_LIST_HEAD(&rec->list);
5120         INIT_LIST_HEAD(&rec->dextents);
5121         rec->bg_rec = NULL;
5122
5123         rec->cache.start = key->offset;
5124         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5125
5126         rec->generation = btrfs_header_generation(leaf);
5127
5128         rec->objectid = key->objectid;
5129         rec->type = key->type;
5130         rec->offset = key->offset;
5131
5132         rec->length = rec->cache.size;
5133         rec->owner = btrfs_chunk_owner(leaf, ptr);
5134         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5135         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5136         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5137         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5138         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5139         rec->num_stripes = num_stripes;
5140         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5141
5142         for (i = 0; i < rec->num_stripes; ++i) {
5143                 rec->stripes[i].devid =
5144                         btrfs_stripe_devid_nr(leaf, ptr, i);
5145                 rec->stripes[i].offset =
5146                         btrfs_stripe_offset_nr(leaf, ptr, i);
5147                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5148                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5149                                 BTRFS_UUID_SIZE);
5150         }
5151
5152         return rec;
5153 }
5154
5155 static int process_chunk_item(struct cache_tree *chunk_cache,
5156                               struct btrfs_key *key, struct extent_buffer *eb,
5157                               int slot)
5158 {
5159         struct chunk_record *rec;
5160         int ret = 0;
5161
5162         rec = btrfs_new_chunk_record(eb, key, slot);
5163         ret = insert_cache_extent(chunk_cache, &rec->cache);
5164         if (ret) {
5165                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5166                         rec->offset, rec->length);
5167                 free(rec);
5168         }
5169
5170         return ret;
5171 }
5172
5173 static int process_device_item(struct rb_root *dev_cache,
5174                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5175 {
5176         struct btrfs_dev_item *ptr;
5177         struct device_record *rec;
5178         int ret = 0;
5179
5180         ptr = btrfs_item_ptr(eb,
5181                 slot, struct btrfs_dev_item);
5182
5183         rec = malloc(sizeof(*rec));
5184         if (!rec) {
5185                 fprintf(stderr, "memory allocation failed\n");
5186                 return -ENOMEM;
5187         }
5188
5189         rec->devid = key->offset;
5190         rec->generation = btrfs_header_generation(eb);
5191
5192         rec->objectid = key->objectid;
5193         rec->type = key->type;
5194         rec->offset = key->offset;
5195
5196         rec->devid = btrfs_device_id(eb, ptr);
5197         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5198         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5199
5200         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5201         if (ret) {
5202                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5203                 free(rec);
5204         }
5205
5206         return ret;
5207 }
5208
5209 struct block_group_record *
5210 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5211                              int slot)
5212 {
5213         struct btrfs_block_group_item *ptr;
5214         struct block_group_record *rec;
5215
5216         rec = calloc(1, sizeof(*rec));
5217         if (!rec) {
5218                 fprintf(stderr, "memory allocation failed\n");
5219                 exit(-1);
5220         }
5221
5222         rec->cache.start = key->objectid;
5223         rec->cache.size = key->offset;
5224
5225         rec->generation = btrfs_header_generation(leaf);
5226
5227         rec->objectid = key->objectid;
5228         rec->type = key->type;
5229         rec->offset = key->offset;
5230
5231         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5232         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5233
5234         INIT_LIST_HEAD(&rec->list);
5235
5236         return rec;
5237 }
5238
5239 static int process_block_group_item(struct block_group_tree *block_group_cache,
5240                                     struct btrfs_key *key,
5241                                     struct extent_buffer *eb, int slot)
5242 {
5243         struct block_group_record *rec;
5244         int ret = 0;
5245
5246         rec = btrfs_new_block_group_record(eb, key, slot);
5247         ret = insert_block_group_record(block_group_cache, rec);
5248         if (ret) {
5249                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5250                         rec->objectid, rec->offset);
5251                 free(rec);
5252         }
5253
5254         return ret;
5255 }
5256
5257 struct device_extent_record *
5258 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5259                                struct btrfs_key *key, int slot)
5260 {
5261         struct device_extent_record *rec;
5262         struct btrfs_dev_extent *ptr;
5263
5264         rec = calloc(1, sizeof(*rec));
5265         if (!rec) {
5266                 fprintf(stderr, "memory allocation failed\n");
5267                 exit(-1);
5268         }
5269
5270         rec->cache.objectid = key->objectid;
5271         rec->cache.start = key->offset;
5272
5273         rec->generation = btrfs_header_generation(leaf);
5274
5275         rec->objectid = key->objectid;
5276         rec->type = key->type;
5277         rec->offset = key->offset;
5278
5279         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5280         rec->chunk_objecteid =
5281                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5282         rec->chunk_offset =
5283                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5284         rec->length = btrfs_dev_extent_length(leaf, ptr);
5285         rec->cache.size = rec->length;
5286
5287         INIT_LIST_HEAD(&rec->chunk_list);
5288         INIT_LIST_HEAD(&rec->device_list);
5289
5290         return rec;
5291 }
5292
5293 static int
5294 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5295                            struct btrfs_key *key, struct extent_buffer *eb,
5296                            int slot)
5297 {
5298         struct device_extent_record *rec;
5299         int ret;
5300
5301         rec = btrfs_new_device_extent_record(eb, key, slot);
5302         ret = insert_device_extent_record(dev_extent_cache, rec);
5303         if (ret) {
5304                 fprintf(stderr,
5305                         "Device extent[%llu, %llu, %llu] existed.\n",
5306                         rec->objectid, rec->offset, rec->length);
5307                 free(rec);
5308         }
5309
5310         return ret;
5311 }
5312
5313 static int process_extent_item(struct btrfs_root *root,
5314                                struct cache_tree *extent_cache,
5315                                struct extent_buffer *eb, int slot)
5316 {
5317         struct btrfs_extent_item *ei;
5318         struct btrfs_extent_inline_ref *iref;
5319         struct btrfs_extent_data_ref *dref;
5320         struct btrfs_shared_data_ref *sref;
5321         struct btrfs_key key;
5322         struct extent_record tmpl;
5323         unsigned long end;
5324         unsigned long ptr;
5325         int type;
5326         u32 item_size = btrfs_item_size_nr(eb, slot);
5327         u64 refs = 0;
5328         u64 offset;
5329         u64 num_bytes;
5330         int metadata = 0;
5331
5332         btrfs_item_key_to_cpu(eb, &key, slot);
5333
5334         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5335                 metadata = 1;
5336                 num_bytes = root->nodesize;
5337         } else {
5338                 num_bytes = key.offset;
5339         }
5340
5341         if (item_size < sizeof(*ei)) {
5342 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5343                 struct btrfs_extent_item_v0 *ei0;
5344                 BUG_ON(item_size != sizeof(*ei0));
5345                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5346                 refs = btrfs_extent_refs_v0(eb, ei0);
5347 #else
5348                 BUG();
5349 #endif
5350                 memset(&tmpl, 0, sizeof(tmpl));
5351                 tmpl.start = key.objectid;
5352                 tmpl.nr = num_bytes;
5353                 tmpl.extent_item_refs = refs;
5354                 tmpl.metadata = metadata;
5355                 tmpl.found_rec = 1;
5356                 tmpl.max_size = num_bytes;
5357
5358                 return add_extent_rec(extent_cache, &tmpl);
5359         }
5360
5361         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5362         refs = btrfs_extent_refs(eb, ei);
5363         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5364                 metadata = 1;
5365         else
5366                 metadata = 0;
5367
5368         memset(&tmpl, 0, sizeof(tmpl));
5369         tmpl.start = key.objectid;
5370         tmpl.nr = num_bytes;
5371         tmpl.extent_item_refs = refs;
5372         tmpl.metadata = metadata;
5373         tmpl.found_rec = 1;
5374         tmpl.max_size = num_bytes;
5375         add_extent_rec(extent_cache, &tmpl);
5376
5377         ptr = (unsigned long)(ei + 1);
5378         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5379             key.type == BTRFS_EXTENT_ITEM_KEY)
5380                 ptr += sizeof(struct btrfs_tree_block_info);
5381
5382         end = (unsigned long)ei + item_size;
5383         while (ptr < end) {
5384                 iref = (struct btrfs_extent_inline_ref *)ptr;
5385                 type = btrfs_extent_inline_ref_type(eb, iref);
5386                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5387                 switch (type) {
5388                 case BTRFS_TREE_BLOCK_REF_KEY:
5389                         add_tree_backref(extent_cache, key.objectid,
5390                                          0, offset, 0);
5391                         break;
5392                 case BTRFS_SHARED_BLOCK_REF_KEY:
5393                         add_tree_backref(extent_cache, key.objectid,
5394                                          offset, 0, 0);
5395                         break;
5396                 case BTRFS_EXTENT_DATA_REF_KEY:
5397                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5398                         add_data_backref(extent_cache, key.objectid, 0,
5399                                         btrfs_extent_data_ref_root(eb, dref),
5400                                         btrfs_extent_data_ref_objectid(eb,
5401                                                                        dref),
5402                                         btrfs_extent_data_ref_offset(eb, dref),
5403                                         btrfs_extent_data_ref_count(eb, dref),
5404                                         0, num_bytes);
5405                         break;
5406                 case BTRFS_SHARED_DATA_REF_KEY:
5407                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5408                         add_data_backref(extent_cache, key.objectid, offset,
5409                                         0, 0, 0,
5410                                         btrfs_shared_data_ref_count(eb, sref),
5411                                         0, num_bytes);
5412                         break;
5413                 default:
5414                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5415                                 key.objectid, key.type, num_bytes);
5416                         goto out;
5417                 }
5418                 ptr += btrfs_extent_inline_ref_size(type);
5419         }
5420         WARN_ON(ptr > end);
5421 out:
5422         return 0;
5423 }
5424
5425 static int check_cache_range(struct btrfs_root *root,
5426                              struct btrfs_block_group_cache *cache,
5427                              u64 offset, u64 bytes)
5428 {
5429         struct btrfs_free_space *entry;
5430         u64 *logical;
5431         u64 bytenr;
5432         int stripe_len;
5433         int i, nr, ret;
5434
5435         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5436                 bytenr = btrfs_sb_offset(i);
5437                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5438                                        cache->key.objectid, bytenr, 0,
5439                                        &logical, &nr, &stripe_len);
5440                 if (ret)
5441                         return ret;
5442
5443                 while (nr--) {
5444                         if (logical[nr] + stripe_len <= offset)
5445                                 continue;
5446                         if (offset + bytes <= logical[nr])
5447                                 continue;
5448                         if (logical[nr] == offset) {
5449                                 if (stripe_len >= bytes) {
5450                                         kfree(logical);
5451                                         return 0;
5452                                 }
5453                                 bytes -= stripe_len;
5454                                 offset += stripe_len;
5455                         } else if (logical[nr] < offset) {
5456                                 if (logical[nr] + stripe_len >=
5457                                     offset + bytes) {
5458                                         kfree(logical);
5459                                         return 0;
5460                                 }
5461                                 bytes = (offset + bytes) -
5462                                         (logical[nr] + stripe_len);
5463                                 offset = logical[nr] + stripe_len;
5464                         } else {
5465                                 /*
5466                                  * Could be tricky, the super may land in the
5467                                  * middle of the area we're checking.  First
5468                                  * check the easiest case, it's at the end.
5469                                  */
5470                                 if (logical[nr] + stripe_len >=
5471                                     bytes + offset) {
5472                                         bytes = logical[nr] - offset;
5473                                         continue;
5474                                 }
5475
5476                                 /* Check the left side */
5477                                 ret = check_cache_range(root, cache,
5478                                                         offset,
5479                                                         logical[nr] - offset);
5480                                 if (ret) {
5481                                         kfree(logical);
5482                                         return ret;
5483                                 }
5484
5485                                 /* Now we continue with the right side */
5486                                 bytes = (offset + bytes) -
5487                                         (logical[nr] + stripe_len);
5488                                 offset = logical[nr] + stripe_len;
5489                         }
5490                 }
5491
5492                 kfree(logical);
5493         }
5494
5495         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5496         if (!entry) {
5497                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5498                         offset, offset+bytes);
5499                 return -EINVAL;
5500         }
5501
5502         if (entry->offset != offset) {
5503                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5504                         entry->offset);
5505                 return -EINVAL;
5506         }
5507
5508         if (entry->bytes != bytes) {
5509                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5510                         bytes, entry->bytes, offset);
5511                 return -EINVAL;
5512         }
5513
5514         unlink_free_space(cache->free_space_ctl, entry);
5515         free(entry);
5516         return 0;
5517 }
5518
5519 static int verify_space_cache(struct btrfs_root *root,
5520                               struct btrfs_block_group_cache *cache)
5521 {
5522         struct btrfs_path *path;
5523         struct extent_buffer *leaf;
5524         struct btrfs_key key;
5525         u64 last;
5526         int ret = 0;
5527
5528         path = btrfs_alloc_path();
5529         if (!path)
5530                 return -ENOMEM;
5531
5532         root = root->fs_info->extent_root;
5533
5534         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5535
5536         key.objectid = last;
5537         key.offset = 0;
5538         key.type = BTRFS_EXTENT_ITEM_KEY;
5539
5540         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5541         if (ret < 0)
5542                 goto out;
5543         ret = 0;
5544         while (1) {
5545                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5546                         ret = btrfs_next_leaf(root, path);
5547                         if (ret < 0)
5548                                 goto out;
5549                         if (ret > 0) {
5550                                 ret = 0;
5551                                 break;
5552                         }
5553                 }
5554                 leaf = path->nodes[0];
5555                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5556                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5557                         break;
5558                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5559                     key.type != BTRFS_METADATA_ITEM_KEY) {
5560                         path->slots[0]++;
5561                         continue;
5562                 }
5563
5564                 if (last == key.objectid) {
5565                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5566                                 last = key.objectid + key.offset;
5567                         else
5568                                 last = key.objectid + root->nodesize;
5569                         path->slots[0]++;
5570                         continue;
5571                 }
5572
5573                 ret = check_cache_range(root, cache, last,
5574                                         key.objectid - last);
5575                 if (ret)
5576                         break;
5577                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5578                         last = key.objectid + key.offset;
5579                 else
5580                         last = key.objectid + root->nodesize;
5581                 path->slots[0]++;
5582         }
5583
5584         if (last < cache->key.objectid + cache->key.offset)
5585                 ret = check_cache_range(root, cache, last,
5586                                         cache->key.objectid +
5587                                         cache->key.offset - last);
5588
5589 out:
5590         btrfs_free_path(path);
5591
5592         if (!ret &&
5593             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5594                 fprintf(stderr, "There are still entries left in the space "
5595                         "cache\n");
5596                 ret = -EINVAL;
5597         }
5598
5599         return ret;
5600 }
5601
5602 static int check_space_cache(struct btrfs_root *root)
5603 {
5604         struct btrfs_block_group_cache *cache;
5605         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5606         int ret;
5607         int error = 0;
5608
5609         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5610             btrfs_super_generation(root->fs_info->super_copy) !=
5611             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5612                 printf("cache and super generation don't match, space cache "
5613                        "will be invalidated\n");
5614                 return 0;
5615         }
5616
5617         if (ctx.progress_enabled) {
5618                 ctx.tp = TASK_FREE_SPACE;
5619                 task_start(ctx.info);
5620         }
5621
5622         while (1) {
5623                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5624                 if (!cache)
5625                         break;
5626
5627                 start = cache->key.objectid + cache->key.offset;
5628                 if (!cache->free_space_ctl) {
5629                         if (btrfs_init_free_space_ctl(cache,
5630                                                       root->sectorsize)) {
5631                                 ret = -ENOMEM;
5632                                 break;
5633                         }
5634                 } else {
5635                         btrfs_remove_free_space_cache(cache);
5636                 }
5637
5638                 if (btrfs_fs_compat_ro(root->fs_info,
5639                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5640                         ret = exclude_super_stripes(root, cache);
5641                         if (ret) {
5642                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5643                                         strerror(-ret));
5644                                 error++;
5645                                 continue;
5646                         }
5647                         ret = load_free_space_tree(root->fs_info, cache);
5648                         free_excluded_extents(root, cache);
5649                         if (ret < 0) {
5650                                 fprintf(stderr, "could not load free space tree: %s\n",
5651                                         strerror(-ret));
5652                                 error++;
5653                                 continue;
5654                         }
5655                         error += ret;
5656                 } else {
5657                         ret = load_free_space_cache(root->fs_info, cache);
5658                         if (!ret)
5659                                 continue;
5660                 }
5661
5662                 ret = verify_space_cache(root, cache);
5663                 if (ret) {
5664                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5665                                 cache->key.objectid);
5666                         error++;
5667                 }
5668         }
5669
5670         task_stop(ctx.info);
5671
5672         return error ? -EINVAL : 0;
5673 }
5674
5675 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5676                         u64 num_bytes, unsigned long leaf_offset,
5677                         struct extent_buffer *eb) {
5678
5679         u64 offset = 0;
5680         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5681         char *data;
5682         unsigned long csum_offset;
5683         u32 csum;
5684         u32 csum_expected;
5685         u64 read_len;
5686         u64 data_checked = 0;
5687         u64 tmp;
5688         int ret = 0;
5689         int mirror;
5690         int num_copies;
5691
5692         if (num_bytes % root->sectorsize)
5693                 return -EINVAL;
5694
5695         data = malloc(num_bytes);
5696         if (!data)
5697                 return -ENOMEM;
5698
5699         while (offset < num_bytes) {
5700                 mirror = 0;
5701 again:
5702                 read_len = num_bytes - offset;
5703                 /* read as much space once a time */
5704                 ret = read_extent_data(root, data + offset,
5705                                 bytenr + offset, &read_len, mirror);
5706                 if (ret)
5707                         goto out;
5708                 data_checked = 0;
5709                 /* verify every 4k data's checksum */
5710                 while (data_checked < read_len) {
5711                         csum = ~(u32)0;
5712                         tmp = offset + data_checked;
5713
5714                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5715                                                csum, root->sectorsize);
5716                         btrfs_csum_final(csum, (char *)&csum);
5717
5718                         csum_offset = leaf_offset +
5719                                  tmp / root->sectorsize * csum_size;
5720                         read_extent_buffer(eb, (char *)&csum_expected,
5721                                            csum_offset, csum_size);
5722                         /* try another mirror */
5723                         if (csum != csum_expected) {
5724                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5725                                                 mirror, bytenr + tmp,
5726                                                 csum, csum_expected);
5727                                 num_copies = btrfs_num_copies(
5728                                                 &root->fs_info->mapping_tree,
5729                                                 bytenr, num_bytes);
5730                                 if (mirror < num_copies - 1) {
5731                                         mirror += 1;
5732                                         goto again;
5733                                 }
5734                         }
5735                         data_checked += root->sectorsize;
5736                 }
5737                 offset += read_len;
5738         }
5739 out:
5740         free(data);
5741         return ret;
5742 }
5743
5744 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5745                                u64 num_bytes)
5746 {
5747         struct btrfs_path *path;
5748         struct extent_buffer *leaf;
5749         struct btrfs_key key;
5750         int ret;
5751
5752         path = btrfs_alloc_path();
5753         if (!path) {
5754                 fprintf(stderr, "Error allocating path\n");
5755                 return -ENOMEM;
5756         }
5757
5758         key.objectid = bytenr;
5759         key.type = BTRFS_EXTENT_ITEM_KEY;
5760         key.offset = (u64)-1;
5761
5762 again:
5763         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5764                                 0, 0);
5765         if (ret < 0) {
5766                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5767                 btrfs_free_path(path);
5768                 return ret;
5769         } else if (ret) {
5770                 if (path->slots[0] > 0) {
5771                         path->slots[0]--;
5772                 } else {
5773                         ret = btrfs_prev_leaf(root, path);
5774                         if (ret < 0) {
5775                                 goto out;
5776                         } else if (ret > 0) {
5777                                 ret = 0;
5778                                 goto out;
5779                         }
5780                 }
5781         }
5782
5783         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5784
5785         /*
5786          * Block group items come before extent items if they have the same
5787          * bytenr, so walk back one more just in case.  Dear future traveller,
5788          * first congrats on mastering time travel.  Now if it's not too much
5789          * trouble could you go back to 2006 and tell Chris to make the
5790          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5791          * EXTENT_ITEM_KEY please?
5792          */
5793         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5794                 if (path->slots[0] > 0) {
5795                         path->slots[0]--;
5796                 } else {
5797                         ret = btrfs_prev_leaf(root, path);
5798                         if (ret < 0) {
5799                                 goto out;
5800                         } else if (ret > 0) {
5801                                 ret = 0;
5802                                 goto out;
5803                         }
5804                 }
5805                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5806         }
5807
5808         while (num_bytes) {
5809                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5810                         ret = btrfs_next_leaf(root, path);
5811                         if (ret < 0) {
5812                                 fprintf(stderr, "Error going to next leaf "
5813                                         "%d\n", ret);
5814                                 btrfs_free_path(path);
5815                                 return ret;
5816                         } else if (ret) {
5817                                 break;
5818                         }
5819                 }
5820                 leaf = path->nodes[0];
5821                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5822                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5823                         path->slots[0]++;
5824                         continue;
5825                 }
5826                 if (key.objectid + key.offset < bytenr) {
5827                         path->slots[0]++;
5828                         continue;
5829                 }
5830                 if (key.objectid > bytenr + num_bytes)
5831                         break;
5832
5833                 if (key.objectid == bytenr) {
5834                         if (key.offset >= num_bytes) {
5835                                 num_bytes = 0;
5836                                 break;
5837                         }
5838                         num_bytes -= key.offset;
5839                         bytenr += key.offset;
5840                 } else if (key.objectid < bytenr) {
5841                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5842                                 num_bytes = 0;
5843                                 break;
5844                         }
5845                         num_bytes = (bytenr + num_bytes) -
5846                                 (key.objectid + key.offset);
5847                         bytenr = key.objectid + key.offset;
5848                 } else {
5849                         if (key.objectid + key.offset < bytenr + num_bytes) {
5850                                 u64 new_start = key.objectid + key.offset;
5851                                 u64 new_bytes = bytenr + num_bytes - new_start;
5852
5853                                 /*
5854                                  * Weird case, the extent is in the middle of
5855                                  * our range, we'll have to search one side
5856                                  * and then the other.  Not sure if this happens
5857                                  * in real life, but no harm in coding it up
5858                                  * anyway just in case.
5859                                  */
5860                                 btrfs_release_path(path);
5861                                 ret = check_extent_exists(root, new_start,
5862                                                           new_bytes);
5863                                 if (ret) {
5864                                         fprintf(stderr, "Right section didn't "
5865                                                 "have a record\n");
5866                                         break;
5867                                 }
5868                                 num_bytes = key.objectid - bytenr;
5869                                 goto again;
5870                         }
5871                         num_bytes = key.objectid - bytenr;
5872                 }
5873                 path->slots[0]++;
5874         }
5875         ret = 0;
5876
5877 out:
5878         if (num_bytes && !ret) {
5879                 fprintf(stderr, "There are no extents for csum range "
5880                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5881                 ret = 1;
5882         }
5883
5884         btrfs_free_path(path);
5885         return ret;
5886 }
5887
5888 static int check_csums(struct btrfs_root *root)
5889 {
5890         struct btrfs_path *path;
5891         struct extent_buffer *leaf;
5892         struct btrfs_key key;
5893         u64 offset = 0, num_bytes = 0;
5894         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5895         int errors = 0;
5896         int ret;
5897         u64 data_len;
5898         unsigned long leaf_offset;
5899
5900         root = root->fs_info->csum_root;
5901         if (!extent_buffer_uptodate(root->node)) {
5902                 fprintf(stderr, "No valid csum tree found\n");
5903                 return -ENOENT;
5904         }
5905
5906         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5907         key.type = BTRFS_EXTENT_CSUM_KEY;
5908         key.offset = 0;
5909
5910         path = btrfs_alloc_path();
5911         if (!path)
5912                 return -ENOMEM;
5913
5914         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5915         if (ret < 0) {
5916                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5917                 btrfs_free_path(path);
5918                 return ret;
5919         }
5920
5921         if (ret > 0 && path->slots[0])
5922                 path->slots[0]--;
5923         ret = 0;
5924
5925         while (1) {
5926                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5927                         ret = btrfs_next_leaf(root, path);
5928                         if (ret < 0) {
5929                                 fprintf(stderr, "Error going to next leaf "
5930                                         "%d\n", ret);
5931                                 break;
5932                         }
5933                         if (ret)
5934                                 break;
5935                 }
5936                 leaf = path->nodes[0];
5937
5938                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5939                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5940                         path->slots[0]++;
5941                         continue;
5942                 }
5943
5944                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5945                               csum_size) * root->sectorsize;
5946                 if (!check_data_csum)
5947                         goto skip_csum_check;
5948                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5949                 ret = check_extent_csums(root, key.offset, data_len,
5950                                          leaf_offset, leaf);
5951                 if (ret)
5952                         break;
5953 skip_csum_check:
5954                 if (!num_bytes) {
5955                         offset = key.offset;
5956                 } else if (key.offset != offset + num_bytes) {
5957                         ret = check_extent_exists(root, offset, num_bytes);
5958                         if (ret) {
5959                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5960                                         "there is no extent record\n",
5961                                         offset, offset+num_bytes);
5962                                 errors++;
5963                         }
5964                         offset = key.offset;
5965                         num_bytes = 0;
5966                 }
5967                 num_bytes += data_len;
5968                 path->slots[0]++;
5969         }
5970
5971         btrfs_free_path(path);
5972         return errors;
5973 }
5974
5975 static int is_dropped_key(struct btrfs_key *key,
5976                           struct btrfs_key *drop_key) {
5977         if (key->objectid < drop_key->objectid)
5978                 return 1;
5979         else if (key->objectid == drop_key->objectid) {
5980                 if (key->type < drop_key->type)
5981                         return 1;
5982                 else if (key->type == drop_key->type) {
5983                         if (key->offset < drop_key->offset)
5984                                 return 1;
5985                 }
5986         }
5987         return 0;
5988 }
5989
5990 /*
5991  * Here are the rules for FULL_BACKREF.
5992  *
5993  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5994  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5995  *      FULL_BACKREF set.
5996  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5997  *    if it happened after the relocation occurred since we'll have dropped the
5998  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5999  *    have no real way to know for sure.
6000  *
6001  * We process the blocks one root at a time, and we start from the lowest root
6002  * objectid and go to the highest.  So we can just lookup the owner backref for
6003  * the record and if we don't find it then we know it doesn't exist and we have
6004  * a FULL BACKREF.
6005  *
6006  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6007  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6008  * be set or not and then we can check later once we've gathered all the refs.
6009  */
6010 static int calc_extent_flag(struct btrfs_root *root,
6011                            struct cache_tree *extent_cache,
6012                            struct extent_buffer *buf,
6013                            struct root_item_record *ri,
6014                            u64 *flags)
6015 {
6016         struct extent_record *rec;
6017         struct cache_extent *cache;
6018         struct tree_backref *tback;
6019         u64 owner = 0;
6020
6021         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6022         /* we have added this extent before */
6023         BUG_ON(!cache);
6024         rec = container_of(cache, struct extent_record, cache);
6025
6026         /*
6027          * Except file/reloc tree, we can not have
6028          * FULL BACKREF MODE
6029          */
6030         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6031                 goto normal;
6032         /*
6033          * root node
6034          */
6035         if (buf->start == ri->bytenr)
6036                 goto normal;
6037
6038         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6039                 goto full_backref;
6040
6041         owner = btrfs_header_owner(buf);
6042         if (owner == ri->objectid)
6043                 goto normal;
6044
6045         tback = find_tree_backref(rec, 0, owner);
6046         if (!tback)
6047                 goto full_backref;
6048 normal:
6049         *flags = 0;
6050         if (rec->flag_block_full_backref != FLAG_UNSET &&
6051             rec->flag_block_full_backref != 0)
6052                 rec->bad_full_backref = 1;
6053         return 0;
6054 full_backref:
6055         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6056         if (rec->flag_block_full_backref != FLAG_UNSET &&
6057             rec->flag_block_full_backref != 1)
6058                 rec->bad_full_backref = 1;
6059         return 0;
6060 }
6061
6062 static int run_next_block(struct btrfs_root *root,
6063                           struct block_info *bits,
6064                           int bits_nr,
6065                           u64 *last,
6066                           struct cache_tree *pending,
6067                           struct cache_tree *seen,
6068                           struct cache_tree *reada,
6069                           struct cache_tree *nodes,
6070                           struct cache_tree *extent_cache,
6071                           struct cache_tree *chunk_cache,
6072                           struct rb_root *dev_cache,
6073                           struct block_group_tree *block_group_cache,
6074                           struct device_extent_tree *dev_extent_cache,
6075                           struct root_item_record *ri)
6076 {
6077         struct extent_buffer *buf;
6078         struct extent_record *rec = NULL;
6079         u64 bytenr;
6080         u32 size;
6081         u64 parent;
6082         u64 owner;
6083         u64 flags;
6084         u64 ptr;
6085         u64 gen = 0;
6086         int ret = 0;
6087         int i;
6088         int nritems;
6089         struct btrfs_key key;
6090         struct cache_extent *cache;
6091         int reada_bits;
6092
6093         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6094                                     bits_nr, &reada_bits);
6095         if (nritems == 0)
6096                 return 1;
6097
6098         if (!reada_bits) {
6099                 for(i = 0; i < nritems; i++) {
6100                         ret = add_cache_extent(reada, bits[i].start,
6101                                                bits[i].size);
6102                         if (ret == -EEXIST)
6103                                 continue;
6104
6105                         /* fixme, get the parent transid */
6106                         readahead_tree_block(root, bits[i].start,
6107                                              bits[i].size, 0);
6108                 }
6109         }
6110         *last = bits[0].start;
6111         bytenr = bits[0].start;
6112         size = bits[0].size;
6113
6114         cache = lookup_cache_extent(pending, bytenr, size);
6115         if (cache) {
6116                 remove_cache_extent(pending, cache);
6117                 free(cache);
6118         }
6119         cache = lookup_cache_extent(reada, bytenr, size);
6120         if (cache) {
6121                 remove_cache_extent(reada, cache);
6122                 free(cache);
6123         }
6124         cache = lookup_cache_extent(nodes, bytenr, size);
6125         if (cache) {
6126                 remove_cache_extent(nodes, cache);
6127                 free(cache);
6128         }
6129         cache = lookup_cache_extent(extent_cache, bytenr, size);
6130         if (cache) {
6131                 rec = container_of(cache, struct extent_record, cache);
6132                 gen = rec->parent_generation;
6133         }
6134
6135         /* fixme, get the real parent transid */
6136         buf = read_tree_block(root, bytenr, size, gen);
6137         if (!extent_buffer_uptodate(buf)) {
6138                 record_bad_block_io(root->fs_info,
6139                                     extent_cache, bytenr, size);
6140                 goto out;
6141         }
6142
6143         nritems = btrfs_header_nritems(buf);
6144
6145         flags = 0;
6146         if (!init_extent_tree) {
6147                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6148                                        btrfs_header_level(buf), 1, NULL,
6149                                        &flags);
6150                 if (ret < 0) {
6151                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6152                         if (ret < 0) {
6153                                 fprintf(stderr, "Couldn't calc extent flags\n");
6154                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6155                         }
6156                 }
6157         } else {
6158                 flags = 0;
6159                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6160                 if (ret < 0) {
6161                         fprintf(stderr, "Couldn't calc extent flags\n");
6162                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6163                 }
6164         }
6165
6166         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6167                 if (ri != NULL &&
6168                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6169                     ri->objectid == btrfs_header_owner(buf)) {
6170                         /*
6171                          * Ok we got to this block from it's original owner and
6172                          * we have FULL_BACKREF set.  Relocation can leave
6173                          * converted blocks over so this is altogether possible,
6174                          * however it's not possible if the generation > the
6175                          * last snapshot, so check for this case.
6176                          */
6177                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6178                             btrfs_header_generation(buf) > ri->last_snapshot) {
6179                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6180                                 rec->bad_full_backref = 1;
6181                         }
6182                 }
6183         } else {
6184                 if (ri != NULL &&
6185                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6186                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6187                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6188                         rec->bad_full_backref = 1;
6189                 }
6190         }
6191
6192         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6193                 rec->flag_block_full_backref = 1;
6194                 parent = bytenr;
6195                 owner = 0;
6196         } else {
6197                 rec->flag_block_full_backref = 0;
6198                 parent = 0;
6199                 owner = btrfs_header_owner(buf);
6200         }
6201
6202         ret = check_block(root, extent_cache, buf, flags);
6203         if (ret)
6204                 goto out;
6205
6206         if (btrfs_is_leaf(buf)) {
6207                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6208                 for (i = 0; i < nritems; i++) {
6209                         struct btrfs_file_extent_item *fi;
6210                         btrfs_item_key_to_cpu(buf, &key, i);
6211                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6212                                 process_extent_item(root, extent_cache, buf,
6213                                                     i);
6214                                 continue;
6215                         }
6216                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6217                                 process_extent_item(root, extent_cache, buf,
6218                                                     i);
6219                                 continue;
6220                         }
6221                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6222                                 total_csum_bytes +=
6223                                         btrfs_item_size_nr(buf, i);
6224                                 continue;
6225                         }
6226                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6227                                 process_chunk_item(chunk_cache, &key, buf, i);
6228                                 continue;
6229                         }
6230                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6231                                 process_device_item(dev_cache, &key, buf, i);
6232                                 continue;
6233                         }
6234                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6235                                 process_block_group_item(block_group_cache,
6236                                         &key, buf, i);
6237                                 continue;
6238                         }
6239                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6240                                 process_device_extent_item(dev_extent_cache,
6241                                         &key, buf, i);
6242                                 continue;
6243
6244                         }
6245                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6246 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6247                                 process_extent_ref_v0(extent_cache, buf, i);
6248 #else
6249                                 BUG();
6250 #endif
6251                                 continue;
6252                         }
6253
6254                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6255                                 add_tree_backref(extent_cache, key.objectid, 0,
6256                                                  key.offset, 0);
6257                                 continue;
6258                         }
6259                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6260                                 add_tree_backref(extent_cache, key.objectid,
6261                                                  key.offset, 0, 0);
6262                                 continue;
6263                         }
6264                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6265                                 struct btrfs_extent_data_ref *ref;
6266                                 ref = btrfs_item_ptr(buf, i,
6267                                                 struct btrfs_extent_data_ref);
6268                                 add_data_backref(extent_cache,
6269                                         key.objectid, 0,
6270                                         btrfs_extent_data_ref_root(buf, ref),
6271                                         btrfs_extent_data_ref_objectid(buf,
6272                                                                        ref),
6273                                         btrfs_extent_data_ref_offset(buf, ref),
6274                                         btrfs_extent_data_ref_count(buf, ref),
6275                                         0, root->sectorsize);
6276                                 continue;
6277                         }
6278                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6279                                 struct btrfs_shared_data_ref *ref;
6280                                 ref = btrfs_item_ptr(buf, i,
6281                                                 struct btrfs_shared_data_ref);
6282                                 add_data_backref(extent_cache,
6283                                         key.objectid, key.offset, 0, 0, 0,
6284                                         btrfs_shared_data_ref_count(buf, ref),
6285                                         0, root->sectorsize);
6286                                 continue;
6287                         }
6288                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6289                                 struct bad_item *bad;
6290
6291                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6292                                         continue;
6293                                 if (!owner)
6294                                         continue;
6295                                 bad = malloc(sizeof(struct bad_item));
6296                                 if (!bad)
6297                                         continue;
6298                                 INIT_LIST_HEAD(&bad->list);
6299                                 memcpy(&bad->key, &key,
6300                                        sizeof(struct btrfs_key));
6301                                 bad->root_id = owner;
6302                                 list_add_tail(&bad->list, &delete_items);
6303                                 continue;
6304                         }
6305                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6306                                 continue;
6307                         fi = btrfs_item_ptr(buf, i,
6308                                             struct btrfs_file_extent_item);
6309                         if (btrfs_file_extent_type(buf, fi) ==
6310                             BTRFS_FILE_EXTENT_INLINE)
6311                                 continue;
6312                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6313                                 continue;
6314
6315                         data_bytes_allocated +=
6316                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6317                         if (data_bytes_allocated < root->sectorsize) {
6318                                 abort();
6319                         }
6320                         data_bytes_referenced +=
6321                                 btrfs_file_extent_num_bytes(buf, fi);
6322                         add_data_backref(extent_cache,
6323                                 btrfs_file_extent_disk_bytenr(buf, fi),
6324                                 parent, owner, key.objectid, key.offset -
6325                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6326                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6327                 }
6328         } else {
6329                 int level;
6330                 struct btrfs_key first_key;
6331
6332                 first_key.objectid = 0;
6333
6334                 if (nritems > 0)
6335                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6336                 level = btrfs_header_level(buf);
6337                 for (i = 0; i < nritems; i++) {
6338                         struct extent_record tmpl;
6339
6340                         ptr = btrfs_node_blockptr(buf, i);
6341                         size = root->nodesize;
6342                         btrfs_node_key_to_cpu(buf, &key, i);
6343                         if (ri != NULL) {
6344                                 if ((level == ri->drop_level)
6345                                     && is_dropped_key(&key, &ri->drop_key)) {
6346                                         continue;
6347                                 }
6348                         }
6349
6350                         memset(&tmpl, 0, sizeof(tmpl));
6351                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6352                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6353                         tmpl.start = ptr;
6354                         tmpl.nr = size;
6355                         tmpl.refs = 1;
6356                         tmpl.metadata = 1;
6357                         tmpl.max_size = size;
6358                         ret = add_extent_rec(extent_cache, &tmpl);
6359                         BUG_ON(ret);
6360
6361                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6362
6363                         if (level > 1) {
6364                                 add_pending(nodes, seen, ptr, size);
6365                         } else {
6366                                 add_pending(pending, seen, ptr, size);
6367                         }
6368                 }
6369                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6370                                       nritems) * sizeof(struct btrfs_key_ptr);
6371         }
6372         total_btree_bytes += buf->len;
6373         if (fs_root_objectid(btrfs_header_owner(buf)))
6374                 total_fs_tree_bytes += buf->len;
6375         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6376                 total_extent_tree_bytes += buf->len;
6377         if (!found_old_backref &&
6378             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6379             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6380             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6381                 found_old_backref = 1;
6382 out:
6383         free_extent_buffer(buf);
6384         return ret;
6385 }
6386
6387 static int add_root_to_pending(struct extent_buffer *buf,
6388                                struct cache_tree *extent_cache,
6389                                struct cache_tree *pending,
6390                                struct cache_tree *seen,
6391                                struct cache_tree *nodes,
6392                                u64 objectid)
6393 {
6394         struct extent_record tmpl;
6395
6396         if (btrfs_header_level(buf) > 0)
6397                 add_pending(nodes, seen, buf->start, buf->len);
6398         else
6399                 add_pending(pending, seen, buf->start, buf->len);
6400
6401         memset(&tmpl, 0, sizeof(tmpl));
6402         tmpl.start = buf->start;
6403         tmpl.nr = buf->len;
6404         tmpl.is_root = 1;
6405         tmpl.refs = 1;
6406         tmpl.metadata = 1;
6407         tmpl.max_size = buf->len;
6408         add_extent_rec(extent_cache, &tmpl);
6409
6410         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6411             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6412                 add_tree_backref(extent_cache, buf->start, buf->start,
6413                                  0, 1);
6414         else
6415                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6416         return 0;
6417 }
6418
6419 /* as we fix the tree, we might be deleting blocks that
6420  * we're tracking for repair.  This hook makes sure we
6421  * remove any backrefs for blocks as we are fixing them.
6422  */
6423 static int free_extent_hook(struct btrfs_trans_handle *trans,
6424                             struct btrfs_root *root,
6425                             u64 bytenr, u64 num_bytes, u64 parent,
6426                             u64 root_objectid, u64 owner, u64 offset,
6427                             int refs_to_drop)
6428 {
6429         struct extent_record *rec;
6430         struct cache_extent *cache;
6431         int is_data;
6432         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6433
6434         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6435         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6436         if (!cache)
6437                 return 0;
6438
6439         rec = container_of(cache, struct extent_record, cache);
6440         if (is_data) {
6441                 struct data_backref *back;
6442                 back = find_data_backref(rec, parent, root_objectid, owner,
6443                                          offset, 1, bytenr, num_bytes);
6444                 if (!back)
6445                         goto out;
6446                 if (back->node.found_ref) {
6447                         back->found_ref -= refs_to_drop;
6448                         if (rec->refs)
6449                                 rec->refs -= refs_to_drop;
6450                 }
6451                 if (back->node.found_extent_tree) {
6452                         back->num_refs -= refs_to_drop;
6453                         if (rec->extent_item_refs)
6454                                 rec->extent_item_refs -= refs_to_drop;
6455                 }
6456                 if (back->found_ref == 0)
6457                         back->node.found_ref = 0;
6458                 if (back->num_refs == 0)
6459                         back->node.found_extent_tree = 0;
6460
6461                 if (!back->node.found_extent_tree && back->node.found_ref) {
6462                         rb_erase(&back->node.node, &rec->backref_tree);
6463                         free(back);
6464                 }
6465         } else {
6466                 struct tree_backref *back;
6467                 back = find_tree_backref(rec, parent, root_objectid);
6468                 if (!back)
6469                         goto out;
6470                 if (back->node.found_ref) {
6471                         if (rec->refs)
6472                                 rec->refs--;
6473                         back->node.found_ref = 0;
6474                 }
6475                 if (back->node.found_extent_tree) {
6476                         if (rec->extent_item_refs)
6477                                 rec->extent_item_refs--;
6478                         back->node.found_extent_tree = 0;
6479                 }
6480                 if (!back->node.found_extent_tree && back->node.found_ref) {
6481                         rb_erase(&back->node.node, &rec->backref_tree);
6482                         free(back);
6483                 }
6484         }
6485         maybe_free_extent_rec(extent_cache, rec);
6486 out:
6487         return 0;
6488 }
6489
6490 static int delete_extent_records(struct btrfs_trans_handle *trans,
6491                                  struct btrfs_root *root,
6492                                  struct btrfs_path *path,
6493                                  u64 bytenr, u64 new_len)
6494 {
6495         struct btrfs_key key;
6496         struct btrfs_key found_key;
6497         struct extent_buffer *leaf;
6498         int ret;
6499         int slot;
6500
6501
6502         key.objectid = bytenr;
6503         key.type = (u8)-1;
6504         key.offset = (u64)-1;
6505
6506         while(1) {
6507                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6508                                         &key, path, 0, 1);
6509                 if (ret < 0)
6510                         break;
6511
6512                 if (ret > 0) {
6513                         ret = 0;
6514                         if (path->slots[0] == 0)
6515                                 break;
6516                         path->slots[0]--;
6517                 }
6518                 ret = 0;
6519
6520                 leaf = path->nodes[0];
6521                 slot = path->slots[0];
6522
6523                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6524                 if (found_key.objectid != bytenr)
6525                         break;
6526
6527                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6528                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6529                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6530                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6531                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6532                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6533                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6534                         btrfs_release_path(path);
6535                         if (found_key.type == 0) {
6536                                 if (found_key.offset == 0)
6537                                         break;
6538                                 key.offset = found_key.offset - 1;
6539                                 key.type = found_key.type;
6540                         }
6541                         key.type = found_key.type - 1;
6542                         key.offset = (u64)-1;
6543                         continue;
6544                 }
6545
6546                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6547                         found_key.objectid, found_key.type, found_key.offset);
6548
6549                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6550                 if (ret)
6551                         break;
6552                 btrfs_release_path(path);
6553
6554                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6555                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6556                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6557                                 found_key.offset : root->nodesize;
6558
6559                         ret = btrfs_update_block_group(trans, root, bytenr,
6560                                                        bytes, 0, 0);
6561                         if (ret)
6562                                 break;
6563                 }
6564         }
6565
6566         btrfs_release_path(path);
6567         return ret;
6568 }
6569
6570 /*
6571  * for a single backref, this will allocate a new extent
6572  * and add the backref to it.
6573  */
6574 static int record_extent(struct btrfs_trans_handle *trans,
6575                          struct btrfs_fs_info *info,
6576                          struct btrfs_path *path,
6577                          struct extent_record *rec,
6578                          struct extent_backref *back,
6579                          int allocated, u64 flags)
6580 {
6581         int ret;
6582         struct btrfs_root *extent_root = info->extent_root;
6583         struct extent_buffer *leaf;
6584         struct btrfs_key ins_key;
6585         struct btrfs_extent_item *ei;
6586         struct tree_backref *tback;
6587         struct data_backref *dback;
6588         struct btrfs_tree_block_info *bi;
6589
6590         if (!back->is_data)
6591                 rec->max_size = max_t(u64, rec->max_size,
6592                                     info->extent_root->nodesize);
6593
6594         if (!allocated) {
6595                 u32 item_size = sizeof(*ei);
6596
6597                 if (!back->is_data)
6598                         item_size += sizeof(*bi);
6599
6600                 ins_key.objectid = rec->start;
6601                 ins_key.offset = rec->max_size;
6602                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6603
6604                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6605                                         &ins_key, item_size);
6606                 if (ret)
6607                         goto fail;
6608
6609                 leaf = path->nodes[0];
6610                 ei = btrfs_item_ptr(leaf, path->slots[0],
6611                                     struct btrfs_extent_item);
6612
6613                 btrfs_set_extent_refs(leaf, ei, 0);
6614                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6615
6616                 if (back->is_data) {
6617                         btrfs_set_extent_flags(leaf, ei,
6618                                                BTRFS_EXTENT_FLAG_DATA);
6619                 } else {
6620                         struct btrfs_disk_key copy_key;;
6621
6622                         tback = to_tree_backref(back);
6623                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6624                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6625                                              sizeof(*bi));
6626
6627                         btrfs_set_disk_key_objectid(&copy_key,
6628                                                     rec->info_objectid);
6629                         btrfs_set_disk_key_type(&copy_key, 0);
6630                         btrfs_set_disk_key_offset(&copy_key, 0);
6631
6632                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6633                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6634
6635                         btrfs_set_extent_flags(leaf, ei,
6636                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6637                 }
6638
6639                 btrfs_mark_buffer_dirty(leaf);
6640                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6641                                                rec->max_size, 1, 0);
6642                 if (ret)
6643                         goto fail;
6644                 btrfs_release_path(path);
6645         }
6646
6647         if (back->is_data) {
6648                 u64 parent;
6649                 int i;
6650
6651                 dback = to_data_backref(back);
6652                 if (back->full_backref)
6653                         parent = dback->parent;
6654                 else
6655                         parent = 0;
6656
6657                 for (i = 0; i < dback->found_ref; i++) {
6658                         /* if parent != 0, we're doing a full backref
6659                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6660                          * just makes the backref allocator create a data
6661                          * backref
6662                          */
6663                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6664                                                    rec->start, rec->max_size,
6665                                                    parent,
6666                                                    dback->root,
6667                                                    parent ?
6668                                                    BTRFS_FIRST_FREE_OBJECTID :
6669                                                    dback->owner,
6670                                                    dback->offset);
6671                         if (ret)
6672                                 break;
6673                 }
6674                 fprintf(stderr, "adding new data backref"
6675                                 " on %llu %s %llu owner %llu"
6676                                 " offset %llu found %d\n",
6677                                 (unsigned long long)rec->start,
6678                                 back->full_backref ?
6679                                 "parent" : "root",
6680                                 back->full_backref ?
6681                                 (unsigned long long)parent :
6682                                 (unsigned long long)dback->root,
6683                                 (unsigned long long)dback->owner,
6684                                 (unsigned long long)dback->offset,
6685                                 dback->found_ref);
6686         } else {
6687                 u64 parent;
6688
6689                 tback = to_tree_backref(back);
6690                 if (back->full_backref)
6691                         parent = tback->parent;
6692                 else
6693                         parent = 0;
6694
6695                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6696                                            rec->start, rec->max_size,
6697                                            parent, tback->root, 0, 0);
6698                 fprintf(stderr, "adding new tree backref on "
6699                         "start %llu len %llu parent %llu root %llu\n",
6700                         rec->start, rec->max_size, parent, tback->root);
6701         }
6702 fail:
6703         btrfs_release_path(path);
6704         return ret;
6705 }
6706
6707 static struct extent_entry *find_entry(struct list_head *entries,
6708                                        u64 bytenr, u64 bytes)
6709 {
6710         struct extent_entry *entry = NULL;
6711
6712         list_for_each_entry(entry, entries, list) {
6713                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6714                         return entry;
6715         }
6716
6717         return NULL;
6718 }
6719
6720 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6721 {
6722         struct extent_entry *entry, *best = NULL, *prev = NULL;
6723
6724         list_for_each_entry(entry, entries, list) {
6725                 if (!prev) {
6726                         prev = entry;
6727                         continue;
6728                 }
6729
6730                 /*
6731                  * If there are as many broken entries as entries then we know
6732                  * not to trust this particular entry.
6733                  */
6734                 if (entry->broken == entry->count)
6735                         continue;
6736
6737                 /*
6738                  * If our current entry == best then we can't be sure our best
6739                  * is really the best, so we need to keep searching.
6740                  */
6741                 if (best && best->count == entry->count) {
6742                         prev = entry;
6743                         best = NULL;
6744                         continue;
6745                 }
6746
6747                 /* Prev == entry, not good enough, have to keep searching */
6748                 if (!prev->broken && prev->count == entry->count)
6749                         continue;
6750
6751                 if (!best)
6752                         best = (prev->count > entry->count) ? prev : entry;
6753                 else if (best->count < entry->count)
6754                         best = entry;
6755                 prev = entry;
6756         }
6757
6758         return best;
6759 }
6760
6761 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6762                       struct data_backref *dback, struct extent_entry *entry)
6763 {
6764         struct btrfs_trans_handle *trans;
6765         struct btrfs_root *root;
6766         struct btrfs_file_extent_item *fi;
6767         struct extent_buffer *leaf;
6768         struct btrfs_key key;
6769         u64 bytenr, bytes;
6770         int ret, err;
6771
6772         key.objectid = dback->root;
6773         key.type = BTRFS_ROOT_ITEM_KEY;
6774         key.offset = (u64)-1;
6775         root = btrfs_read_fs_root(info, &key);
6776         if (IS_ERR(root)) {
6777                 fprintf(stderr, "Couldn't find root for our ref\n");
6778                 return -EINVAL;
6779         }
6780
6781         /*
6782          * The backref points to the original offset of the extent if it was
6783          * split, so we need to search down to the offset we have and then walk
6784          * forward until we find the backref we're looking for.
6785          */
6786         key.objectid = dback->owner;
6787         key.type = BTRFS_EXTENT_DATA_KEY;
6788         key.offset = dback->offset;
6789         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6790         if (ret < 0) {
6791                 fprintf(stderr, "Error looking up ref %d\n", ret);
6792                 return ret;
6793         }
6794
6795         while (1) {
6796                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6797                         ret = btrfs_next_leaf(root, path);
6798                         if (ret) {
6799                                 fprintf(stderr, "Couldn't find our ref, next\n");
6800                                 return -EINVAL;
6801                         }
6802                 }
6803                 leaf = path->nodes[0];
6804                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6805                 if (key.objectid != dback->owner ||
6806                     key.type != BTRFS_EXTENT_DATA_KEY) {
6807                         fprintf(stderr, "Couldn't find our ref, search\n");
6808                         return -EINVAL;
6809                 }
6810                 fi = btrfs_item_ptr(leaf, path->slots[0],
6811                                     struct btrfs_file_extent_item);
6812                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6813                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6814
6815                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6816                         break;
6817                 path->slots[0]++;
6818         }
6819
6820         btrfs_release_path(path);
6821
6822         trans = btrfs_start_transaction(root, 1);
6823         if (IS_ERR(trans))
6824                 return PTR_ERR(trans);
6825
6826         /*
6827          * Ok we have the key of the file extent we want to fix, now we can cow
6828          * down to the thing and fix it.
6829          */
6830         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6831         if (ret < 0) {
6832                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6833                         key.objectid, key.type, key.offset, ret);
6834                 goto out;
6835         }
6836         if (ret > 0) {
6837                 fprintf(stderr, "Well that's odd, we just found this key "
6838                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6839                         key.offset);
6840                 ret = -EINVAL;
6841                 goto out;
6842         }
6843         leaf = path->nodes[0];
6844         fi = btrfs_item_ptr(leaf, path->slots[0],
6845                             struct btrfs_file_extent_item);
6846
6847         if (btrfs_file_extent_compression(leaf, fi) &&
6848             dback->disk_bytenr != entry->bytenr) {
6849                 fprintf(stderr, "Ref doesn't match the record start and is "
6850                         "compressed, please take a btrfs-image of this file "
6851                         "system and send it to a btrfs developer so they can "
6852                         "complete this functionality for bytenr %Lu\n",
6853                         dback->disk_bytenr);
6854                 ret = -EINVAL;
6855                 goto out;
6856         }
6857
6858         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6859                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6860         } else if (dback->disk_bytenr > entry->bytenr) {
6861                 u64 off_diff, offset;
6862
6863                 off_diff = dback->disk_bytenr - entry->bytenr;
6864                 offset = btrfs_file_extent_offset(leaf, fi);
6865                 if (dback->disk_bytenr + offset +
6866                     btrfs_file_extent_num_bytes(leaf, fi) >
6867                     entry->bytenr + entry->bytes) {
6868                         fprintf(stderr, "Ref is past the entry end, please "
6869                                 "take a btrfs-image of this file system and "
6870                                 "send it to a btrfs developer, ref %Lu\n",
6871                                 dback->disk_bytenr);
6872                         ret = -EINVAL;
6873                         goto out;
6874                 }
6875                 offset += off_diff;
6876                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6877                 btrfs_set_file_extent_offset(leaf, fi, offset);
6878         } else if (dback->disk_bytenr < entry->bytenr) {
6879                 u64 offset;
6880
6881                 offset = btrfs_file_extent_offset(leaf, fi);
6882                 if (dback->disk_bytenr + offset < entry->bytenr) {
6883                         fprintf(stderr, "Ref is before the entry start, please"
6884                                 " take a btrfs-image of this file system and "
6885                                 "send it to a btrfs developer, ref %Lu\n",
6886                                 dback->disk_bytenr);
6887                         ret = -EINVAL;
6888                         goto out;
6889                 }
6890
6891                 offset += dback->disk_bytenr;
6892                 offset -= entry->bytenr;
6893                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6894                 btrfs_set_file_extent_offset(leaf, fi, offset);
6895         }
6896
6897         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6898
6899         /*
6900          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6901          * only do this if we aren't using compression, otherwise it's a
6902          * trickier case.
6903          */
6904         if (!btrfs_file_extent_compression(leaf, fi))
6905                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6906         else
6907                 printf("ram bytes may be wrong?\n");
6908         btrfs_mark_buffer_dirty(leaf);
6909 out:
6910         err = btrfs_commit_transaction(trans, root);
6911         btrfs_release_path(path);
6912         return ret ? ret : err;
6913 }
6914
6915 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6916                            struct extent_record *rec)
6917 {
6918         struct extent_backref *back, *tmp;
6919         struct data_backref *dback;
6920         struct extent_entry *entry, *best = NULL;
6921         LIST_HEAD(entries);
6922         int nr_entries = 0;
6923         int broken_entries = 0;
6924         int ret = 0;
6925         short mismatch = 0;
6926
6927         /*
6928          * Metadata is easy and the backrefs should always agree on bytenr and
6929          * size, if not we've got bigger issues.
6930          */
6931         if (rec->metadata)
6932                 return 0;
6933
6934         rbtree_postorder_for_each_entry_safe(back, tmp,
6935                                              &rec->backref_tree, node) {
6936                 if (back->full_backref || !back->is_data)
6937                         continue;
6938
6939                 dback = to_data_backref(back);
6940
6941                 /*
6942                  * We only pay attention to backrefs that we found a real
6943                  * backref for.
6944                  */
6945                 if (dback->found_ref == 0)
6946                         continue;
6947
6948                 /*
6949                  * For now we only catch when the bytes don't match, not the
6950                  * bytenr.  We can easily do this at the same time, but I want
6951                  * to have a fs image to test on before we just add repair
6952                  * functionality willy-nilly so we know we won't screw up the
6953                  * repair.
6954                  */
6955
6956                 entry = find_entry(&entries, dback->disk_bytenr,
6957                                    dback->bytes);
6958                 if (!entry) {
6959                         entry = malloc(sizeof(struct extent_entry));
6960                         if (!entry) {
6961                                 ret = -ENOMEM;
6962                                 goto out;
6963                         }
6964                         memset(entry, 0, sizeof(*entry));
6965                         entry->bytenr = dback->disk_bytenr;
6966                         entry->bytes = dback->bytes;
6967                         list_add_tail(&entry->list, &entries);
6968                         nr_entries++;
6969                 }
6970
6971                 /*
6972                  * If we only have on entry we may think the entries agree when
6973                  * in reality they don't so we have to do some extra checking.
6974                  */
6975                 if (dback->disk_bytenr != rec->start ||
6976                     dback->bytes != rec->nr || back->broken)
6977                         mismatch = 1;
6978
6979                 if (back->broken) {
6980                         entry->broken++;
6981                         broken_entries++;
6982                 }
6983
6984                 entry->count++;
6985         }
6986
6987         /* Yay all the backrefs agree, carry on good sir */
6988         if (nr_entries <= 1 && !mismatch)
6989                 goto out;
6990
6991         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6992                 "%Lu\n", rec->start);
6993
6994         /*
6995          * First we want to see if the backrefs can agree amongst themselves who
6996          * is right, so figure out which one of the entries has the highest
6997          * count.
6998          */
6999         best = find_most_right_entry(&entries);
7000
7001         /*
7002          * Ok so we may have an even split between what the backrefs think, so
7003          * this is where we use the extent ref to see what it thinks.
7004          */
7005         if (!best) {
7006                 entry = find_entry(&entries, rec->start, rec->nr);
7007                 if (!entry && (!broken_entries || !rec->found_rec)) {
7008                         fprintf(stderr, "Backrefs don't agree with each other "
7009                                 "and extent record doesn't agree with anybody,"
7010                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7011                                 rec->start, rec->nr);
7012                         ret = -EINVAL;
7013                         goto out;
7014                 } else if (!entry) {
7015                         /*
7016                          * Ok our backrefs were broken, we'll assume this is the
7017                          * correct value and add an entry for this range.
7018                          */
7019                         entry = malloc(sizeof(struct extent_entry));
7020                         if (!entry) {
7021                                 ret = -ENOMEM;
7022                                 goto out;
7023                         }
7024                         memset(entry, 0, sizeof(*entry));
7025                         entry->bytenr = rec->start;
7026                         entry->bytes = rec->nr;
7027                         list_add_tail(&entry->list, &entries);
7028                         nr_entries++;
7029                 }
7030                 entry->count++;
7031                 best = find_most_right_entry(&entries);
7032                 if (!best) {
7033                         fprintf(stderr, "Backrefs and extent record evenly "
7034                                 "split on who is right, this is going to "
7035                                 "require user input to fix bytenr %Lu bytes "
7036                                 "%Lu\n", rec->start, rec->nr);
7037                         ret = -EINVAL;
7038                         goto out;
7039                 }
7040         }
7041
7042         /*
7043          * I don't think this can happen currently as we'll abort() if we catch
7044          * this case higher up, but in case somebody removes that we still can't
7045          * deal with it properly here yet, so just bail out of that's the case.
7046          */
7047         if (best->bytenr != rec->start) {
7048                 fprintf(stderr, "Extent start and backref starts don't match, "
7049                         "please use btrfs-image on this file system and send "
7050                         "it to a btrfs developer so they can make fsck fix "
7051                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7052                         rec->start, rec->nr);
7053                 ret = -EINVAL;
7054                 goto out;
7055         }
7056
7057         /*
7058          * Ok great we all agreed on an extent record, let's go find the real
7059          * references and fix up the ones that don't match.
7060          */
7061         rbtree_postorder_for_each_entry_safe(back, tmp,
7062                                              &rec->backref_tree, node) {
7063                 if (back->full_backref || !back->is_data)
7064                         continue;
7065
7066                 dback = to_data_backref(back);
7067
7068                 /*
7069                  * Still ignoring backrefs that don't have a real ref attached
7070                  * to them.
7071                  */
7072                 if (dback->found_ref == 0)
7073                         continue;
7074
7075                 if (dback->bytes == best->bytes &&
7076                     dback->disk_bytenr == best->bytenr)
7077                         continue;
7078
7079                 ret = repair_ref(info, path, dback, best);
7080                 if (ret)
7081                         goto out;
7082         }
7083
7084         /*
7085          * Ok we messed with the actual refs, which means we need to drop our
7086          * entire cache and go back and rescan.  I know this is a huge pain and
7087          * adds a lot of extra work, but it's the only way to be safe.  Once all
7088          * the backrefs agree we may not need to do anything to the extent
7089          * record itself.
7090          */
7091         ret = -EAGAIN;
7092 out:
7093         while (!list_empty(&entries)) {
7094                 entry = list_entry(entries.next, struct extent_entry, list);
7095                 list_del_init(&entry->list);
7096                 free(entry);
7097         }
7098         return ret;
7099 }
7100
7101 static int process_duplicates(struct btrfs_root *root,
7102                               struct cache_tree *extent_cache,
7103                               struct extent_record *rec)
7104 {
7105         struct extent_record *good, *tmp;
7106         struct cache_extent *cache;
7107         int ret;
7108
7109         /*
7110          * If we found a extent record for this extent then return, or if we
7111          * have more than one duplicate we are likely going to need to delete
7112          * something.
7113          */
7114         if (rec->found_rec || rec->num_duplicates > 1)
7115                 return 0;
7116
7117         /* Shouldn't happen but just in case */
7118         BUG_ON(!rec->num_duplicates);
7119
7120         /*
7121          * So this happens if we end up with a backref that doesn't match the
7122          * actual extent entry.  So either the backref is bad or the extent
7123          * entry is bad.  Either way we want to have the extent_record actually
7124          * reflect what we found in the extent_tree, so we need to take the
7125          * duplicate out and use that as the extent_record since the only way we
7126          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7127          */
7128         remove_cache_extent(extent_cache, &rec->cache);
7129
7130         good = to_extent_record(rec->dups.next);
7131         list_del_init(&good->list);
7132         INIT_LIST_HEAD(&good->backrefs);
7133         INIT_LIST_HEAD(&good->dups);
7134         good->cache.start = good->start;
7135         good->cache.size = good->nr;
7136         good->content_checked = 0;
7137         good->owner_ref_checked = 0;
7138         good->num_duplicates = 0;
7139         good->refs = rec->refs;
7140         list_splice_init(&rec->backrefs, &good->backrefs);
7141         while (1) {
7142                 cache = lookup_cache_extent(extent_cache, good->start,
7143                                             good->nr);
7144                 if (!cache)
7145                         break;
7146                 tmp = container_of(cache, struct extent_record, cache);
7147
7148                 /*
7149                  * If we find another overlapping extent and it's found_rec is
7150                  * set then it's a duplicate and we need to try and delete
7151                  * something.
7152                  */
7153                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7154                         if (list_empty(&good->list))
7155                                 list_add_tail(&good->list,
7156                                               &duplicate_extents);
7157                         good->num_duplicates += tmp->num_duplicates + 1;
7158                         list_splice_init(&tmp->dups, &good->dups);
7159                         list_del_init(&tmp->list);
7160                         list_add_tail(&tmp->list, &good->dups);
7161                         remove_cache_extent(extent_cache, &tmp->cache);
7162                         continue;
7163                 }
7164
7165                 /*
7166                  * Ok we have another non extent item backed extent rec, so lets
7167                  * just add it to this extent and carry on like we did above.
7168                  */
7169                 good->refs += tmp->refs;
7170                 list_splice_init(&tmp->backrefs, &good->backrefs);
7171                 remove_cache_extent(extent_cache, &tmp->cache);
7172                 free(tmp);
7173         }
7174         ret = insert_cache_extent(extent_cache, &good->cache);
7175         BUG_ON(ret);
7176         free(rec);
7177         return good->num_duplicates ? 0 : 1;
7178 }
7179
7180 static int delete_duplicate_records(struct btrfs_root *root,
7181                                     struct extent_record *rec)
7182 {
7183         struct btrfs_trans_handle *trans;
7184         LIST_HEAD(delete_list);
7185         struct btrfs_path *path;
7186         struct extent_record *tmp, *good, *n;
7187         int nr_del = 0;
7188         int ret = 0, err;
7189         struct btrfs_key key;
7190
7191         path = btrfs_alloc_path();
7192         if (!path) {
7193                 ret = -ENOMEM;
7194                 goto out;
7195         }
7196
7197         good = rec;
7198         /* Find the record that covers all of the duplicates. */
7199         list_for_each_entry(tmp, &rec->dups, list) {
7200                 if (good->start < tmp->start)
7201                         continue;
7202                 if (good->nr > tmp->nr)
7203                         continue;
7204
7205                 if (tmp->start + tmp->nr < good->start + good->nr) {
7206                         fprintf(stderr, "Ok we have overlapping extents that "
7207                                 "aren't completely covered by each other, this "
7208                                 "is going to require more careful thought.  "
7209                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7210                                 tmp->start, tmp->nr, good->start, good->nr);
7211                         abort();
7212                 }
7213                 good = tmp;
7214         }
7215
7216         if (good != rec)
7217                 list_add_tail(&rec->list, &delete_list);
7218
7219         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7220                 if (tmp == good)
7221                         continue;
7222                 list_move_tail(&tmp->list, &delete_list);
7223         }
7224
7225         root = root->fs_info->extent_root;
7226         trans = btrfs_start_transaction(root, 1);
7227         if (IS_ERR(trans)) {
7228                 ret = PTR_ERR(trans);
7229                 goto out;
7230         }
7231
7232         list_for_each_entry(tmp, &delete_list, list) {
7233                 if (tmp->found_rec == 0)
7234                         continue;
7235                 key.objectid = tmp->start;
7236                 key.type = BTRFS_EXTENT_ITEM_KEY;
7237                 key.offset = tmp->nr;
7238
7239                 /* Shouldn't happen but just in case */
7240                 if (tmp->metadata) {
7241                         fprintf(stderr, "Well this shouldn't happen, extent "
7242                                 "record overlaps but is metadata? "
7243                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7244                         abort();
7245                 }
7246
7247                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7248                 if (ret) {
7249                         if (ret > 0)
7250                                 ret = -EINVAL;
7251                         break;
7252                 }
7253                 ret = btrfs_del_item(trans, root, path);
7254                 if (ret)
7255                         break;
7256                 btrfs_release_path(path);
7257                 nr_del++;
7258         }
7259         err = btrfs_commit_transaction(trans, root);
7260         if (err && !ret)
7261                 ret = err;
7262 out:
7263         while (!list_empty(&delete_list)) {
7264                 tmp = to_extent_record(delete_list.next);
7265                 list_del_init(&tmp->list);
7266                 if (tmp == rec)
7267                         continue;
7268                 free(tmp);
7269         }
7270
7271         while (!list_empty(&rec->dups)) {
7272                 tmp = to_extent_record(rec->dups.next);
7273                 list_del_init(&tmp->list);
7274                 free(tmp);
7275         }
7276
7277         btrfs_free_path(path);
7278
7279         if (!ret && !nr_del)
7280                 rec->num_duplicates = 0;
7281
7282         return ret ? ret : nr_del;
7283 }
7284
7285 static int find_possible_backrefs(struct btrfs_fs_info *info,
7286                                   struct btrfs_path *path,
7287                                   struct cache_tree *extent_cache,
7288                                   struct extent_record *rec)
7289 {
7290         struct btrfs_root *root;
7291         struct extent_backref *back, *tmp;
7292         struct data_backref *dback;
7293         struct cache_extent *cache;
7294         struct btrfs_file_extent_item *fi;
7295         struct btrfs_key key;
7296         u64 bytenr, bytes;
7297         int ret;
7298
7299         rbtree_postorder_for_each_entry_safe(back, tmp,
7300                                              &rec->backref_tree, node) {
7301                 /* Don't care about full backrefs (poor unloved backrefs) */
7302                 if (back->full_backref || !back->is_data)
7303                         continue;
7304
7305                 dback = to_data_backref(back);
7306
7307                 /* We found this one, we don't need to do a lookup */
7308                 if (dback->found_ref)
7309                         continue;
7310
7311                 key.objectid = dback->root;
7312                 key.type = BTRFS_ROOT_ITEM_KEY;
7313                 key.offset = (u64)-1;
7314
7315                 root = btrfs_read_fs_root(info, &key);
7316
7317                 /* No root, definitely a bad ref, skip */
7318                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7319                         continue;
7320                 /* Other err, exit */
7321                 if (IS_ERR(root))
7322                         return PTR_ERR(root);
7323
7324                 key.objectid = dback->owner;
7325                 key.type = BTRFS_EXTENT_DATA_KEY;
7326                 key.offset = dback->offset;
7327                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7328                 if (ret) {
7329                         btrfs_release_path(path);
7330                         if (ret < 0)
7331                                 return ret;
7332                         /* Didn't find it, we can carry on */
7333                         ret = 0;
7334                         continue;
7335                 }
7336
7337                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7338                                     struct btrfs_file_extent_item);
7339                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7340                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7341                 btrfs_release_path(path);
7342                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7343                 if (cache) {
7344                         struct extent_record *tmp;
7345                         tmp = container_of(cache, struct extent_record, cache);
7346
7347                         /*
7348                          * If we found an extent record for the bytenr for this
7349                          * particular backref then we can't add it to our
7350                          * current extent record.  We only want to add backrefs
7351                          * that don't have a corresponding extent item in the
7352                          * extent tree since they likely belong to this record
7353                          * and we need to fix it if it doesn't match bytenrs.
7354                          */
7355                         if  (tmp->found_rec)
7356                                 continue;
7357                 }
7358
7359                 dback->found_ref += 1;
7360                 dback->disk_bytenr = bytenr;
7361                 dback->bytes = bytes;
7362
7363                 /*
7364                  * Set this so the verify backref code knows not to trust the
7365                  * values in this backref.
7366                  */
7367                 back->broken = 1;
7368         }
7369
7370         return 0;
7371 }
7372
7373 /*
7374  * Record orphan data ref into corresponding root.
7375  *
7376  * Return 0 if the extent item contains data ref and recorded.
7377  * Return 1 if the extent item contains no useful data ref
7378  *   On that case, it may contains only shared_dataref or metadata backref
7379  *   or the file extent exists(this should be handled by the extent bytenr
7380  *   recovery routine)
7381  * Return <0 if something goes wrong.
7382  */
7383 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7384                                       struct extent_record *rec)
7385 {
7386         struct btrfs_key key;
7387         struct btrfs_root *dest_root;
7388         struct extent_backref *back, *tmp;
7389         struct data_backref *dback;
7390         struct orphan_data_extent *orphan;
7391         struct btrfs_path *path;
7392         int recorded_data_ref = 0;
7393         int ret = 0;
7394
7395         if (rec->metadata)
7396                 return 1;
7397         path = btrfs_alloc_path();
7398         if (!path)
7399                 return -ENOMEM;
7400         rbtree_postorder_for_each_entry_safe(back, tmp,
7401                                              &rec->backref_tree, node) {
7402                 if (back->full_backref || !back->is_data ||
7403                     !back->found_extent_tree)
7404                         continue;
7405                 dback = to_data_backref(back);
7406                 if (dback->found_ref)
7407                         continue;
7408                 key.objectid = dback->root;
7409                 key.type = BTRFS_ROOT_ITEM_KEY;
7410                 key.offset = (u64)-1;
7411
7412                 dest_root = btrfs_read_fs_root(fs_info, &key);
7413
7414                 /* For non-exist root we just skip it */
7415                 if (IS_ERR(dest_root) || !dest_root)
7416                         continue;
7417
7418                 key.objectid = dback->owner;
7419                 key.type = BTRFS_EXTENT_DATA_KEY;
7420                 key.offset = dback->offset;
7421
7422                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7423                 /*
7424                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7425                  * we need to record it for inode/file extent rebuild.
7426                  * For ret > 0, we record it only for file extent rebuild.
7427                  * For ret == 0, the file extent exists but only bytenr
7428                  * mismatch, let the original bytenr fix routine to handle,
7429                  * don't record it.
7430                  */
7431                 if (ret == 0)
7432                         continue;
7433                 ret = 0;
7434                 orphan = malloc(sizeof(*orphan));
7435                 if (!orphan) {
7436                         ret = -ENOMEM;
7437                         goto out;
7438                 }
7439                 INIT_LIST_HEAD(&orphan->list);
7440                 orphan->root = dback->root;
7441                 orphan->objectid = dback->owner;
7442                 orphan->offset = dback->offset;
7443                 orphan->disk_bytenr = rec->cache.start;
7444                 orphan->disk_len = rec->cache.size;
7445                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7446                 recorded_data_ref = 1;
7447         }
7448 out:
7449         btrfs_free_path(path);
7450         if (!ret)
7451                 return !recorded_data_ref;
7452         else
7453                 return ret;
7454 }
7455
7456 /*
7457  * when an incorrect extent item is found, this will delete
7458  * all of the existing entries for it and recreate them
7459  * based on what the tree scan found.
7460  */
7461 static int fixup_extent_refs(struct btrfs_fs_info *info,
7462                              struct cache_tree *extent_cache,
7463                              struct extent_record *rec)
7464 {
7465         struct btrfs_trans_handle *trans = NULL;
7466         int ret;
7467         struct btrfs_path *path;
7468         struct cache_extent *cache;
7469         struct extent_backref *back, *tmp;
7470         int allocated = 0;
7471         u64 flags = 0;
7472
7473         if (rec->flag_block_full_backref)
7474                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7475
7476         path = btrfs_alloc_path();
7477         if (!path)
7478                 return -ENOMEM;
7479
7480         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7481                 /*
7482                  * Sometimes the backrefs themselves are so broken they don't
7483                  * get attached to any meaningful rec, so first go back and
7484                  * check any of our backrefs that we couldn't find and throw
7485                  * them into the list if we find the backref so that
7486                  * verify_backrefs can figure out what to do.
7487                  */
7488                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7489                 if (ret < 0)
7490                         goto out;
7491         }
7492
7493         /* step one, make sure all of the backrefs agree */
7494         ret = verify_backrefs(info, path, rec);
7495         if (ret < 0)
7496                 goto out;
7497
7498         trans = btrfs_start_transaction(info->extent_root, 1);
7499         if (IS_ERR(trans)) {
7500                 ret = PTR_ERR(trans);
7501                 goto out;
7502         }
7503
7504         /* step two, delete all the existing records */
7505         ret = delete_extent_records(trans, info->extent_root, path,
7506                                     rec->start, rec->max_size);
7507
7508         if (ret < 0)
7509                 goto out;
7510
7511         /* was this block corrupt?  If so, don't add references to it */
7512         cache = lookup_cache_extent(info->corrupt_blocks,
7513                                     rec->start, rec->max_size);
7514         if (cache) {
7515                 ret = 0;
7516                 goto out;
7517         }
7518
7519         /* step three, recreate all the refs we did find */
7520         rbtree_postorder_for_each_entry_safe(back, tmp,
7521                                              &rec->backref_tree, node) {
7522                 /*
7523                  * if we didn't find any references, don't create a
7524                  * new extent record
7525                  */
7526                 if (!back->found_ref)
7527                         continue;
7528
7529                 rec->bad_full_backref = 0;
7530                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7531                 allocated = 1;
7532
7533                 if (ret)
7534                         goto out;
7535         }
7536 out:
7537         if (trans) {
7538                 int err = btrfs_commit_transaction(trans, info->extent_root);
7539                 if (!ret)
7540                         ret = err;
7541         }
7542
7543         btrfs_free_path(path);
7544         return ret;
7545 }
7546
7547 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7548                               struct extent_record *rec)
7549 {
7550         struct btrfs_trans_handle *trans;
7551         struct btrfs_root *root = fs_info->extent_root;
7552         struct btrfs_path *path;
7553         struct btrfs_extent_item *ei;
7554         struct btrfs_key key;
7555         u64 flags;
7556         int ret = 0;
7557
7558         key.objectid = rec->start;
7559         if (rec->metadata) {
7560                 key.type = BTRFS_METADATA_ITEM_KEY;
7561                 key.offset = rec->info_level;
7562         } else {
7563                 key.type = BTRFS_EXTENT_ITEM_KEY;
7564                 key.offset = rec->max_size;
7565         }
7566
7567         path = btrfs_alloc_path();
7568         if (!path)
7569                 return -ENOMEM;
7570
7571         trans = btrfs_start_transaction(root, 0);
7572         if (IS_ERR(trans)) {
7573                 btrfs_free_path(path);
7574                 return PTR_ERR(trans);
7575         }
7576
7577         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7578         if (ret < 0) {
7579                 btrfs_free_path(path);
7580                 btrfs_commit_transaction(trans, root);
7581                 return ret;
7582         } else if (ret) {
7583                 fprintf(stderr, "Didn't find extent for %llu\n",
7584                         (unsigned long long)rec->start);
7585                 btrfs_free_path(path);
7586                 btrfs_commit_transaction(trans, root);
7587                 return -ENOENT;
7588         }
7589
7590         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7591                             struct btrfs_extent_item);
7592         flags = btrfs_extent_flags(path->nodes[0], ei);
7593         if (rec->flag_block_full_backref) {
7594                 fprintf(stderr, "setting full backref on %llu\n",
7595                         (unsigned long long)key.objectid);
7596                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7597         } else {
7598                 fprintf(stderr, "clearing full backref on %llu\n",
7599                         (unsigned long long)key.objectid);
7600                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7601         }
7602         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7603         btrfs_mark_buffer_dirty(path->nodes[0]);
7604         btrfs_free_path(path);
7605         return btrfs_commit_transaction(trans, root);
7606 }
7607
7608 /* right now we only prune from the extent allocation tree */
7609 static int prune_one_block(struct btrfs_trans_handle *trans,
7610                            struct btrfs_fs_info *info,
7611                            struct btrfs_corrupt_block *corrupt)
7612 {
7613         int ret;
7614         struct btrfs_path path;
7615         struct extent_buffer *eb;
7616         u64 found;
7617         int slot;
7618         int nritems;
7619         int level = corrupt->level + 1;
7620
7621         btrfs_init_path(&path);
7622 again:
7623         /* we want to stop at the parent to our busted block */
7624         path.lowest_level = level;
7625
7626         ret = btrfs_search_slot(trans, info->extent_root,
7627                                 &corrupt->key, &path, -1, 1);
7628
7629         if (ret < 0)
7630                 goto out;
7631
7632         eb = path.nodes[level];
7633         if (!eb) {
7634                 ret = -ENOENT;
7635                 goto out;
7636         }
7637
7638         /*
7639          * hopefully the search gave us the block we want to prune,
7640          * lets try that first
7641          */
7642         slot = path.slots[level];
7643         found =  btrfs_node_blockptr(eb, slot);
7644         if (found == corrupt->cache.start)
7645                 goto del_ptr;
7646
7647         nritems = btrfs_header_nritems(eb);
7648
7649         /* the search failed, lets scan this node and hope we find it */
7650         for (slot = 0; slot < nritems; slot++) {
7651                 found =  btrfs_node_blockptr(eb, slot);
7652                 if (found == corrupt->cache.start)
7653                         goto del_ptr;
7654         }
7655         /*
7656          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7657          * to this block
7658          */
7659         if (eb == info->extent_root->node) {
7660                 ret = -ENOENT;
7661                 goto out;
7662         } else {
7663                 level++;
7664                 btrfs_release_path(&path);
7665                 goto again;
7666         }
7667
7668 del_ptr:
7669         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7670         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7671
7672 out:
7673         btrfs_release_path(&path);
7674         return ret;
7675 }
7676
7677 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7678 {
7679         struct btrfs_trans_handle *trans = NULL;
7680         struct cache_extent *cache;
7681         struct btrfs_corrupt_block *corrupt;
7682
7683         while (1) {
7684                 cache = search_cache_extent(info->corrupt_blocks, 0);
7685                 if (!cache)
7686                         break;
7687                 if (!trans) {
7688                         trans = btrfs_start_transaction(info->extent_root, 1);
7689                         if (IS_ERR(trans))
7690                                 return PTR_ERR(trans);
7691                 }
7692                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7693                 prune_one_block(trans, info, corrupt);
7694                 remove_cache_extent(info->corrupt_blocks, cache);
7695         }
7696         if (trans)
7697                 return btrfs_commit_transaction(trans, info->extent_root);
7698         return 0;
7699 }
7700
7701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7702 {
7703         struct btrfs_block_group_cache *cache;
7704         u64 start, end;
7705         int ret;
7706
7707         while (1) {
7708                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7709                                             &start, &end, EXTENT_DIRTY);
7710                 if (ret)
7711                         break;
7712                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7713                                    GFP_NOFS);
7714         }
7715
7716         start = 0;
7717         while (1) {
7718                 cache = btrfs_lookup_first_block_group(fs_info, start);
7719                 if (!cache)
7720                         break;
7721                 if (cache->cached)
7722                         cache->cached = 0;
7723                 start = cache->key.objectid + cache->key.offset;
7724         }
7725 }
7726
7727 static int check_extent_refs(struct btrfs_root *root,
7728                              struct cache_tree *extent_cache)
7729 {
7730         struct extent_record *rec;
7731         struct cache_extent *cache;
7732         int err = 0;
7733         int ret = 0;
7734         int fixed = 0;
7735         int had_dups = 0;
7736         int recorded = 0;
7737
7738         if (repair) {
7739                 /*
7740                  * if we're doing a repair, we have to make sure
7741                  * we don't allocate from the problem extents.
7742                  * In the worst case, this will be all the
7743                  * extents in the FS
7744                  */
7745                 cache = search_cache_extent(extent_cache, 0);
7746                 while(cache) {
7747                         rec = container_of(cache, struct extent_record, cache);
7748                         set_extent_dirty(root->fs_info->excluded_extents,
7749                                          rec->start,
7750                                          rec->start + rec->max_size - 1,
7751                                          GFP_NOFS);
7752                         cache = next_cache_extent(cache);
7753                 }
7754
7755                 /* pin down all the corrupted blocks too */
7756                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7757                 while(cache) {
7758                         set_extent_dirty(root->fs_info->excluded_extents,
7759                                          cache->start,
7760                                          cache->start + cache->size - 1,
7761                                          GFP_NOFS);
7762                         cache = next_cache_extent(cache);
7763                 }
7764                 prune_corrupt_blocks(root->fs_info);
7765                 reset_cached_block_groups(root->fs_info);
7766         }
7767
7768         reset_cached_block_groups(root->fs_info);
7769
7770         /*
7771          * We need to delete any duplicate entries we find first otherwise we
7772          * could mess up the extent tree when we have backrefs that actually
7773          * belong to a different extent item and not the weird duplicate one.
7774          */
7775         while (repair && !list_empty(&duplicate_extents)) {
7776                 rec = to_extent_record(duplicate_extents.next);
7777                 list_del_init(&rec->list);
7778
7779                 /* Sometimes we can find a backref before we find an actual
7780                  * extent, so we need to process it a little bit to see if there
7781                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7782                  * if this is a backref screwup.  If we need to delete stuff
7783                  * process_duplicates() will return 0, otherwise it will return
7784                  * 1 and we
7785                  */
7786                 if (process_duplicates(root, extent_cache, rec))
7787                         continue;
7788                 ret = delete_duplicate_records(root, rec);
7789                 if (ret < 0)
7790                         return ret;
7791                 /*
7792                  * delete_duplicate_records will return the number of entries
7793                  * deleted, so if it's greater than 0 then we know we actually
7794                  * did something and we need to remove.
7795                  */
7796                 if (ret)
7797                         had_dups = 1;
7798         }
7799
7800         if (had_dups)
7801                 return -EAGAIN;
7802
7803         while(1) {
7804                 int cur_err = 0;
7805
7806                 fixed = 0;
7807                 recorded = 0;
7808                 cache = search_cache_extent(extent_cache, 0);
7809                 if (!cache)
7810                         break;
7811                 rec = container_of(cache, struct extent_record, cache);
7812                 if (rec->num_duplicates) {
7813                         fprintf(stderr, "extent item %llu has multiple extent "
7814                                 "items\n", (unsigned long long)rec->start);
7815                         err = 1;
7816                         cur_err = 1;
7817                 }
7818
7819                 if (rec->refs != rec->extent_item_refs) {
7820                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7821                                 (unsigned long long)rec->start,
7822                                 (unsigned long long)rec->nr);
7823                         fprintf(stderr, "extent item %llu, found %llu\n",
7824                                 (unsigned long long)rec->extent_item_refs,
7825                                 (unsigned long long)rec->refs);
7826                         ret = record_orphan_data_extents(root->fs_info, rec);
7827                         if (ret < 0)
7828                                 goto repair_abort;
7829                         if (ret == 0) {
7830                                 recorded = 1;
7831                         } else {
7832                                 /*
7833                                  * we can't use the extent to repair file
7834                                  * extent, let the fallback method handle it.
7835                                  */
7836                                 if (!fixed && repair) {
7837                                         ret = fixup_extent_refs(
7838                                                         root->fs_info,
7839                                                         extent_cache, rec);
7840                                         if (ret)
7841                                                 goto repair_abort;
7842                                         fixed = 1;
7843                                 }
7844                         }
7845                         err = 1;
7846                         cur_err = 1;
7847                 }
7848                 if (all_backpointers_checked(rec, 1)) {
7849                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7850                                 (unsigned long long)rec->start,
7851                                 (unsigned long long)rec->nr);
7852
7853                         if (!fixed && !recorded && repair) {
7854                                 ret = fixup_extent_refs(root->fs_info,
7855                                                         extent_cache, rec);
7856                                 if (ret)
7857                                         goto repair_abort;
7858                                 fixed = 1;
7859                         }
7860                         cur_err = 1;
7861                         err = 1;
7862                 }
7863                 if (!rec->owner_ref_checked) {
7864                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7865                                 (unsigned long long)rec->start,
7866                                 (unsigned long long)rec->nr);
7867                         if (!fixed && !recorded && repair) {
7868                                 ret = fixup_extent_refs(root->fs_info,
7869                                                         extent_cache, rec);
7870                                 if (ret)
7871                                         goto repair_abort;
7872                                 fixed = 1;
7873                         }
7874                         err = 1;
7875                         cur_err = 1;
7876                 }
7877                 if (rec->bad_full_backref) {
7878                         fprintf(stderr, "bad full backref, on [%llu]\n",
7879                                 (unsigned long long)rec->start);
7880                         if (repair) {
7881                                 ret = fixup_extent_flags(root->fs_info, rec);
7882                                 if (ret)
7883                                         goto repair_abort;
7884                                 fixed = 1;
7885                         }
7886                         err = 1;
7887                         cur_err = 1;
7888                 }
7889                 /*
7890                  * Although it's not a extent ref's problem, we reuse this
7891                  * routine for error reporting.
7892                  * No repair function yet.
7893                  */
7894                 if (rec->crossing_stripes) {
7895                         fprintf(stderr,
7896                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7897                                 rec->start, rec->start + rec->max_size);
7898                         err = 1;
7899                         cur_err = 1;
7900                 }
7901
7902                 if (rec->wrong_chunk_type) {
7903                         fprintf(stderr,
7904                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7905                                 rec->start, rec->start + rec->max_size);
7906                         err = 1;
7907                         cur_err = 1;
7908                 }
7909
7910                 remove_cache_extent(extent_cache, cache);
7911                 free_all_extent_backrefs(rec);
7912                 if (!init_extent_tree && repair && (!cur_err || fixed))
7913                         clear_extent_dirty(root->fs_info->excluded_extents,
7914                                            rec->start,
7915                                            rec->start + rec->max_size - 1,
7916                                            GFP_NOFS);
7917                 free(rec);
7918         }
7919 repair_abort:
7920         if (repair) {
7921                 if (ret && ret != -EAGAIN) {
7922                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7923                         exit(1);
7924                 } else if (!ret) {
7925                         struct btrfs_trans_handle *trans;
7926
7927                         root = root->fs_info->extent_root;
7928                         trans = btrfs_start_transaction(root, 1);
7929                         if (IS_ERR(trans)) {
7930                                 ret = PTR_ERR(trans);
7931                                 goto repair_abort;
7932                         }
7933
7934                         btrfs_fix_block_accounting(trans, root);
7935                         ret = btrfs_commit_transaction(trans, root);
7936                         if (ret)
7937                                 goto repair_abort;
7938                 }
7939                 if (err)
7940                         fprintf(stderr, "repaired damaged extent references\n");
7941                 return ret;
7942         }
7943         return err;
7944 }
7945
7946 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7947 {
7948         u64 stripe_size;
7949
7950         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7951                 stripe_size = length;
7952                 stripe_size /= num_stripes;
7953         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7954                 stripe_size = length * 2;
7955                 stripe_size /= num_stripes;
7956         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7957                 stripe_size = length;
7958                 stripe_size /= (num_stripes - 1);
7959         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7960                 stripe_size = length;
7961                 stripe_size /= (num_stripes - 2);
7962         } else {
7963                 stripe_size = length;
7964         }
7965         return stripe_size;
7966 }
7967
7968 /*
7969  * Check the chunk with its block group/dev list ref:
7970  * Return 0 if all refs seems valid.
7971  * Return 1 if part of refs seems valid, need later check for rebuild ref
7972  * like missing block group and needs to search extent tree to rebuild them.
7973  * Return -1 if essential refs are missing and unable to rebuild.
7974  */
7975 static int check_chunk_refs(struct chunk_record *chunk_rec,
7976                             struct block_group_tree *block_group_cache,
7977                             struct device_extent_tree *dev_extent_cache,
7978                             int silent)
7979 {
7980         struct cache_extent *block_group_item;
7981         struct block_group_record *block_group_rec;
7982         struct cache_extent *dev_extent_item;
7983         struct device_extent_record *dev_extent_rec;
7984         u64 devid;
7985         u64 offset;
7986         u64 length;
7987         int metadump_v2 = 0;
7988         int i;
7989         int ret = 0;
7990
7991         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7992                                                chunk_rec->offset,
7993                                                chunk_rec->length);
7994         if (block_group_item) {
7995                 block_group_rec = container_of(block_group_item,
7996                                                struct block_group_record,
7997                                                cache);
7998                 if (chunk_rec->length != block_group_rec->offset ||
7999                     chunk_rec->offset != block_group_rec->objectid ||
8000                     (!metadump_v2 &&
8001                      chunk_rec->type_flags != block_group_rec->flags)) {
8002                         if (!silent)
8003                                 fprintf(stderr,
8004                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8005                                         chunk_rec->objectid,
8006                                         chunk_rec->type,
8007                                         chunk_rec->offset,
8008                                         chunk_rec->length,
8009                                         chunk_rec->offset,
8010                                         chunk_rec->type_flags,
8011                                         block_group_rec->objectid,
8012                                         block_group_rec->type,
8013                                         block_group_rec->offset,
8014                                         block_group_rec->offset,
8015                                         block_group_rec->objectid,
8016                                         block_group_rec->flags);
8017                         ret = -1;
8018                 } else {
8019                         list_del_init(&block_group_rec->list);
8020                         chunk_rec->bg_rec = block_group_rec;
8021                 }
8022         } else {
8023                 if (!silent)
8024                         fprintf(stderr,
8025                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8026                                 chunk_rec->objectid,
8027                                 chunk_rec->type,
8028                                 chunk_rec->offset,
8029                                 chunk_rec->length,
8030                                 chunk_rec->offset,
8031                                 chunk_rec->type_flags);
8032                 ret = 1;
8033         }
8034
8035         if (metadump_v2)
8036                 return ret;
8037
8038         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8039                                     chunk_rec->num_stripes);
8040         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8041                 devid = chunk_rec->stripes[i].devid;
8042                 offset = chunk_rec->stripes[i].offset;
8043                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8044                                                        devid, offset, length);
8045                 if (dev_extent_item) {
8046                         dev_extent_rec = container_of(dev_extent_item,
8047                                                 struct device_extent_record,
8048                                                 cache);
8049                         if (dev_extent_rec->objectid != devid ||
8050                             dev_extent_rec->offset != offset ||
8051                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8052                             dev_extent_rec->length != length) {
8053                                 if (!silent)
8054                                         fprintf(stderr,
8055                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8056                                                 chunk_rec->objectid,
8057                                                 chunk_rec->type,
8058                                                 chunk_rec->offset,
8059                                                 chunk_rec->stripes[i].devid,
8060                                                 chunk_rec->stripes[i].offset,
8061                                                 dev_extent_rec->objectid,
8062                                                 dev_extent_rec->offset,
8063                                                 dev_extent_rec->length);
8064                                 ret = -1;
8065                         } else {
8066                                 list_move(&dev_extent_rec->chunk_list,
8067                                           &chunk_rec->dextents);
8068                         }
8069                 } else {
8070                         if (!silent)
8071                                 fprintf(stderr,
8072                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8073                                         chunk_rec->objectid,
8074                                         chunk_rec->type,
8075                                         chunk_rec->offset,
8076                                         chunk_rec->stripes[i].devid,
8077                                         chunk_rec->stripes[i].offset);
8078                         ret = -1;
8079                 }
8080         }
8081         return ret;
8082 }
8083
8084 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8085 int check_chunks(struct cache_tree *chunk_cache,
8086                  struct block_group_tree *block_group_cache,
8087                  struct device_extent_tree *dev_extent_cache,
8088                  struct list_head *good, struct list_head *bad,
8089                  struct list_head *rebuild, int silent)
8090 {
8091         struct cache_extent *chunk_item;
8092         struct chunk_record *chunk_rec;
8093         struct block_group_record *bg_rec;
8094         struct device_extent_record *dext_rec;
8095         int err;
8096         int ret = 0;
8097
8098         chunk_item = first_cache_extent(chunk_cache);
8099         while (chunk_item) {
8100                 chunk_rec = container_of(chunk_item, struct chunk_record,
8101                                          cache);
8102                 err = check_chunk_refs(chunk_rec, block_group_cache,
8103                                        dev_extent_cache, silent);
8104                 if (err < 0)
8105                         ret = err;
8106                 if (err == 0 && good)
8107                         list_add_tail(&chunk_rec->list, good);
8108                 if (err > 0 && rebuild)
8109                         list_add_tail(&chunk_rec->list, rebuild);
8110                 if (err < 0 && bad)
8111                         list_add_tail(&chunk_rec->list, bad);
8112                 chunk_item = next_cache_extent(chunk_item);
8113         }
8114
8115         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8116                 if (!silent)
8117                         fprintf(stderr,
8118                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8119                                 bg_rec->objectid,
8120                                 bg_rec->offset,
8121                                 bg_rec->flags);
8122                 if (!ret)
8123                         ret = 1;
8124         }
8125
8126         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8127                             chunk_list) {
8128                 if (!silent)
8129                         fprintf(stderr,
8130                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8131                                 dext_rec->objectid,
8132                                 dext_rec->offset,
8133                                 dext_rec->length);
8134                 if (!ret)
8135                         ret = 1;
8136         }
8137         return ret;
8138 }
8139
8140
8141 static int check_device_used(struct device_record *dev_rec,
8142                              struct device_extent_tree *dext_cache)
8143 {
8144         struct cache_extent *cache;
8145         struct device_extent_record *dev_extent_rec;
8146         u64 total_byte = 0;
8147
8148         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8149         while (cache) {
8150                 dev_extent_rec = container_of(cache,
8151                                               struct device_extent_record,
8152                                               cache);
8153                 if (dev_extent_rec->objectid != dev_rec->devid)
8154                         break;
8155
8156                 list_del_init(&dev_extent_rec->device_list);
8157                 total_byte += dev_extent_rec->length;
8158                 cache = next_cache_extent(cache);
8159         }
8160
8161         if (total_byte != dev_rec->byte_used) {
8162                 fprintf(stderr,
8163                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8164                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8165                         dev_rec->type, dev_rec->offset);
8166                 return -1;
8167         } else {
8168                 return 0;
8169         }
8170 }
8171
8172 /* check btrfs_dev_item -> btrfs_dev_extent */
8173 static int check_devices(struct rb_root *dev_cache,
8174                          struct device_extent_tree *dev_extent_cache)
8175 {
8176         struct rb_node *dev_node;
8177         struct device_record *dev_rec;
8178         struct device_extent_record *dext_rec;
8179         int err;
8180         int ret = 0;
8181
8182         dev_node = rb_first(dev_cache);
8183         while (dev_node) {
8184                 dev_rec = container_of(dev_node, struct device_record, node);
8185                 err = check_device_used(dev_rec, dev_extent_cache);
8186                 if (err)
8187                         ret = err;
8188
8189                 dev_node = rb_next(dev_node);
8190         }
8191         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8192                             device_list) {
8193                 fprintf(stderr,
8194                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8195                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8196                 if (!ret)
8197                         ret = 1;
8198         }
8199         return ret;
8200 }
8201
8202 static int add_root_item_to_list(struct list_head *head,
8203                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8204                                   u8 level, u8 drop_level,
8205                                   int level_size, struct btrfs_key *drop_key)
8206 {
8207
8208         struct root_item_record *ri_rec;
8209         ri_rec = malloc(sizeof(*ri_rec));
8210         if (!ri_rec)
8211                 return -ENOMEM;
8212         ri_rec->bytenr = bytenr;
8213         ri_rec->objectid = objectid;
8214         ri_rec->level = level;
8215         ri_rec->level_size = level_size;
8216         ri_rec->drop_level = drop_level;
8217         ri_rec->last_snapshot = last_snapshot;
8218         if (drop_key)
8219                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8220         list_add_tail(&ri_rec->list, head);
8221
8222         return 0;
8223 }
8224
8225 static void free_root_item_list(struct list_head *list)
8226 {
8227         struct root_item_record *ri_rec;
8228
8229         while (!list_empty(list)) {
8230                 ri_rec = list_first_entry(list, struct root_item_record,
8231                                           list);
8232                 list_del_init(&ri_rec->list);
8233                 free(ri_rec);
8234         }
8235 }
8236
8237 static int deal_root_from_list(struct list_head *list,
8238                                struct btrfs_root *root,
8239                                struct block_info *bits,
8240                                int bits_nr,
8241                                struct cache_tree *pending,
8242                                struct cache_tree *seen,
8243                                struct cache_tree *reada,
8244                                struct cache_tree *nodes,
8245                                struct cache_tree *extent_cache,
8246                                struct cache_tree *chunk_cache,
8247                                struct rb_root *dev_cache,
8248                                struct block_group_tree *block_group_cache,
8249                                struct device_extent_tree *dev_extent_cache)
8250 {
8251         int ret = 0;
8252         u64 last;
8253
8254         while (!list_empty(list)) {
8255                 struct root_item_record *rec;
8256                 struct extent_buffer *buf;
8257                 rec = list_entry(list->next,
8258                                  struct root_item_record, list);
8259                 last = 0;
8260                 buf = read_tree_block(root->fs_info->tree_root,
8261                                       rec->bytenr, rec->level_size, 0);
8262                 if (!extent_buffer_uptodate(buf)) {
8263                         free_extent_buffer(buf);
8264                         ret = -EIO;
8265                         break;
8266                 }
8267                 add_root_to_pending(buf, extent_cache, pending,
8268                                     seen, nodes, rec->objectid);
8269                 /*
8270                  * To rebuild extent tree, we need deal with snapshot
8271                  * one by one, otherwise we deal with node firstly which
8272                  * can maximize readahead.
8273                  */
8274                 while (1) {
8275                         ret = run_next_block(root, bits, bits_nr, &last,
8276                                              pending, seen, reada, nodes,
8277                                              extent_cache, chunk_cache,
8278                                              dev_cache, block_group_cache,
8279                                              dev_extent_cache, rec);
8280                         if (ret != 0)
8281                                 break;
8282                 }
8283                 free_extent_buffer(buf);
8284                 list_del(&rec->list);
8285                 free(rec);
8286                 if (ret < 0)
8287                         break;
8288         }
8289         while (ret >= 0) {
8290                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8291                                      reada, nodes, extent_cache, chunk_cache,
8292                                      dev_cache, block_group_cache,
8293                                      dev_extent_cache, NULL);
8294                 if (ret != 0) {
8295                         if (ret > 0)
8296                                 ret = 0;
8297                         break;
8298                 }
8299         }
8300         return ret;
8301 }
8302
8303 static int check_chunks_and_extents(struct btrfs_root *root)
8304 {
8305         struct rb_root dev_cache;
8306         struct cache_tree chunk_cache;
8307         struct block_group_tree block_group_cache;
8308         struct device_extent_tree dev_extent_cache;
8309         struct cache_tree extent_cache;
8310         struct cache_tree seen;
8311         struct cache_tree pending;
8312         struct cache_tree reada;
8313         struct cache_tree nodes;
8314         struct extent_io_tree excluded_extents;
8315         struct cache_tree corrupt_blocks;
8316         struct btrfs_path path;
8317         struct btrfs_key key;
8318         struct btrfs_key found_key;
8319         int ret, err = 0;
8320         struct block_info *bits;
8321         int bits_nr;
8322         struct extent_buffer *leaf;
8323         int slot;
8324         struct btrfs_root_item ri;
8325         struct list_head dropping_trees;
8326         struct list_head normal_trees;
8327         struct btrfs_root *root1;
8328         u64 objectid;
8329         u32 level_size;
8330         u8 level;
8331
8332         dev_cache = RB_ROOT;
8333         cache_tree_init(&chunk_cache);
8334         block_group_tree_init(&block_group_cache);
8335         device_extent_tree_init(&dev_extent_cache);
8336
8337         cache_tree_init(&extent_cache);
8338         cache_tree_init(&seen);
8339         cache_tree_init(&pending);
8340         cache_tree_init(&nodes);
8341         cache_tree_init(&reada);
8342         cache_tree_init(&corrupt_blocks);
8343         extent_io_tree_init(&excluded_extents);
8344         INIT_LIST_HEAD(&dropping_trees);
8345         INIT_LIST_HEAD(&normal_trees);
8346
8347         if (repair) {
8348                 root->fs_info->excluded_extents = &excluded_extents;
8349                 root->fs_info->fsck_extent_cache = &extent_cache;
8350                 root->fs_info->free_extent_hook = free_extent_hook;
8351                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8352         }
8353
8354         bits_nr = 1024;
8355         bits = malloc(bits_nr * sizeof(struct block_info));
8356         if (!bits) {
8357                 perror("malloc");
8358                 exit(1);
8359         }
8360
8361         if (ctx.progress_enabled) {
8362                 ctx.tp = TASK_EXTENTS;
8363                 task_start(ctx.info);
8364         }
8365
8366 again:
8367         root1 = root->fs_info->tree_root;
8368         level = btrfs_header_level(root1->node);
8369         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8370                                     root1->node->start, 0, level, 0,
8371                                     root1->nodesize, NULL);
8372         if (ret < 0)
8373                 goto out;
8374         root1 = root->fs_info->chunk_root;
8375         level = btrfs_header_level(root1->node);
8376         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8377                                     root1->node->start, 0, level, 0,
8378                                     root1->nodesize, NULL);
8379         if (ret < 0)
8380                 goto out;
8381         btrfs_init_path(&path);
8382         key.offset = 0;
8383         key.objectid = 0;
8384         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8385         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8386                                         &key, &path, 0, 0);
8387         if (ret < 0)
8388                 goto out;
8389         while(1) {
8390                 leaf = path.nodes[0];
8391                 slot = path.slots[0];
8392                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8393                         ret = btrfs_next_leaf(root, &path);
8394                         if (ret != 0)
8395                                 break;
8396                         leaf = path.nodes[0];
8397                         slot = path.slots[0];
8398                 }
8399                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8400                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8401                         unsigned long offset;
8402                         u64 last_snapshot;
8403
8404                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8405                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8406                         last_snapshot = btrfs_root_last_snapshot(&ri);
8407                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8408                                 level = btrfs_root_level(&ri);
8409                                 level_size = root->nodesize;
8410                                 ret = add_root_item_to_list(&normal_trees,
8411                                                 found_key.objectid,
8412                                                 btrfs_root_bytenr(&ri),
8413                                                 last_snapshot, level,
8414                                                 0, level_size, NULL);
8415                                 if (ret < 0)
8416                                         goto out;
8417                         } else {
8418                                 level = btrfs_root_level(&ri);
8419                                 level_size = root->nodesize;
8420                                 objectid = found_key.objectid;
8421                                 btrfs_disk_key_to_cpu(&found_key,
8422                                                       &ri.drop_progress);
8423                                 ret = add_root_item_to_list(&dropping_trees,
8424                                                 objectid,
8425                                                 btrfs_root_bytenr(&ri),
8426                                                 last_snapshot, level,
8427                                                 ri.drop_level,
8428                                                 level_size, &found_key);
8429                                 if (ret < 0)
8430                                         goto out;
8431                         }
8432                 }
8433                 path.slots[0]++;
8434         }
8435         btrfs_release_path(&path);
8436
8437         /*
8438          * check_block can return -EAGAIN if it fixes something, please keep
8439          * this in mind when dealing with return values from these functions, if
8440          * we get -EAGAIN we want to fall through and restart the loop.
8441          */
8442         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8443                                   &seen, &reada, &nodes, &extent_cache,
8444                                   &chunk_cache, &dev_cache, &block_group_cache,
8445                                   &dev_extent_cache);
8446         if (ret < 0) {
8447                 if (ret == -EAGAIN)
8448                         goto loop;
8449                 goto out;
8450         }
8451         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8452                                   &pending, &seen, &reada, &nodes,
8453                                   &extent_cache, &chunk_cache, &dev_cache,
8454                                   &block_group_cache, &dev_extent_cache);
8455         if (ret < 0) {
8456                 if (ret == -EAGAIN)
8457                         goto loop;
8458                 goto out;
8459         }
8460
8461         ret = check_chunks(&chunk_cache, &block_group_cache,
8462                            &dev_extent_cache, NULL, NULL, NULL, 0);
8463         if (ret) {
8464                 if (ret == -EAGAIN)
8465                         goto loop;
8466                 err = ret;
8467         }
8468
8469         ret = check_extent_refs(root, &extent_cache);
8470         if (ret < 0) {
8471                 if (ret == -EAGAIN)
8472                         goto loop;
8473                 goto out;
8474         }
8475
8476         ret = check_devices(&dev_cache, &dev_extent_cache);
8477         if (ret && err)
8478                 ret = err;
8479
8480 out:
8481         task_stop(ctx.info);
8482         if (repair) {
8483                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8484                 extent_io_tree_cleanup(&excluded_extents);
8485                 root->fs_info->fsck_extent_cache = NULL;
8486                 root->fs_info->free_extent_hook = NULL;
8487                 root->fs_info->corrupt_blocks = NULL;
8488                 root->fs_info->excluded_extents = NULL;
8489         }
8490         free(bits);
8491         free_chunk_cache_tree(&chunk_cache);
8492         free_device_cache_tree(&dev_cache);
8493         free_block_group_tree(&block_group_cache);
8494         free_device_extent_tree(&dev_extent_cache);
8495         free_extent_cache_tree(&seen);
8496         free_extent_cache_tree(&pending);
8497         free_extent_cache_tree(&reada);
8498         free_extent_cache_tree(&nodes);
8499         return ret;
8500 loop:
8501         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8502         free_extent_cache_tree(&seen);
8503         free_extent_cache_tree(&pending);
8504         free_extent_cache_tree(&reada);
8505         free_extent_cache_tree(&nodes);
8506         free_chunk_cache_tree(&chunk_cache);
8507         free_block_group_tree(&block_group_cache);
8508         free_device_cache_tree(&dev_cache);
8509         free_device_extent_tree(&dev_extent_cache);
8510         free_extent_record_cache(root->fs_info, &extent_cache);
8511         free_root_item_list(&normal_trees);
8512         free_root_item_list(&dropping_trees);
8513         extent_io_tree_cleanup(&excluded_extents);
8514         goto again;
8515 }
8516
8517 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8518                            struct btrfs_root *root, int overwrite)
8519 {
8520         struct extent_buffer *c;
8521         struct extent_buffer *old = root->node;
8522         int level;
8523         int ret;
8524         struct btrfs_disk_key disk_key = {0,0,0};
8525
8526         level = 0;
8527
8528         if (overwrite) {
8529                 c = old;
8530                 extent_buffer_get(c);
8531                 goto init;
8532         }
8533         c = btrfs_alloc_free_block(trans, root,
8534                                    root->nodesize,
8535                                    root->root_key.objectid,
8536                                    &disk_key, level, 0, 0);
8537         if (IS_ERR(c)) {
8538                 c = old;
8539                 extent_buffer_get(c);
8540                 overwrite = 1;
8541         }
8542 init:
8543         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8544         btrfs_set_header_level(c, level);
8545         btrfs_set_header_bytenr(c, c->start);
8546         btrfs_set_header_generation(c, trans->transid);
8547         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8548         btrfs_set_header_owner(c, root->root_key.objectid);
8549
8550         write_extent_buffer(c, root->fs_info->fsid,
8551                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8552
8553         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8554                             btrfs_header_chunk_tree_uuid(c),
8555                             BTRFS_UUID_SIZE);
8556
8557         btrfs_mark_buffer_dirty(c);
8558         /*
8559          * this case can happen in the following case:
8560          *
8561          * 1.overwrite previous root.
8562          *
8563          * 2.reinit reloc data root, this is because we skip pin
8564          * down reloc data tree before which means we can allocate
8565          * same block bytenr here.
8566          */
8567         if (old->start == c->start) {
8568                 btrfs_set_root_generation(&root->root_item,
8569                                           trans->transid);
8570                 root->root_item.level = btrfs_header_level(root->node);
8571                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8572                                         &root->root_key, &root->root_item);
8573                 if (ret) {
8574                         free_extent_buffer(c);
8575                         return ret;
8576                 }
8577         }
8578         free_extent_buffer(old);
8579         root->node = c;
8580         add_root_to_dirty_list(root);
8581         return 0;
8582 }
8583
8584 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8585                                 struct extent_buffer *eb, int tree_root)
8586 {
8587         struct extent_buffer *tmp;
8588         struct btrfs_root_item *ri;
8589         struct btrfs_key key;
8590         u64 bytenr;
8591         u32 nodesize;
8592         int level = btrfs_header_level(eb);
8593         int nritems;
8594         int ret;
8595         int i;
8596
8597         /*
8598          * If we have pinned this block before, don't pin it again.
8599          * This can not only avoid forever loop with broken filesystem
8600          * but also give us some speedups.
8601          */
8602         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8603                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8604                 return 0;
8605
8606         btrfs_pin_extent(fs_info, eb->start, eb->len);
8607
8608         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8609         nritems = btrfs_header_nritems(eb);
8610         for (i = 0; i < nritems; i++) {
8611                 if (level == 0) {
8612                         btrfs_item_key_to_cpu(eb, &key, i);
8613                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8614                                 continue;
8615                         /* Skip the extent root and reloc roots */
8616                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8617                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8618                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8619                                 continue;
8620                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8621                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8622
8623                         /*
8624                          * If at any point we start needing the real root we
8625                          * will have to build a stump root for the root we are
8626                          * in, but for now this doesn't actually use the root so
8627                          * just pass in extent_root.
8628                          */
8629                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8630                                               nodesize, 0);
8631                         if (!extent_buffer_uptodate(tmp)) {
8632                                 fprintf(stderr, "Error reading root block\n");
8633                                 return -EIO;
8634                         }
8635                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8636                         free_extent_buffer(tmp);
8637                         if (ret)
8638                                 return ret;
8639                 } else {
8640                         bytenr = btrfs_node_blockptr(eb, i);
8641
8642                         /* If we aren't the tree root don't read the block */
8643                         if (level == 1 && !tree_root) {
8644                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8645                                 continue;
8646                         }
8647
8648                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8649                                               nodesize, 0);
8650                         if (!extent_buffer_uptodate(tmp)) {
8651                                 fprintf(stderr, "Error reading tree block\n");
8652                                 return -EIO;
8653                         }
8654                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8655                         free_extent_buffer(tmp);
8656                         if (ret)
8657                                 return ret;
8658                 }
8659         }
8660
8661         return 0;
8662 }
8663
8664 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8665 {
8666         int ret;
8667
8668         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8669         if (ret)
8670                 return ret;
8671
8672         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8673 }
8674
8675 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8676 {
8677         struct btrfs_block_group_cache *cache;
8678         struct btrfs_path *path;
8679         struct extent_buffer *leaf;
8680         struct btrfs_chunk *chunk;
8681         struct btrfs_key key;
8682         int ret;
8683         u64 start;
8684
8685         path = btrfs_alloc_path();
8686         if (!path)
8687                 return -ENOMEM;
8688
8689         key.objectid = 0;
8690         key.type = BTRFS_CHUNK_ITEM_KEY;
8691         key.offset = 0;
8692
8693         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8694         if (ret < 0) {
8695                 btrfs_free_path(path);
8696                 return ret;
8697         }
8698
8699         /*
8700          * We do this in case the block groups were screwed up and had alloc
8701          * bits that aren't actually set on the chunks.  This happens with
8702          * restored images every time and could happen in real life I guess.
8703          */
8704         fs_info->avail_data_alloc_bits = 0;
8705         fs_info->avail_metadata_alloc_bits = 0;
8706         fs_info->avail_system_alloc_bits = 0;
8707
8708         /* First we need to create the in-memory block groups */
8709         while (1) {
8710                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8711                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8712                         if (ret < 0) {
8713                                 btrfs_free_path(path);
8714                                 return ret;
8715                         }
8716                         if (ret) {
8717                                 ret = 0;
8718                                 break;
8719                         }
8720                 }
8721                 leaf = path->nodes[0];
8722                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8723                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8724                         path->slots[0]++;
8725                         continue;
8726                 }
8727
8728                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8729                                        struct btrfs_chunk);
8730                 btrfs_add_block_group(fs_info, 0,
8731                                       btrfs_chunk_type(leaf, chunk),
8732                                       key.objectid, key.offset,
8733                                       btrfs_chunk_length(leaf, chunk));
8734                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8735                                  key.offset + btrfs_chunk_length(leaf, chunk),
8736                                  GFP_NOFS);
8737                 path->slots[0]++;
8738         }
8739         start = 0;
8740         while (1) {
8741                 cache = btrfs_lookup_first_block_group(fs_info, start);
8742                 if (!cache)
8743                         break;
8744                 cache->cached = 1;
8745                 start = cache->key.objectid + cache->key.offset;
8746         }
8747
8748         btrfs_free_path(path);
8749         return 0;
8750 }
8751
8752 static int reset_balance(struct btrfs_trans_handle *trans,
8753                          struct btrfs_fs_info *fs_info)
8754 {
8755         struct btrfs_root *root = fs_info->tree_root;
8756         struct btrfs_path *path;
8757         struct extent_buffer *leaf;
8758         struct btrfs_key key;
8759         int del_slot, del_nr = 0;
8760         int ret;
8761         int found = 0;
8762
8763         path = btrfs_alloc_path();
8764         if (!path)
8765                 return -ENOMEM;
8766
8767         key.objectid = BTRFS_BALANCE_OBJECTID;
8768         key.type = BTRFS_BALANCE_ITEM_KEY;
8769         key.offset = 0;
8770
8771         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8772         if (ret) {
8773                 if (ret > 0)
8774                         ret = 0;
8775                 if (!ret)
8776                         goto reinit_data_reloc;
8777                 else
8778                         goto out;
8779         }
8780
8781         ret = btrfs_del_item(trans, root, path);
8782         if (ret)
8783                 goto out;
8784         btrfs_release_path(path);
8785
8786         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8787         key.type = BTRFS_ROOT_ITEM_KEY;
8788         key.offset = 0;
8789
8790         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8791         if (ret < 0)
8792                 goto out;
8793         while (1) {
8794                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8795                         if (!found)
8796                                 break;
8797
8798                         if (del_nr) {
8799                                 ret = btrfs_del_items(trans, root, path,
8800                                                       del_slot, del_nr);
8801                                 del_nr = 0;
8802                                 if (ret)
8803                                         goto out;
8804                         }
8805                         key.offset++;
8806                         btrfs_release_path(path);
8807
8808                         found = 0;
8809                         ret = btrfs_search_slot(trans, root, &key, path,
8810                                                 -1, 1);
8811                         if (ret < 0)
8812                                 goto out;
8813                         continue;
8814                 }
8815                 found = 1;
8816                 leaf = path->nodes[0];
8817                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8818                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8819                         break;
8820                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8821                         path->slots[0]++;
8822                         continue;
8823                 }
8824                 if (!del_nr) {
8825                         del_slot = path->slots[0];
8826                         del_nr = 1;
8827                 } else {
8828                         del_nr++;
8829                 }
8830                 path->slots[0]++;
8831         }
8832
8833         if (del_nr) {
8834                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8835                 if (ret)
8836                         goto out;
8837         }
8838         btrfs_release_path(path);
8839
8840 reinit_data_reloc:
8841         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8842         key.type = BTRFS_ROOT_ITEM_KEY;
8843         key.offset = (u64)-1;
8844         root = btrfs_read_fs_root(fs_info, &key);
8845         if (IS_ERR(root)) {
8846                 fprintf(stderr, "Error reading data reloc tree\n");
8847                 ret = PTR_ERR(root);
8848                 goto out;
8849         }
8850         record_root_in_trans(trans, root);
8851         ret = btrfs_fsck_reinit_root(trans, root, 0);
8852         if (ret)
8853                 goto out;
8854         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8855 out:
8856         btrfs_free_path(path);
8857         return ret;
8858 }
8859
8860 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8861                               struct btrfs_fs_info *fs_info)
8862 {
8863         u64 start = 0;
8864         int ret;
8865
8866         /*
8867          * The only reason we don't do this is because right now we're just
8868          * walking the trees we find and pinning down their bytes, we don't look
8869          * at any of the leaves.  In order to do mixed groups we'd have to check
8870          * the leaves of any fs roots and pin down the bytes for any file
8871          * extents we find.  Not hard but why do it if we don't have to?
8872          */
8873         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8874                 fprintf(stderr, "We don't support re-initing the extent tree "
8875                         "for mixed block groups yet, please notify a btrfs "
8876                         "developer you want to do this so they can add this "
8877                         "functionality.\n");
8878                 return -EINVAL;
8879         }
8880
8881         /*
8882          * first we need to walk all of the trees except the extent tree and pin
8883          * down the bytes that are in use so we don't overwrite any existing
8884          * metadata.
8885          */
8886         ret = pin_metadata_blocks(fs_info);
8887         if (ret) {
8888                 fprintf(stderr, "error pinning down used bytes\n");
8889                 return ret;
8890         }
8891
8892         /*
8893          * Need to drop all the block groups since we're going to recreate all
8894          * of them again.
8895          */
8896         btrfs_free_block_groups(fs_info);
8897         ret = reset_block_groups(fs_info);
8898         if (ret) {
8899                 fprintf(stderr, "error resetting the block groups\n");
8900                 return ret;
8901         }
8902
8903         /* Ok we can allocate now, reinit the extent root */
8904         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8905         if (ret) {
8906                 fprintf(stderr, "extent root initialization failed\n");
8907                 /*
8908                  * When the transaction code is updated we should end the
8909                  * transaction, but for now progs only knows about commit so
8910                  * just return an error.
8911                  */
8912                 return ret;
8913         }
8914
8915         /*
8916          * Now we have all the in-memory block groups setup so we can make
8917          * allocations properly, and the metadata we care about is safe since we
8918          * pinned all of it above.
8919          */
8920         while (1) {
8921                 struct btrfs_block_group_cache *cache;
8922
8923                 cache = btrfs_lookup_first_block_group(fs_info, start);
8924                 if (!cache)
8925                         break;
8926                 start = cache->key.objectid + cache->key.offset;
8927                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8928                                         &cache->key, &cache->item,
8929                                         sizeof(cache->item));
8930                 if (ret) {
8931                         fprintf(stderr, "Error adding block group\n");
8932                         return ret;
8933                 }
8934                 btrfs_extent_post_op(trans, fs_info->extent_root);
8935         }
8936
8937         ret = reset_balance(trans, fs_info);
8938         if (ret)
8939                 fprintf(stderr, "error resetting the pending balance\n");
8940
8941         return ret;
8942 }
8943
8944 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8945 {
8946         struct btrfs_path *path;
8947         struct btrfs_trans_handle *trans;
8948         struct btrfs_key key;
8949         int ret;
8950
8951         printf("Recowing metadata block %llu\n", eb->start);
8952         key.objectid = btrfs_header_owner(eb);
8953         key.type = BTRFS_ROOT_ITEM_KEY;
8954         key.offset = (u64)-1;
8955
8956         root = btrfs_read_fs_root(root->fs_info, &key);
8957         if (IS_ERR(root)) {
8958                 fprintf(stderr, "Couldn't find owner root %llu\n",
8959                         key.objectid);
8960                 return PTR_ERR(root);
8961         }
8962
8963         path = btrfs_alloc_path();
8964         if (!path)
8965                 return -ENOMEM;
8966
8967         trans = btrfs_start_transaction(root, 1);
8968         if (IS_ERR(trans)) {
8969                 btrfs_free_path(path);
8970                 return PTR_ERR(trans);
8971         }
8972
8973         path->lowest_level = btrfs_header_level(eb);
8974         if (path->lowest_level)
8975                 btrfs_node_key_to_cpu(eb, &key, 0);
8976         else
8977                 btrfs_item_key_to_cpu(eb, &key, 0);
8978
8979         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8980         btrfs_commit_transaction(trans, root);
8981         btrfs_free_path(path);
8982         return ret;
8983 }
8984
8985 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8986 {
8987         struct btrfs_path *path;
8988         struct btrfs_trans_handle *trans;
8989         struct btrfs_key key;
8990         int ret;
8991
8992         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8993                bad->key.type, bad->key.offset);
8994         key.objectid = bad->root_id;
8995         key.type = BTRFS_ROOT_ITEM_KEY;
8996         key.offset = (u64)-1;
8997
8998         root = btrfs_read_fs_root(root->fs_info, &key);
8999         if (IS_ERR(root)) {
9000                 fprintf(stderr, "Couldn't find owner root %llu\n",
9001                         key.objectid);
9002                 return PTR_ERR(root);
9003         }
9004
9005         path = btrfs_alloc_path();
9006         if (!path)
9007                 return -ENOMEM;
9008
9009         trans = btrfs_start_transaction(root, 1);
9010         if (IS_ERR(trans)) {
9011                 btrfs_free_path(path);
9012                 return PTR_ERR(trans);
9013         }
9014
9015         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9016         if (ret) {
9017                 if (ret > 0)
9018                         ret = 0;
9019                 goto out;
9020         }
9021         ret = btrfs_del_item(trans, root, path);
9022 out:
9023         btrfs_commit_transaction(trans, root);
9024         btrfs_free_path(path);
9025         return ret;
9026 }
9027
9028 static int zero_log_tree(struct btrfs_root *root)
9029 {
9030         struct btrfs_trans_handle *trans;
9031         int ret;
9032
9033         trans = btrfs_start_transaction(root, 1);
9034         if (IS_ERR(trans)) {
9035                 ret = PTR_ERR(trans);
9036                 return ret;
9037         }
9038         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9039         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9040         ret = btrfs_commit_transaction(trans, root);
9041         return ret;
9042 }
9043
9044 static int populate_csum(struct btrfs_trans_handle *trans,
9045                          struct btrfs_root *csum_root, char *buf, u64 start,
9046                          u64 len)
9047 {
9048         u64 offset = 0;
9049         u64 sectorsize;
9050         int ret = 0;
9051
9052         while (offset < len) {
9053                 sectorsize = csum_root->sectorsize;
9054                 ret = read_extent_data(csum_root, buf, start + offset,
9055                                        &sectorsize, 0);
9056                 if (ret)
9057                         break;
9058                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9059                                             start + offset, buf, sectorsize);
9060                 if (ret)
9061                         break;
9062                 offset += sectorsize;
9063         }
9064         return ret;
9065 }
9066
9067 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9068                                       struct btrfs_root *csum_root,
9069                                       struct btrfs_root *cur_root)
9070 {
9071         struct btrfs_path *path;
9072         struct btrfs_key key;
9073         struct extent_buffer *node;
9074         struct btrfs_file_extent_item *fi;
9075         char *buf = NULL;
9076         u64 start = 0;
9077         u64 len = 0;
9078         int slot = 0;
9079         int ret = 0;
9080
9081         path = btrfs_alloc_path();
9082         if (!path)
9083                 return -ENOMEM;
9084         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9085         if (!buf) {
9086                 ret = -ENOMEM;
9087                 goto out;
9088         }
9089
9090         key.objectid = 0;
9091         key.offset = 0;
9092         key.type = 0;
9093
9094         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9095         if (ret < 0)
9096                 goto out;
9097         /* Iterate all regular file extents and fill its csum */
9098         while (1) {
9099                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9100
9101                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9102                         goto next;
9103                 node = path->nodes[0];
9104                 slot = path->slots[0];
9105                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9106                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9107                         goto next;
9108                 start = btrfs_file_extent_disk_bytenr(node, fi);
9109                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9110
9111                 ret = populate_csum(trans, csum_root, buf, start, len);
9112                 if (ret == -EEXIST)
9113                         ret = 0;
9114                 if (ret < 0)
9115                         goto out;
9116 next:
9117                 /*
9118                  * TODO: if next leaf is corrupted, jump to nearest next valid
9119                  * leaf.
9120                  */
9121                 ret = btrfs_next_item(cur_root, path);
9122                 if (ret < 0)
9123                         goto out;
9124                 if (ret > 0) {
9125                         ret = 0;
9126                         goto out;
9127                 }
9128         }
9129
9130 out:
9131         btrfs_free_path(path);
9132         free(buf);
9133         return ret;
9134 }
9135
9136 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9137                                   struct btrfs_root *csum_root)
9138 {
9139         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9140         struct btrfs_path *path;
9141         struct btrfs_root *tree_root = fs_info->tree_root;
9142         struct btrfs_root *cur_root;
9143         struct extent_buffer *node;
9144         struct btrfs_key key;
9145         int slot = 0;
9146         int ret = 0;
9147
9148         path = btrfs_alloc_path();
9149         if (!path)
9150                 return -ENOMEM;
9151
9152         key.objectid = BTRFS_FS_TREE_OBJECTID;
9153         key.offset = 0;
9154         key.type = BTRFS_ROOT_ITEM_KEY;
9155
9156         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9157         if (ret < 0)
9158                 goto out;
9159         if (ret > 0) {
9160                 ret = -ENOENT;
9161                 goto out;
9162         }
9163
9164         while (1) {
9165                 node = path->nodes[0];
9166                 slot = path->slots[0];
9167                 btrfs_item_key_to_cpu(node, &key, slot);
9168                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9169                         goto out;
9170                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9171                         goto next;
9172                 if (!is_fstree(key.objectid))
9173                         goto next;
9174                 key.offset = (u64)-1;
9175
9176                 cur_root = btrfs_read_fs_root(fs_info, &key);
9177                 if (IS_ERR(cur_root) || !cur_root) {
9178                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9179                                 key.objectid);
9180                         goto out;
9181                 }
9182                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9183                                 cur_root);
9184                 if (ret < 0)
9185                         goto out;
9186 next:
9187                 ret = btrfs_next_item(tree_root, path);
9188                 if (ret > 0) {
9189                         ret = 0;
9190                         goto out;
9191                 }
9192                 if (ret < 0)
9193                         goto out;
9194         }
9195
9196 out:
9197         btrfs_free_path(path);
9198         return ret;
9199 }
9200
9201 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9202                                       struct btrfs_root *csum_root)
9203 {
9204         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9205         struct btrfs_path *path;
9206         struct btrfs_extent_item *ei;
9207         struct extent_buffer *leaf;
9208         char *buf;
9209         struct btrfs_key key;
9210         int ret;
9211
9212         path = btrfs_alloc_path();
9213         if (!path)
9214                 return -ENOMEM;
9215
9216         key.objectid = 0;
9217         key.type = BTRFS_EXTENT_ITEM_KEY;
9218         key.offset = 0;
9219
9220         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9221         if (ret < 0) {
9222                 btrfs_free_path(path);
9223                 return ret;
9224         }
9225
9226         buf = malloc(csum_root->sectorsize);
9227         if (!buf) {
9228                 btrfs_free_path(path);
9229                 return -ENOMEM;
9230         }
9231
9232         while (1) {
9233                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9234                         ret = btrfs_next_leaf(extent_root, path);
9235                         if (ret < 0)
9236                                 break;
9237                         if (ret) {
9238                                 ret = 0;
9239                                 break;
9240                         }
9241                 }
9242                 leaf = path->nodes[0];
9243
9244                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9245                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9246                         path->slots[0]++;
9247                         continue;
9248                 }
9249
9250                 ei = btrfs_item_ptr(leaf, path->slots[0],
9251                                     struct btrfs_extent_item);
9252                 if (!(btrfs_extent_flags(leaf, ei) &
9253                       BTRFS_EXTENT_FLAG_DATA)) {
9254                         path->slots[0]++;
9255                         continue;
9256                 }
9257
9258                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9259                                     key.offset);
9260                 if (ret)
9261                         break;
9262                 path->slots[0]++;
9263         }
9264
9265         btrfs_free_path(path);
9266         free(buf);
9267         return ret;
9268 }
9269
9270 /*
9271  * Recalculate the csum and put it into the csum tree.
9272  *
9273  * Extent tree init will wipe out all the extent info, so in that case, we
9274  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9275  * will use fs/subvol trees to init the csum tree.
9276  */
9277 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9278                           struct btrfs_root *csum_root,
9279                           int search_fs_tree)
9280 {
9281         if (search_fs_tree)
9282                 return fill_csum_tree_from_fs(trans, csum_root);
9283         else
9284                 return fill_csum_tree_from_extent(trans, csum_root);
9285 }
9286
9287 static void free_roots_info_cache(void)
9288 {
9289         if (!roots_info_cache)
9290                 return;
9291
9292         while (!cache_tree_empty(roots_info_cache)) {
9293                 struct cache_extent *entry;
9294                 struct root_item_info *rii;
9295
9296                 entry = first_cache_extent(roots_info_cache);
9297                 if (!entry)
9298                         break;
9299                 remove_cache_extent(roots_info_cache, entry);
9300                 rii = container_of(entry, struct root_item_info, cache_extent);
9301                 free(rii);
9302         }
9303
9304         free(roots_info_cache);
9305         roots_info_cache = NULL;
9306 }
9307
9308 static int build_roots_info_cache(struct btrfs_fs_info *info)
9309 {
9310         int ret = 0;
9311         struct btrfs_key key;
9312         struct extent_buffer *leaf;
9313         struct btrfs_path *path;
9314
9315         if (!roots_info_cache) {
9316                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9317                 if (!roots_info_cache)
9318                         return -ENOMEM;
9319                 cache_tree_init(roots_info_cache);
9320         }
9321
9322         path = btrfs_alloc_path();
9323         if (!path)
9324                 return -ENOMEM;
9325
9326         key.objectid = 0;
9327         key.type = BTRFS_EXTENT_ITEM_KEY;
9328         key.offset = 0;
9329
9330         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9331         if (ret < 0)
9332                 goto out;
9333         leaf = path->nodes[0];
9334
9335         while (1) {
9336                 struct btrfs_key found_key;
9337                 struct btrfs_extent_item *ei;
9338                 struct btrfs_extent_inline_ref *iref;
9339                 int slot = path->slots[0];
9340                 int type;
9341                 u64 flags;
9342                 u64 root_id;
9343                 u8 level;
9344                 struct cache_extent *entry;
9345                 struct root_item_info *rii;
9346
9347                 if (slot >= btrfs_header_nritems(leaf)) {
9348                         ret = btrfs_next_leaf(info->extent_root, path);
9349                         if (ret < 0) {
9350                                 break;
9351                         } else if (ret) {
9352                                 ret = 0;
9353                                 break;
9354                         }
9355                         leaf = path->nodes[0];
9356                         slot = path->slots[0];
9357                 }
9358
9359                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9360
9361                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9362                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9363                         goto next;
9364
9365                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9366                 flags = btrfs_extent_flags(leaf, ei);
9367
9368                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9369                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9370                         goto next;
9371
9372                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9373                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9374                         level = found_key.offset;
9375                 } else {
9376                         struct btrfs_tree_block_info *binfo;
9377
9378                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9379                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9380                         level = btrfs_tree_block_level(leaf, binfo);
9381                 }
9382
9383                 /*
9384                  * For a root extent, it must be of the following type and the
9385                  * first (and only one) iref in the item.
9386                  */
9387                 type = btrfs_extent_inline_ref_type(leaf, iref);
9388                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9389                         goto next;
9390
9391                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9392                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9393                 if (!entry) {
9394                         rii = malloc(sizeof(struct root_item_info));
9395                         if (!rii) {
9396                                 ret = -ENOMEM;
9397                                 goto out;
9398                         }
9399                         rii->cache_extent.start = root_id;
9400                         rii->cache_extent.size = 1;
9401                         rii->level = (u8)-1;
9402                         entry = &rii->cache_extent;
9403                         ret = insert_cache_extent(roots_info_cache, entry);
9404                         ASSERT(ret == 0);
9405                 } else {
9406                         rii = container_of(entry, struct root_item_info,
9407                                            cache_extent);
9408                 }
9409
9410                 ASSERT(rii->cache_extent.start == root_id);
9411                 ASSERT(rii->cache_extent.size == 1);
9412
9413                 if (level > rii->level || rii->level == (u8)-1) {
9414                         rii->level = level;
9415                         rii->bytenr = found_key.objectid;
9416                         rii->gen = btrfs_extent_generation(leaf, ei);
9417                         rii->node_count = 1;
9418                 } else if (level == rii->level) {
9419                         rii->node_count++;
9420                 }
9421 next:
9422                 path->slots[0]++;
9423         }
9424
9425 out:
9426         btrfs_free_path(path);
9427
9428         return ret;
9429 }
9430
9431 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9432                                   struct btrfs_path *path,
9433                                   const struct btrfs_key *root_key,
9434                                   const int read_only_mode)
9435 {
9436         const u64 root_id = root_key->objectid;
9437         struct cache_extent *entry;
9438         struct root_item_info *rii;
9439         struct btrfs_root_item ri;
9440         unsigned long offset;
9441
9442         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9443         if (!entry) {
9444                 fprintf(stderr,
9445                         "Error: could not find extent items for root %llu\n",
9446                         root_key->objectid);
9447                 return -ENOENT;
9448         }
9449
9450         rii = container_of(entry, struct root_item_info, cache_extent);
9451         ASSERT(rii->cache_extent.start == root_id);
9452         ASSERT(rii->cache_extent.size == 1);
9453
9454         if (rii->node_count != 1) {
9455                 fprintf(stderr,
9456                         "Error: could not find btree root extent for root %llu\n",
9457                         root_id);
9458                 return -ENOENT;
9459         }
9460
9461         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9462         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9463
9464         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9465             btrfs_root_level(&ri) != rii->level ||
9466             btrfs_root_generation(&ri) != rii->gen) {
9467
9468                 /*
9469                  * If we're in repair mode but our caller told us to not update
9470                  * the root item, i.e. just check if it needs to be updated, don't
9471                  * print this message, since the caller will call us again shortly
9472                  * for the same root item without read only mode (the caller will
9473                  * open a transaction first).
9474                  */
9475                 if (!(read_only_mode && repair))
9476                         fprintf(stderr,
9477                                 "%sroot item for root %llu,"
9478                                 " current bytenr %llu, current gen %llu, current level %u,"
9479                                 " new bytenr %llu, new gen %llu, new level %u\n",
9480                                 (read_only_mode ? "" : "fixing "),
9481                                 root_id,
9482                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9483                                 btrfs_root_level(&ri),
9484                                 rii->bytenr, rii->gen, rii->level);
9485
9486                 if (btrfs_root_generation(&ri) > rii->gen) {
9487                         fprintf(stderr,
9488                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9489                                 root_id, btrfs_root_generation(&ri), rii->gen);
9490                         return -EINVAL;
9491                 }
9492
9493                 if (!read_only_mode) {
9494                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9495                         btrfs_set_root_level(&ri, rii->level);
9496                         btrfs_set_root_generation(&ri, rii->gen);
9497                         write_extent_buffer(path->nodes[0], &ri,
9498                                             offset, sizeof(ri));
9499                 }
9500
9501                 return 1;
9502         }
9503
9504         return 0;
9505 }
9506
9507 /*
9508  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9509  * caused read-only snapshots to be corrupted if they were created at a moment
9510  * when the source subvolume/snapshot had orphan items. The issue was that the
9511  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9512  * node instead of the post orphan cleanup root node.
9513  * So this function, and its callees, just detects and fixes those cases. Even
9514  * though the regression was for read-only snapshots, this function applies to
9515  * any snapshot/subvolume root.
9516  * This must be run before any other repair code - not doing it so, makes other
9517  * repair code delete or modify backrefs in the extent tree for example, which
9518  * will result in an inconsistent fs after repairing the root items.
9519  */
9520 static int repair_root_items(struct btrfs_fs_info *info)
9521 {
9522         struct btrfs_path *path = NULL;
9523         struct btrfs_key key;
9524         struct extent_buffer *leaf;
9525         struct btrfs_trans_handle *trans = NULL;
9526         int ret = 0;
9527         int bad_roots = 0;
9528         int need_trans = 0;
9529
9530         ret = build_roots_info_cache(info);
9531         if (ret)
9532                 goto out;
9533
9534         path = btrfs_alloc_path();
9535         if (!path) {
9536                 ret = -ENOMEM;
9537                 goto out;
9538         }
9539
9540         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9541         key.type = BTRFS_ROOT_ITEM_KEY;
9542         key.offset = 0;
9543
9544 again:
9545         /*
9546          * Avoid opening and committing transactions if a leaf doesn't have
9547          * any root items that need to be fixed, so that we avoid rotating
9548          * backup roots unnecessarily.
9549          */
9550         if (need_trans) {
9551                 trans = btrfs_start_transaction(info->tree_root, 1);
9552                 if (IS_ERR(trans)) {
9553                         ret = PTR_ERR(trans);
9554                         goto out;
9555                 }
9556         }
9557
9558         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9559                                 0, trans ? 1 : 0);
9560         if (ret < 0)
9561                 goto out;
9562         leaf = path->nodes[0];
9563
9564         while (1) {
9565                 struct btrfs_key found_key;
9566
9567                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9568                         int no_more_keys = find_next_key(path, &key);
9569
9570                         btrfs_release_path(path);
9571                         if (trans) {
9572                                 ret = btrfs_commit_transaction(trans,
9573                                                                info->tree_root);
9574                                 trans = NULL;
9575                                 if (ret < 0)
9576                                         goto out;
9577                         }
9578                         need_trans = 0;
9579                         if (no_more_keys)
9580                                 break;
9581                         goto again;
9582                 }
9583
9584                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9585
9586                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9587                         goto next;
9588                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9589                         goto next;
9590
9591                 ret = maybe_repair_root_item(info, path, &found_key,
9592                                              trans ? 0 : 1);
9593                 if (ret < 0)
9594                         goto out;
9595                 if (ret) {
9596                         if (!trans && repair) {
9597                                 need_trans = 1;
9598                                 key = found_key;
9599                                 btrfs_release_path(path);
9600                                 goto again;
9601                         }
9602                         bad_roots++;
9603                 }
9604 next:
9605                 path->slots[0]++;
9606         }
9607         ret = 0;
9608 out:
9609         free_roots_info_cache();
9610         btrfs_free_path(path);
9611         if (trans)
9612                 btrfs_commit_transaction(trans, info->tree_root);
9613         if (ret < 0)
9614                 return ret;
9615
9616         return bad_roots;
9617 }
9618
9619 const char * const cmd_check_usage[] = {
9620         "btrfs check [options] <device>",
9621         "Check structural integrity of a filesystem (unmounted).",
9622         "Check structural integrity of an unmounted filesystem. Verify internal",
9623         "trees' consistency and item connectivity. In the repair mode try to",
9624         "fix the problems found.",
9625         "WARNING: the repair mode is considered dangerous",
9626         "",
9627         "-s|--super <superblock>     use this superblock copy",
9628         "-b|--backup                 use the first valid backup root copy",
9629         "--repair                    try to repair the filesystem",
9630         "--readonly                  run in read-only mode (default)",
9631         "--init-csum-tree            create a new CRC tree",
9632         "--init-extent-tree          create a new extent tree",
9633         "--check-data-csum           verify checksums of data blocks",
9634         "-Q|--qgroup-report           print a report on qgroup consistency",
9635         "-E|--subvol-extents <subvolid>",
9636         "                            print subvolume extents and sharing state",
9637         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9638         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9639         "-p|--progress               indicate progress",
9640         NULL
9641 };
9642
9643 int cmd_check(int argc, char **argv)
9644 {
9645         struct cache_tree root_cache;
9646         struct btrfs_root *root;
9647         struct btrfs_fs_info *info;
9648         u64 bytenr = 0;
9649         u64 subvolid = 0;
9650         u64 tree_root_bytenr = 0;
9651         u64 chunk_root_bytenr = 0;
9652         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9653         int ret;
9654         u64 num;
9655         int init_csum_tree = 0;
9656         int readonly = 0;
9657         int qgroup_report = 0;
9658         int qgroups_repaired = 0;
9659         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9660
9661         while(1) {
9662                 int c;
9663                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9664                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9665                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9666                 static const struct option long_options[] = {
9667                         { "super", required_argument, NULL, 's' },
9668                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9669                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9670                         { "init-csum-tree", no_argument, NULL,
9671                                 GETOPT_VAL_INIT_CSUM },
9672                         { "init-extent-tree", no_argument, NULL,
9673                                 GETOPT_VAL_INIT_EXTENT },
9674                         { "check-data-csum", no_argument, NULL,
9675                                 GETOPT_VAL_CHECK_CSUM },
9676                         { "backup", no_argument, NULL, 'b' },
9677                         { "subvol-extents", required_argument, NULL, 'E' },
9678                         { "qgroup-report", no_argument, NULL, 'Q' },
9679                         { "tree-root", required_argument, NULL, 'r' },
9680                         { "chunk-root", required_argument, NULL,
9681                                 GETOPT_VAL_CHUNK_TREE },
9682                         { "progress", no_argument, NULL, 'p' },
9683                         { NULL, 0, NULL, 0}
9684                 };
9685
9686                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9687                 if (c < 0)
9688                         break;
9689                 switch(c) {
9690                         case 'a': /* ignored */ break;
9691                         case 'b':
9692                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9693                                 break;
9694                         case 's':
9695                                 num = arg_strtou64(optarg);
9696                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9697                                         fprintf(stderr,
9698                                                 "ERROR: super mirror should be less than: %d\n",
9699                                                 BTRFS_SUPER_MIRROR_MAX);
9700                                         exit(1);
9701                                 }
9702                                 bytenr = btrfs_sb_offset(((int)num));
9703                                 printf("using SB copy %llu, bytenr %llu\n", num,
9704                                        (unsigned long long)bytenr);
9705                                 break;
9706                         case 'Q':
9707                                 qgroup_report = 1;
9708                                 break;
9709                         case 'E':
9710                                 subvolid = arg_strtou64(optarg);
9711                                 break;
9712                         case 'r':
9713                                 tree_root_bytenr = arg_strtou64(optarg);
9714                                 break;
9715                         case GETOPT_VAL_CHUNK_TREE:
9716                                 chunk_root_bytenr = arg_strtou64(optarg);
9717                                 break;
9718                         case 'p':
9719                                 ctx.progress_enabled = true;
9720                                 break;
9721                         case '?':
9722                         case 'h':
9723                                 usage(cmd_check_usage);
9724                         case GETOPT_VAL_REPAIR:
9725                                 printf("enabling repair mode\n");
9726                                 repair = 1;
9727                                 ctree_flags |= OPEN_CTREE_WRITES;
9728                                 break;
9729                         case GETOPT_VAL_READONLY:
9730                                 readonly = 1;
9731                                 break;
9732                         case GETOPT_VAL_INIT_CSUM:
9733                                 printf("Creating a new CRC tree\n");
9734                                 init_csum_tree = 1;
9735                                 repair = 1;
9736                                 ctree_flags |= OPEN_CTREE_WRITES;
9737                                 break;
9738                         case GETOPT_VAL_INIT_EXTENT:
9739                                 init_extent_tree = 1;
9740                                 ctree_flags |= (OPEN_CTREE_WRITES |
9741                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9742                                 repair = 1;
9743                                 break;
9744                         case GETOPT_VAL_CHECK_CSUM:
9745                                 check_data_csum = 1;
9746                                 break;
9747                 }
9748         }
9749
9750         if (check_argc_exact(argc - optind, 1))
9751                 usage(cmd_check_usage);
9752
9753         if (ctx.progress_enabled) {
9754                 ctx.tp = TASK_NOTHING;
9755                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9756         }
9757
9758         /* This check is the only reason for --readonly to exist */
9759         if (readonly && repair) {
9760                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9761                 exit(1);
9762         }
9763
9764         radix_tree_init();
9765         cache_tree_init(&root_cache);
9766
9767         if((ret = check_mounted(argv[optind])) < 0) {
9768                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9769                 goto err_out;
9770         } else if(ret) {
9771                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9772                 ret = -EBUSY;
9773                 goto err_out;
9774         }
9775
9776         /* only allow partial opening under repair mode */
9777         if (repair)
9778                 ctree_flags |= OPEN_CTREE_PARTIAL;
9779
9780         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9781                                   chunk_root_bytenr, ctree_flags);
9782         if (!info) {
9783                 fprintf(stderr, "Couldn't open file system\n");
9784                 ret = -EIO;
9785                 goto err_out;
9786         }
9787
9788         global_info = info;
9789         root = info->fs_root;
9790
9791         /*
9792          * repair mode will force us to commit transaction which
9793          * will make us fail to load log tree when mounting.
9794          */
9795         if (repair && btrfs_super_log_root(info->super_copy)) {
9796                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9797                 if (!ret) {
9798                         ret = 1;
9799                         goto close_out;
9800                 }
9801                 ret = zero_log_tree(root);
9802                 if (ret) {
9803                         fprintf(stderr, "fail to zero log tree\n");
9804                         goto close_out;
9805                 }
9806         }
9807
9808         uuid_unparse(info->super_copy->fsid, uuidbuf);
9809         if (qgroup_report) {
9810                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9811                        uuidbuf);
9812                 ret = qgroup_verify_all(info);
9813                 if (ret == 0)
9814                         report_qgroups(1);
9815                 goto close_out;
9816         }
9817         if (subvolid) {
9818                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9819                        subvolid, argv[optind], uuidbuf);
9820                 ret = print_extent_state(info, subvolid);
9821                 goto close_out;
9822         }
9823         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9824
9825         if (!extent_buffer_uptodate(info->tree_root->node) ||
9826             !extent_buffer_uptodate(info->dev_root->node) ||
9827             !extent_buffer_uptodate(info->chunk_root->node)) {
9828                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9829                 ret = -EIO;
9830                 goto close_out;
9831         }
9832
9833         if (init_extent_tree || init_csum_tree) {
9834                 struct btrfs_trans_handle *trans;
9835
9836                 trans = btrfs_start_transaction(info->extent_root, 0);
9837                 if (IS_ERR(trans)) {
9838                         fprintf(stderr, "Error starting transaction\n");
9839                         ret = PTR_ERR(trans);
9840                         goto close_out;
9841                 }
9842
9843                 if (init_extent_tree) {
9844                         printf("Creating a new extent tree\n");
9845                         ret = reinit_extent_tree(trans, info);
9846                         if (ret)
9847                                 goto close_out;
9848                 }
9849
9850                 if (init_csum_tree) {
9851                         fprintf(stderr, "Reinit crc root\n");
9852                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9853                         if (ret) {
9854                                 fprintf(stderr, "crc root initialization failed\n");
9855                                 ret = -EIO;
9856                                 goto close_out;
9857                         }
9858
9859                         ret = fill_csum_tree(trans, info->csum_root,
9860                                              init_extent_tree);
9861                         if (ret) {
9862                                 fprintf(stderr, "crc refilling failed\n");
9863                                 return -EIO;
9864                         }
9865                 }
9866                 /*
9867                  * Ok now we commit and run the normal fsck, which will add
9868                  * extent entries for all of the items it finds.
9869                  */
9870                 ret = btrfs_commit_transaction(trans, info->extent_root);
9871                 if (ret)
9872                         goto close_out;
9873         }
9874         if (!extent_buffer_uptodate(info->extent_root->node)) {
9875                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9876                 ret = -EIO;
9877                 goto close_out;
9878         }
9879         if (!extent_buffer_uptodate(info->csum_root->node)) {
9880                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9881                 ret = -EIO;
9882                 goto close_out;
9883         }
9884
9885         if (!ctx.progress_enabled)
9886                 fprintf(stderr, "checking extents\n");
9887         ret = check_chunks_and_extents(root);
9888         if (ret)
9889                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9890
9891         ret = repair_root_items(info);
9892         if (ret < 0)
9893                 goto close_out;
9894         if (repair) {
9895                 fprintf(stderr, "Fixed %d roots.\n", ret);
9896                 ret = 0;
9897         } else if (ret > 0) {
9898                 fprintf(stderr,
9899                        "Found %d roots with an outdated root item.\n",
9900                        ret);
9901                 fprintf(stderr,
9902                         "Please run a filesystem check with the option --repair to fix them.\n");
9903                 ret = 1;
9904                 goto close_out;
9905         }
9906
9907         if (!ctx.progress_enabled) {
9908                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9909                         fprintf(stderr, "checking free space tree\n");
9910                 else
9911                         fprintf(stderr, "checking free space cache\n");
9912         }
9913         ret = check_space_cache(root);
9914         if (ret)
9915                 goto out;
9916
9917         /*
9918          * We used to have to have these hole extents in between our real
9919          * extents so if we don't have this flag set we need to make sure there
9920          * are no gaps in the file extents for inodes, otherwise we can just
9921          * ignore it when this happens.
9922          */
9923         no_holes = btrfs_fs_incompat(root->fs_info,
9924                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9925         if (!ctx.progress_enabled)
9926                 fprintf(stderr, "checking fs roots\n");
9927         ret = check_fs_roots(root, &root_cache);
9928         if (ret)
9929                 goto out;
9930
9931         fprintf(stderr, "checking csums\n");
9932         ret = check_csums(root);
9933         if (ret)
9934                 goto out;
9935
9936         fprintf(stderr, "checking root refs\n");
9937         ret = check_root_refs(root, &root_cache);
9938         if (ret)
9939                 goto out;
9940
9941         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9942                 struct extent_buffer *eb;
9943
9944                 eb = list_first_entry(&root->fs_info->recow_ebs,
9945                                       struct extent_buffer, recow);
9946                 list_del_init(&eb->recow);
9947                 ret = recow_extent_buffer(root, eb);
9948                 if (ret)
9949                         break;
9950         }
9951
9952         while (!list_empty(&delete_items)) {
9953                 struct bad_item *bad;
9954
9955                 bad = list_first_entry(&delete_items, struct bad_item, list);
9956                 list_del_init(&bad->list);
9957                 if (repair)
9958                         ret = delete_bad_item(root, bad);
9959                 free(bad);
9960         }
9961
9962         if (info->quota_enabled) {
9963                 int err;
9964                 fprintf(stderr, "checking quota groups\n");
9965                 err = qgroup_verify_all(info);
9966                 if (err)
9967                         goto out;
9968                 report_qgroups(0);
9969                 err = repair_qgroups(info, &qgroups_repaired);
9970                 if (err)
9971                         goto out;
9972         }
9973
9974         if (!list_empty(&root->fs_info->recow_ebs)) {
9975                 fprintf(stderr, "Transid errors in file system\n");
9976                 ret = 1;
9977         }
9978 out:
9979         /* Don't override original ret */
9980         if (!ret && qgroups_repaired)
9981                 ret = qgroups_repaired;
9982
9983         if (found_old_backref) { /*
9984                  * there was a disk format change when mixed
9985                  * backref was in testing tree. The old format
9986                  * existed about one week.
9987                  */
9988                 printf("\n * Found old mixed backref format. "
9989                        "The old format is not supported! *"
9990                        "\n * Please mount the FS in readonly mode, "
9991                        "backup data and re-format the FS. *\n\n");
9992                 ret = 1;
9993         }
9994         printf("found %llu bytes used err is %d\n",
9995                (unsigned long long)bytes_used, ret);
9996         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9997         printf("total tree bytes: %llu\n",
9998                (unsigned long long)total_btree_bytes);
9999         printf("total fs tree bytes: %llu\n",
10000                (unsigned long long)total_fs_tree_bytes);
10001         printf("total extent tree bytes: %llu\n",
10002                (unsigned long long)total_extent_tree_bytes);
10003         printf("btree space waste bytes: %llu\n",
10004                (unsigned long long)btree_space_waste);
10005         printf("file data blocks allocated: %llu\n referenced %llu\n",
10006                 (unsigned long long)data_bytes_allocated,
10007                 (unsigned long long)data_bytes_referenced);
10008
10009         free_qgroup_counts();
10010         free_root_recs_tree(&root_cache);
10011 close_out:
10012         close_ctree(root);
10013 err_out:
10014         if (ctx.progress_enabled)
10015                 task_deinit(ctx.info);
10016
10017         return ret;
10018 }