394f838e711d61013bc02e812a65f328dcb01d64
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
77 struct extent_backref {
78         struct rb_node node;
79         unsigned int is_data:1;
80         unsigned int found_extent_tree:1;
81         unsigned int full_backref:1;
82         unsigned int found_ref:1;
83         unsigned int broken:1;
84 };
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
91 struct data_backref {
92         struct extent_backref node;
93         union {
94                 u64 parent;
95                 u64 root;
96         };
97         u64 owner;
98         u64 offset;
99         u64 disk_bytenr;
100         u64 bytes;
101         u64 ram_bytes;
102         u32 num_refs;
103         u32 found_ref;
104 };
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
161 /*
162  * Much like data_backref, just removed the undetermined members
163  * and change it to use list_head.
164  * During extent scan, it is stored in root->orphan_data_extent.
165  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
166  */
167 struct orphan_data_extent {
168         struct list_head list;
169         u64 root;
170         u64 objectid;
171         u64 offset;
172         u64 disk_bytenr;
173         u64 disk_len;
174 };
175
176 struct tree_backref {
177         struct extent_backref node;
178         union {
179                 u64 parent;
180                 u64 root;
181         };
182 };
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
232
233 struct extent_record {
234         struct list_head backrefs;
235         struct list_head dups;
236         struct rb_root backref_tree;
237         struct list_head list;
238         struct cache_extent cache;
239         struct btrfs_disk_key parent_key;
240         u64 start;
241         u64 max_size;
242         u64 nr;
243         u64 refs;
244         u64 extent_item_refs;
245         u64 generation;
246         u64 parent_generation;
247         u64 info_objectid;
248         u32 num_duplicates;
249         u8 info_level;
250         unsigned int flag_block_full_backref:2;
251         unsigned int found_rec:1;
252         unsigned int content_checked:1;
253         unsigned int owner_ref_checked:1;
254         unsigned int is_root:1;
255         unsigned int metadata:1;
256         unsigned int bad_full_backref:1;
257         unsigned int crossing_stripes:1;
258         unsigned int wrong_chunk_type:1;
259 };
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
266 struct inode_backref {
267         struct list_head list;
268         unsigned int found_dir_item:1;
269         unsigned int found_dir_index:1;
270         unsigned int found_inode_ref:1;
271         unsigned int filetype:8;
272         int errors;
273         unsigned int ref_type;
274         u64 dir;
275         u64 index;
276         u16 namelen;
277         char name[0];
278 };
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
285 struct root_item_record {
286         struct list_head list;
287         u64 objectid;
288         u64 bytenr;
289         u64 last_snapshot;
290         u8 level;
291         u8 drop_level;
292         int level_size;
293         struct btrfs_key drop_key;
294 };
295
296 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
298 #define REF_ERR_NO_INODE_REF            (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
301 #define REF_ERR_DUP_INODE_REF           (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF             (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
309
310 struct file_extent_hole {
311         struct rb_node node;
312         u64 start;
313         u64 len;
314 };
315
316 struct inode_record {
317         struct list_head backrefs;
318         unsigned int checked:1;
319         unsigned int merging:1;
320         unsigned int found_inode_item:1;
321         unsigned int found_dir_item:1;
322         unsigned int found_file_extent:1;
323         unsigned int found_csum_item:1;
324         unsigned int some_csum_missing:1;
325         unsigned int nodatasum:1;
326         int errors;
327
328         u64 ino;
329         u32 nlink;
330         u32 imode;
331         u64 isize;
332         u64 nbytes;
333
334         u32 found_link;
335         u64 found_size;
336         u64 extent_start;
337         u64 extent_end;
338         struct rb_root holes;
339         struct list_head orphan_extents;
340
341         u32 refs;
342 };
343
344 #define I_ERR_NO_INODE_ITEM             (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
359
360 struct root_backref {
361         struct list_head list;
362         unsigned int found_dir_item:1;
363         unsigned int found_dir_index:1;
364         unsigned int found_back_ref:1;
365         unsigned int found_forward_ref:1;
366         unsigned int reachable:1;
367         int errors;
368         u64 ref_root;
369         u64 dir;
370         u64 index;
371         u16 namelen;
372         char name[0];
373 };
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
380 struct root_record {
381         struct list_head backrefs;
382         struct cache_extent cache;
383         unsigned int found_root_item:1;
384         u64 objectid;
385         u32 found_ref;
386 };
387
388 struct ptr_node {
389         struct cache_extent cache;
390         void *data;
391 };
392
393 struct shared_node {
394         struct cache_extent cache;
395         struct cache_tree root_cache;
396         struct cache_tree inode_cache;
397         struct inode_record *current;
398         u32 refs;
399 };
400
401 struct block_info {
402         u64 start;
403         u32 size;
404 };
405
406 struct walk_control {
407         struct cache_tree shared;
408         struct shared_node *nodes[BTRFS_MAX_LEVEL];
409         int active_node;
410         int root_level;
411 };
412
413 struct bad_item {
414         struct btrfs_key key;
415         u64 root_id;
416         struct list_head list;
417 };
418
419 struct extent_entry {
420         u64 bytenr;
421         u64 bytes;
422         int count;
423         int broken;
424         struct list_head list;
425 };
426
427 struct root_item_info {
428         /* level of the root */
429         u8 level;
430         /* number of nodes at this level, must be 1 for a root */
431         int node_count;
432         u64 bytenr;
433         u64 gen;
434         struct cache_extent cache_extent;
435 };
436
437 static void *print_status_check(void *p)
438 {
439         struct task_ctx *priv = p;
440         const char work_indicator[] = { '.', 'o', 'O', 'o' };
441         uint32_t count = 0;
442         static char *task_position_string[] = {
443                 "checking extents",
444                 "checking free space cache",
445                 "checking fs roots",
446         };
447
448         task_period_start(priv->info, 1000 /* 1s */);
449
450         if (priv->tp == TASK_NOTHING)
451                 return NULL;
452
453         while (1) {
454                 printf("%s [%c]\r", task_position_string[priv->tp],
455                                 work_indicator[count % 4]);
456                 count++;
457                 fflush(stdout);
458                 task_period_wait(priv->info);
459         }
460         return NULL;
461 }
462
463 static int print_status_return(void *p)
464 {
465         printf("\n");
466         fflush(stdout);
467
468         return 0;
469 }
470
471 /* Compatible function to allow reuse of old codes */
472 static u64 first_extent_gap(struct rb_root *holes)
473 {
474         struct file_extent_hole *hole;
475
476         if (RB_EMPTY_ROOT(holes))
477                 return (u64)-1;
478
479         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
480         return hole->start;
481 }
482
483 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
484 {
485         struct file_extent_hole *hole1;
486         struct file_extent_hole *hole2;
487
488         hole1 = rb_entry(node1, struct file_extent_hole, node);
489         hole2 = rb_entry(node2, struct file_extent_hole, node);
490
491         if (hole1->start > hole2->start)
492                 return -1;
493         if (hole1->start < hole2->start)
494                 return 1;
495         /* Now hole1->start == hole2->start */
496         if (hole1->len >= hole2->len)
497                 /*
498                  * Hole 1 will be merge center
499                  * Same hole will be merged later
500                  */
501                 return -1;
502         /* Hole 2 will be merge center */
503         return 1;
504 }
505
506 /*
507  * Add a hole to the record
508  *
509  * This will do hole merge for copy_file_extent_holes(),
510  * which will ensure there won't be continuous holes.
511  */
512 static int add_file_extent_hole(struct rb_root *holes,
513                                 u64 start, u64 len)
514 {
515         struct file_extent_hole *hole;
516         struct file_extent_hole *prev = NULL;
517         struct file_extent_hole *next = NULL;
518
519         hole = malloc(sizeof(*hole));
520         if (!hole)
521                 return -ENOMEM;
522         hole->start = start;
523         hole->len = len;
524         /* Since compare will not return 0, no -EEXIST will happen */
525         rb_insert(holes, &hole->node, compare_hole);
526
527         /* simple merge with previous hole */
528         if (rb_prev(&hole->node))
529                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
530                                 node);
531         if (prev && prev->start + prev->len >= hole->start) {
532                 hole->len = hole->start + hole->len - prev->start;
533                 hole->start = prev->start;
534                 rb_erase(&prev->node, holes);
535                 free(prev);
536                 prev = NULL;
537         }
538
539         /* iterate merge with next holes */
540         while (1) {
541                 if (!rb_next(&hole->node))
542                         break;
543                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
544                                         node);
545                 if (hole->start + hole->len >= next->start) {
546                         if (hole->start + hole->len <= next->start + next->len)
547                                 hole->len = next->start + next->len -
548                                             hole->start;
549                         rb_erase(&next->node, holes);
550                         free(next);
551                         next = NULL;
552                 } else
553                         break;
554         }
555         return 0;
556 }
557
558 static int compare_hole_range(struct rb_node *node, void *data)
559 {
560         struct file_extent_hole *hole;
561         u64 start;
562
563         hole = (struct file_extent_hole *)data;
564         start = hole->start;
565
566         hole = rb_entry(node, struct file_extent_hole, node);
567         if (start < hole->start)
568                 return -1;
569         if (start >= hole->start && start < hole->start + hole->len)
570                 return 0;
571         return 1;
572 }
573
574 /*
575  * Delete a hole in the record
576  *
577  * This will do the hole split and is much restrict than add.
578  */
579 static int del_file_extent_hole(struct rb_root *holes,
580                                 u64 start, u64 len)
581 {
582         struct file_extent_hole *hole;
583         struct file_extent_hole tmp;
584         u64 prev_start = 0;
585         u64 prev_len = 0;
586         u64 next_start = 0;
587         u64 next_len = 0;
588         struct rb_node *node;
589         int have_prev = 0;
590         int have_next = 0;
591         int ret = 0;
592
593         tmp.start = start;
594         tmp.len = len;
595         node = rb_search(holes, &tmp, compare_hole_range, NULL);
596         if (!node)
597                 return -EEXIST;
598         hole = rb_entry(node, struct file_extent_hole, node);
599         if (start + len > hole->start + hole->len)
600                 return -EEXIST;
601
602         /*
603          * Now there will be no overlap, delete the hole and re-add the
604          * split(s) if they exists.
605          */
606         if (start > hole->start) {
607                 prev_start = hole->start;
608                 prev_len = start - hole->start;
609                 have_prev = 1;
610         }
611         if (hole->start + hole->len > start + len) {
612                 next_start = start + len;
613                 next_len = hole->start + hole->len - start - len;
614                 have_next = 1;
615         }
616         rb_erase(node, holes);
617         free(hole);
618         if (have_prev) {
619                 ret = add_file_extent_hole(holes, prev_start, prev_len);
620                 if (ret < 0)
621                         return ret;
622         }
623         if (have_next) {
624                 ret = add_file_extent_hole(holes, next_start, next_len);
625                 if (ret < 0)
626                         return ret;
627         }
628         return 0;
629 }
630
631 static int copy_file_extent_holes(struct rb_root *dst,
632                                   struct rb_root *src)
633 {
634         struct file_extent_hole *hole;
635         struct rb_node *node;
636         int ret = 0;
637
638         node = rb_first(src);
639         while (node) {
640                 hole = rb_entry(node, struct file_extent_hole, node);
641                 ret = add_file_extent_hole(dst, hole->start, hole->len);
642                 if (ret)
643                         break;
644                 node = rb_next(node);
645         }
646         return ret;
647 }
648
649 static void free_file_extent_holes(struct rb_root *holes)
650 {
651         struct rb_node *node;
652         struct file_extent_hole *hole;
653
654         node = rb_first(holes);
655         while (node) {
656                 hole = rb_entry(node, struct file_extent_hole, node);
657                 rb_erase(node, holes);
658                 free(hole);
659                 node = rb_first(holes);
660         }
661 }
662
663 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
664
665 static void record_root_in_trans(struct btrfs_trans_handle *trans,
666                                  struct btrfs_root *root)
667 {
668         if (root->last_trans != trans->transid) {
669                 root->track_dirty = 1;
670                 root->last_trans = trans->transid;
671                 root->commit_root = root->node;
672                 extent_buffer_get(root->node);
673         }
674 }
675
676 static u8 imode_to_type(u32 imode)
677 {
678 #define S_SHIFT 12
679         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
680                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
681                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
682                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
683                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
684                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
685                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
686                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
687         };
688
689         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
690 #undef S_SHIFT
691 }
692
693 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
694 {
695         struct device_record *rec1;
696         struct device_record *rec2;
697
698         rec1 = rb_entry(node1, struct device_record, node);
699         rec2 = rb_entry(node2, struct device_record, node);
700         if (rec1->devid > rec2->devid)
701                 return -1;
702         else if (rec1->devid < rec2->devid)
703                 return 1;
704         else
705                 return 0;
706 }
707
708 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
709 {
710         struct inode_record *rec;
711         struct inode_backref *backref;
712         struct inode_backref *orig;
713         struct inode_backref *tmp;
714         struct orphan_data_extent *src_orphan;
715         struct orphan_data_extent *dst_orphan;
716         size_t size;
717         int ret;
718
719         rec = malloc(sizeof(*rec));
720         if (!rec)
721                 return ERR_PTR(-ENOMEM);
722         memcpy(rec, orig_rec, sizeof(*rec));
723         rec->refs = 1;
724         INIT_LIST_HEAD(&rec->backrefs);
725         INIT_LIST_HEAD(&rec->orphan_extents);
726         rec->holes = RB_ROOT;
727
728         list_for_each_entry(orig, &orig_rec->backrefs, list) {
729                 size = sizeof(*orig) + orig->namelen + 1;
730                 backref = malloc(size);
731                 if (!backref) {
732                         ret = -ENOMEM;
733                         goto cleanup;
734                 }
735                 memcpy(backref, orig, size);
736                 list_add_tail(&backref->list, &rec->backrefs);
737         }
738         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
739                 dst_orphan = malloc(sizeof(*dst_orphan));
740                 if (!dst_orphan) {
741                         ret = -ENOMEM;
742                         goto cleanup;
743                 }
744                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
745                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
746         }
747         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
748         BUG_ON(ret < 0);
749
750         return rec;
751
752 cleanup:
753         if (!list_empty(&rec->backrefs))
754                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
755                         list_del(&orig->list);
756                         free(orig);
757                 }
758
759         if (!list_empty(&rec->orphan_extents))
760                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
761                         list_del(&orig->list);
762                         free(orig);
763                 }
764
765         free(rec);
766
767         return ERR_PTR(ret);
768 }
769
770 static void print_orphan_data_extents(struct list_head *orphan_extents,
771                                       u64 objectid)
772 {
773         struct orphan_data_extent *orphan;
774
775         if (list_empty(orphan_extents))
776                 return;
777         printf("The following data extent is lost in tree %llu:\n",
778                objectid);
779         list_for_each_entry(orphan, orphan_extents, list) {
780                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
781                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
782                        orphan->disk_len);
783         }
784 }
785
786 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
787 {
788         u64 root_objectid = root->root_key.objectid;
789         int errors = rec->errors;
790
791         if (!errors)
792                 return;
793         /* reloc root errors, we print its corresponding fs root objectid*/
794         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
795                 root_objectid = root->root_key.offset;
796                 fprintf(stderr, "reloc");
797         }
798         fprintf(stderr, "root %llu inode %llu errors %x",
799                 (unsigned long long) root_objectid,
800                 (unsigned long long) rec->ino, rec->errors);
801
802         if (errors & I_ERR_NO_INODE_ITEM)
803                 fprintf(stderr, ", no inode item");
804         if (errors & I_ERR_NO_ORPHAN_ITEM)
805                 fprintf(stderr, ", no orphan item");
806         if (errors & I_ERR_DUP_INODE_ITEM)
807                 fprintf(stderr, ", dup inode item");
808         if (errors & I_ERR_DUP_DIR_INDEX)
809                 fprintf(stderr, ", dup dir index");
810         if (errors & I_ERR_ODD_DIR_ITEM)
811                 fprintf(stderr, ", odd dir item");
812         if (errors & I_ERR_ODD_FILE_EXTENT)
813                 fprintf(stderr, ", odd file extent");
814         if (errors & I_ERR_BAD_FILE_EXTENT)
815                 fprintf(stderr, ", bad file extent");
816         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
817                 fprintf(stderr, ", file extent overlap");
818         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
819                 fprintf(stderr, ", file extent discount");
820         if (errors & I_ERR_DIR_ISIZE_WRONG)
821                 fprintf(stderr, ", dir isize wrong");
822         if (errors & I_ERR_FILE_NBYTES_WRONG)
823                 fprintf(stderr, ", nbytes wrong");
824         if (errors & I_ERR_ODD_CSUM_ITEM)
825                 fprintf(stderr, ", odd csum item");
826         if (errors & I_ERR_SOME_CSUM_MISSING)
827                 fprintf(stderr, ", some csum missing");
828         if (errors & I_ERR_LINK_COUNT_WRONG)
829                 fprintf(stderr, ", link count wrong");
830         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
831                 fprintf(stderr, ", orphan file extent");
832         fprintf(stderr, "\n");
833         /* Print the orphan extents if needed */
834         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
835                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
836
837         /* Print the holes if needed */
838         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
839                 struct file_extent_hole *hole;
840                 struct rb_node *node;
841                 int found = 0;
842
843                 node = rb_first(&rec->holes);
844                 fprintf(stderr, "Found file extent holes:\n");
845                 while (node) {
846                         found = 1;
847                         hole = rb_entry(node, struct file_extent_hole, node);
848                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
849                                 hole->start, hole->len);
850                         node = rb_next(node);
851                 }
852                 if (!found)
853                         fprintf(stderr, "\tstart: 0, len: %llu\n",
854                                 round_up(rec->isize, root->sectorsize));
855         }
856 }
857
858 static void print_ref_error(int errors)
859 {
860         if (errors & REF_ERR_NO_DIR_ITEM)
861                 fprintf(stderr, ", no dir item");
862         if (errors & REF_ERR_NO_DIR_INDEX)
863                 fprintf(stderr, ", no dir index");
864         if (errors & REF_ERR_NO_INODE_REF)
865                 fprintf(stderr, ", no inode ref");
866         if (errors & REF_ERR_DUP_DIR_ITEM)
867                 fprintf(stderr, ", dup dir item");
868         if (errors & REF_ERR_DUP_DIR_INDEX)
869                 fprintf(stderr, ", dup dir index");
870         if (errors & REF_ERR_DUP_INODE_REF)
871                 fprintf(stderr, ", dup inode ref");
872         if (errors & REF_ERR_INDEX_UNMATCH)
873                 fprintf(stderr, ", index mismatch");
874         if (errors & REF_ERR_FILETYPE_UNMATCH)
875                 fprintf(stderr, ", filetype mismatch");
876         if (errors & REF_ERR_NAME_TOO_LONG)
877                 fprintf(stderr, ", name too long");
878         if (errors & REF_ERR_NO_ROOT_REF)
879                 fprintf(stderr, ", no root ref");
880         if (errors & REF_ERR_NO_ROOT_BACKREF)
881                 fprintf(stderr, ", no root backref");
882         if (errors & REF_ERR_DUP_ROOT_REF)
883                 fprintf(stderr, ", dup root ref");
884         if (errors & REF_ERR_DUP_ROOT_BACKREF)
885                 fprintf(stderr, ", dup root backref");
886         fprintf(stderr, "\n");
887 }
888
889 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
890                                           u64 ino, int mod)
891 {
892         struct ptr_node *node;
893         struct cache_extent *cache;
894         struct inode_record *rec = NULL;
895         int ret;
896
897         cache = lookup_cache_extent(inode_cache, ino, 1);
898         if (cache) {
899                 node = container_of(cache, struct ptr_node, cache);
900                 rec = node->data;
901                 if (mod && rec->refs > 1) {
902                         node->data = clone_inode_rec(rec);
903                         if (IS_ERR(node->data))
904                                 return node->data;
905                         rec->refs--;
906                         rec = node->data;
907                 }
908         } else if (mod) {
909                 rec = calloc(1, sizeof(*rec));
910                 if (!rec)
911                         return ERR_PTR(-ENOMEM);
912                 rec->ino = ino;
913                 rec->extent_start = (u64)-1;
914                 rec->refs = 1;
915                 INIT_LIST_HEAD(&rec->backrefs);
916                 INIT_LIST_HEAD(&rec->orphan_extents);
917                 rec->holes = RB_ROOT;
918
919                 node = malloc(sizeof(*node));
920                 if (!node) {
921                         free(rec);
922                         return ERR_PTR(-ENOMEM);
923                 }
924                 node->cache.start = ino;
925                 node->cache.size = 1;
926                 node->data = rec;
927
928                 if (ino == BTRFS_FREE_INO_OBJECTID)
929                         rec->found_link = 1;
930
931                 ret = insert_cache_extent(inode_cache, &node->cache);
932                 if (ret)
933                         return ERR_PTR(-EEXIST);
934         }
935         return rec;
936 }
937
938 static void free_orphan_data_extents(struct list_head *orphan_extents)
939 {
940         struct orphan_data_extent *orphan;
941
942         while (!list_empty(orphan_extents)) {
943                 orphan = list_entry(orphan_extents->next,
944                                     struct orphan_data_extent, list);
945                 list_del(&orphan->list);
946                 free(orphan);
947         }
948 }
949
950 static void free_inode_rec(struct inode_record *rec)
951 {
952         struct inode_backref *backref;
953
954         if (--rec->refs > 0)
955                 return;
956
957         while (!list_empty(&rec->backrefs)) {
958                 backref = to_inode_backref(rec->backrefs.next);
959                 list_del(&backref->list);
960                 free(backref);
961         }
962         free_orphan_data_extents(&rec->orphan_extents);
963         free_file_extent_holes(&rec->holes);
964         free(rec);
965 }
966
967 static int can_free_inode_rec(struct inode_record *rec)
968 {
969         if (!rec->errors && rec->checked && rec->found_inode_item &&
970             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
971                 return 1;
972         return 0;
973 }
974
975 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
976                                  struct inode_record *rec)
977 {
978         struct cache_extent *cache;
979         struct inode_backref *tmp, *backref;
980         struct ptr_node *node;
981         unsigned char filetype;
982
983         if (!rec->found_inode_item)
984                 return;
985
986         filetype = imode_to_type(rec->imode);
987         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
988                 if (backref->found_dir_item && backref->found_dir_index) {
989                         if (backref->filetype != filetype)
990                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
991                         if (!backref->errors && backref->found_inode_ref &&
992                             rec->nlink == rec->found_link) {
993                                 list_del(&backref->list);
994                                 free(backref);
995                         }
996                 }
997         }
998
999         if (!rec->checked || rec->merging)
1000                 return;
1001
1002         if (S_ISDIR(rec->imode)) {
1003                 if (rec->found_size != rec->isize)
1004                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1005                 if (rec->found_file_extent)
1006                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1007         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1008                 if (rec->found_dir_item)
1009                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1010                 if (rec->found_size != rec->nbytes)
1011                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1012                 if (rec->nlink > 0 && !no_holes &&
1013                     (rec->extent_end < rec->isize ||
1014                      first_extent_gap(&rec->holes) < rec->isize))
1015                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1016         }
1017
1018         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1019                 if (rec->found_csum_item && rec->nodatasum)
1020                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1021                 if (rec->some_csum_missing && !rec->nodatasum)
1022                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1023         }
1024
1025         BUG_ON(rec->refs != 1);
1026         if (can_free_inode_rec(rec)) {
1027                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1028                 node = container_of(cache, struct ptr_node, cache);
1029                 BUG_ON(node->data != rec);
1030                 remove_cache_extent(inode_cache, &node->cache);
1031                 free(node);
1032                 free_inode_rec(rec);
1033         }
1034 }
1035
1036 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1037 {
1038         struct btrfs_path path;
1039         struct btrfs_key key;
1040         int ret;
1041
1042         key.objectid = BTRFS_ORPHAN_OBJECTID;
1043         key.type = BTRFS_ORPHAN_ITEM_KEY;
1044         key.offset = ino;
1045
1046         btrfs_init_path(&path);
1047         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1048         btrfs_release_path(&path);
1049         if (ret > 0)
1050                 ret = -ENOENT;
1051         return ret;
1052 }
1053
1054 static int process_inode_item(struct extent_buffer *eb,
1055                               int slot, struct btrfs_key *key,
1056                               struct shared_node *active_node)
1057 {
1058         struct inode_record *rec;
1059         struct btrfs_inode_item *item;
1060
1061         rec = active_node->current;
1062         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1063         if (rec->found_inode_item) {
1064                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1065                 return 1;
1066         }
1067         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1068         rec->nlink = btrfs_inode_nlink(eb, item);
1069         rec->isize = btrfs_inode_size(eb, item);
1070         rec->nbytes = btrfs_inode_nbytes(eb, item);
1071         rec->imode = btrfs_inode_mode(eb, item);
1072         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1073                 rec->nodatasum = 1;
1074         rec->found_inode_item = 1;
1075         if (rec->nlink == 0)
1076                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1077         maybe_free_inode_rec(&active_node->inode_cache, rec);
1078         return 0;
1079 }
1080
1081 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1082                                                 const char *name,
1083                                                 int namelen, u64 dir)
1084 {
1085         struct inode_backref *backref;
1086
1087         list_for_each_entry(backref, &rec->backrefs, list) {
1088                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1089                         break;
1090                 if (backref->dir != dir || backref->namelen != namelen)
1091                         continue;
1092                 if (memcmp(name, backref->name, namelen))
1093                         continue;
1094                 return backref;
1095         }
1096
1097         backref = malloc(sizeof(*backref) + namelen + 1);
1098         if (!backref)
1099                 return NULL;
1100         memset(backref, 0, sizeof(*backref));
1101         backref->dir = dir;
1102         backref->namelen = namelen;
1103         memcpy(backref->name, name, namelen);
1104         backref->name[namelen] = '\0';
1105         list_add_tail(&backref->list, &rec->backrefs);
1106         return backref;
1107 }
1108
1109 static int add_inode_backref(struct cache_tree *inode_cache,
1110                              u64 ino, u64 dir, u64 index,
1111                              const char *name, int namelen,
1112                              int filetype, int itemtype, int errors)
1113 {
1114         struct inode_record *rec;
1115         struct inode_backref *backref;
1116
1117         rec = get_inode_rec(inode_cache, ino, 1);
1118         BUG_ON(IS_ERR(rec));
1119         backref = get_inode_backref(rec, name, namelen, dir);
1120         BUG_ON(!backref);
1121         if (errors)
1122                 backref->errors |= errors;
1123         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1124                 if (backref->found_dir_index)
1125                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1126                 if (backref->found_inode_ref && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 if (backref->found_dir_item && backref->filetype != filetype)
1129                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1130
1131                 backref->index = index;
1132                 backref->filetype = filetype;
1133                 backref->found_dir_index = 1;
1134         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1135                 rec->found_link++;
1136                 if (backref->found_dir_item)
1137                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1138                 if (backref->found_dir_index && backref->filetype != filetype)
1139                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1140
1141                 backref->filetype = filetype;
1142                 backref->found_dir_item = 1;
1143         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1144                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1145                 if (backref->found_inode_ref)
1146                         backref->errors |= REF_ERR_DUP_INODE_REF;
1147                 if (backref->found_dir_index && backref->index != index)
1148                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1149                 else
1150                         backref->index = index;
1151
1152                 backref->ref_type = itemtype;
1153                 backref->found_inode_ref = 1;
1154         } else {
1155                 BUG_ON(1);
1156         }
1157
1158         maybe_free_inode_rec(inode_cache, rec);
1159         return 0;
1160 }
1161
1162 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1163                             struct cache_tree *dst_cache)
1164 {
1165         struct inode_backref *backref;
1166         u32 dir_count = 0;
1167         int ret = 0;
1168
1169         dst->merging = 1;
1170         list_for_each_entry(backref, &src->backrefs, list) {
1171                 if (backref->found_dir_index) {
1172                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1173                                         backref->index, backref->name,
1174                                         backref->namelen, backref->filetype,
1175                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1176                 }
1177                 if (backref->found_dir_item) {
1178                         dir_count++;
1179                         add_inode_backref(dst_cache, dst->ino,
1180                                         backref->dir, 0, backref->name,
1181                                         backref->namelen, backref->filetype,
1182                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1183                 }
1184                 if (backref->found_inode_ref) {
1185                         add_inode_backref(dst_cache, dst->ino,
1186                                         backref->dir, backref->index,
1187                                         backref->name, backref->namelen, 0,
1188                                         backref->ref_type, backref->errors);
1189                 }
1190         }
1191
1192         if (src->found_dir_item)
1193                 dst->found_dir_item = 1;
1194         if (src->found_file_extent)
1195                 dst->found_file_extent = 1;
1196         if (src->found_csum_item)
1197                 dst->found_csum_item = 1;
1198         if (src->some_csum_missing)
1199                 dst->some_csum_missing = 1;
1200         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1201                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1202                 if (ret < 0)
1203                         return ret;
1204         }
1205
1206         BUG_ON(src->found_link < dir_count);
1207         dst->found_link += src->found_link - dir_count;
1208         dst->found_size += src->found_size;
1209         if (src->extent_start != (u64)-1) {
1210                 if (dst->extent_start == (u64)-1) {
1211                         dst->extent_start = src->extent_start;
1212                         dst->extent_end = src->extent_end;
1213                 } else {
1214                         if (dst->extent_end > src->extent_start)
1215                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1216                         else if (dst->extent_end < src->extent_start) {
1217                                 ret = add_file_extent_hole(&dst->holes,
1218                                         dst->extent_end,
1219                                         src->extent_start - dst->extent_end);
1220                         }
1221                         if (dst->extent_end < src->extent_end)
1222                                 dst->extent_end = src->extent_end;
1223                 }
1224         }
1225
1226         dst->errors |= src->errors;
1227         if (src->found_inode_item) {
1228                 if (!dst->found_inode_item) {
1229                         dst->nlink = src->nlink;
1230                         dst->isize = src->isize;
1231                         dst->nbytes = src->nbytes;
1232                         dst->imode = src->imode;
1233                         dst->nodatasum = src->nodatasum;
1234                         dst->found_inode_item = 1;
1235                 } else {
1236                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1237                 }
1238         }
1239         dst->merging = 0;
1240
1241         return 0;
1242 }
1243
1244 static int splice_shared_node(struct shared_node *src_node,
1245                               struct shared_node *dst_node)
1246 {
1247         struct cache_extent *cache;
1248         struct ptr_node *node, *ins;
1249         struct cache_tree *src, *dst;
1250         struct inode_record *rec, *conflict;
1251         u64 current_ino = 0;
1252         int splice = 0;
1253         int ret;
1254
1255         if (--src_node->refs == 0)
1256                 splice = 1;
1257         if (src_node->current)
1258                 current_ino = src_node->current->ino;
1259
1260         src = &src_node->root_cache;
1261         dst = &dst_node->root_cache;
1262 again:
1263         cache = search_cache_extent(src, 0);
1264         while (cache) {
1265                 node = container_of(cache, struct ptr_node, cache);
1266                 rec = node->data;
1267                 cache = next_cache_extent(cache);
1268
1269                 if (splice) {
1270                         remove_cache_extent(src, &node->cache);
1271                         ins = node;
1272                 } else {
1273                         ins = malloc(sizeof(*ins));
1274                         BUG_ON(!ins);
1275                         ins->cache.start = node->cache.start;
1276                         ins->cache.size = node->cache.size;
1277                         ins->data = rec;
1278                         rec->refs++;
1279                 }
1280                 ret = insert_cache_extent(dst, &ins->cache);
1281                 if (ret == -EEXIST) {
1282                         conflict = get_inode_rec(dst, rec->ino, 1);
1283                         BUG_ON(IS_ERR(conflict));
1284                         merge_inode_recs(rec, conflict, dst);
1285                         if (rec->checked) {
1286                                 conflict->checked = 1;
1287                                 if (dst_node->current == conflict)
1288                                         dst_node->current = NULL;
1289                         }
1290                         maybe_free_inode_rec(dst, conflict);
1291                         free_inode_rec(rec);
1292                         free(ins);
1293                 } else {
1294                         BUG_ON(ret);
1295                 }
1296         }
1297
1298         if (src == &src_node->root_cache) {
1299                 src = &src_node->inode_cache;
1300                 dst = &dst_node->inode_cache;
1301                 goto again;
1302         }
1303
1304         if (current_ino > 0 && (!dst_node->current ||
1305             current_ino > dst_node->current->ino)) {
1306                 if (dst_node->current) {
1307                         dst_node->current->checked = 1;
1308                         maybe_free_inode_rec(dst, dst_node->current);
1309                 }
1310                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1311                 BUG_ON(IS_ERR(dst_node->current));
1312         }
1313         return 0;
1314 }
1315
1316 static void free_inode_ptr(struct cache_extent *cache)
1317 {
1318         struct ptr_node *node;
1319         struct inode_record *rec;
1320
1321         node = container_of(cache, struct ptr_node, cache);
1322         rec = node->data;
1323         free_inode_rec(rec);
1324         free(node);
1325 }
1326
1327 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1328
1329 static struct shared_node *find_shared_node(struct cache_tree *shared,
1330                                             u64 bytenr)
1331 {
1332         struct cache_extent *cache;
1333         struct shared_node *node;
1334
1335         cache = lookup_cache_extent(shared, bytenr, 1);
1336         if (cache) {
1337                 node = container_of(cache, struct shared_node, cache);
1338                 return node;
1339         }
1340         return NULL;
1341 }
1342
1343 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1344 {
1345         int ret;
1346         struct shared_node *node;
1347
1348         node = calloc(1, sizeof(*node));
1349         if (!node)
1350                 return -ENOMEM;
1351         node->cache.start = bytenr;
1352         node->cache.size = 1;
1353         cache_tree_init(&node->root_cache);
1354         cache_tree_init(&node->inode_cache);
1355         node->refs = refs;
1356
1357         ret = insert_cache_extent(shared, &node->cache);
1358
1359         return ret;
1360 }
1361
1362 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int ret;
1368
1369         if (level == wc->active_node)
1370                 return 0;
1371
1372         BUG_ON(wc->active_node <= level);
1373         node = find_shared_node(&wc->shared, bytenr);
1374         if (!node) {
1375                 ret = add_shared_node(&wc->shared, bytenr, refs);
1376                 BUG_ON(ret);
1377                 node = find_shared_node(&wc->shared, bytenr);
1378                 wc->nodes[level] = node;
1379                 wc->active_node = level;
1380                 return 0;
1381         }
1382
1383         if (wc->root_level == wc->active_node &&
1384             btrfs_root_refs(&root->root_item) == 0) {
1385                 if (--node->refs == 0) {
1386                         free_inode_recs_tree(&node->root_cache);
1387                         free_inode_recs_tree(&node->inode_cache);
1388                         remove_cache_extent(&wc->shared, &node->cache);
1389                         free(node);
1390                 }
1391                 return 1;
1392         }
1393
1394         dest = wc->nodes[wc->active_node];
1395         splice_shared_node(node, dest);
1396         if (node->refs == 0) {
1397                 remove_cache_extent(&wc->shared, &node->cache);
1398                 free(node);
1399         }
1400         return 1;
1401 }
1402
1403 static int leave_shared_node(struct btrfs_root *root,
1404                              struct walk_control *wc, int level)
1405 {
1406         struct shared_node *node;
1407         struct shared_node *dest;
1408         int i;
1409
1410         if (level == wc->root_level)
1411                 return 0;
1412
1413         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1414                 if (wc->nodes[i])
1415                         break;
1416         }
1417         BUG_ON(i >= BTRFS_MAX_LEVEL);
1418
1419         node = wc->nodes[wc->active_node];
1420         wc->nodes[wc->active_node] = NULL;
1421         wc->active_node = i;
1422
1423         dest = wc->nodes[wc->active_node];
1424         if (wc->active_node < wc->root_level ||
1425             btrfs_root_refs(&root->root_item) > 0) {
1426                 BUG_ON(node->refs <= 1);
1427                 splice_shared_node(node, dest);
1428         } else {
1429                 BUG_ON(node->refs < 2);
1430                 node->refs--;
1431         }
1432         return 0;
1433 }
1434
1435 /*
1436  * Returns:
1437  * < 0 - on error
1438  * 1   - if the root with id child_root_id is a child of root parent_root_id
1439  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1440  *       has other root(s) as parent(s)
1441  * 2   - if the root child_root_id doesn't have any parent roots
1442  */
1443 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1444                          u64 child_root_id)
1445 {
1446         struct btrfs_path path;
1447         struct btrfs_key key;
1448         struct extent_buffer *leaf;
1449         int has_parent = 0;
1450         int ret;
1451
1452         btrfs_init_path(&path);
1453
1454         key.objectid = parent_root_id;
1455         key.type = BTRFS_ROOT_REF_KEY;
1456         key.offset = child_root_id;
1457         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1458                                 0, 0);
1459         if (ret < 0)
1460                 return ret;
1461         btrfs_release_path(&path);
1462         if (!ret)
1463                 return 1;
1464
1465         key.objectid = child_root_id;
1466         key.type = BTRFS_ROOT_BACKREF_KEY;
1467         key.offset = 0;
1468         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1469                                 0, 0);
1470         if (ret < 0)
1471                 goto out;
1472
1473         while (1) {
1474                 leaf = path.nodes[0];
1475                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1476                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1477                         if (ret)
1478                                 break;
1479                         leaf = path.nodes[0];
1480                 }
1481
1482                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1483                 if (key.objectid != child_root_id ||
1484                     key.type != BTRFS_ROOT_BACKREF_KEY)
1485                         break;
1486
1487                 has_parent = 1;
1488
1489                 if (key.offset == parent_root_id) {
1490                         btrfs_release_path(&path);
1491                         return 1;
1492                 }
1493
1494                 path.slots[0]++;
1495         }
1496 out:
1497         btrfs_release_path(&path);
1498         if (ret < 0)
1499                 return ret;
1500         return has_parent ? 0 : 2;
1501 }
1502
1503 static int process_dir_item(struct btrfs_root *root,
1504                             struct extent_buffer *eb,
1505                             int slot, struct btrfs_key *key,
1506                             struct shared_node *active_node)
1507 {
1508         u32 total;
1509         u32 cur = 0;
1510         u32 len;
1511         u32 name_len;
1512         u32 data_len;
1513         int error;
1514         int nritems = 0;
1515         int filetype;
1516         struct btrfs_dir_item *di;
1517         struct inode_record *rec;
1518         struct cache_tree *root_cache;
1519         struct cache_tree *inode_cache;
1520         struct btrfs_key location;
1521         char namebuf[BTRFS_NAME_LEN];
1522
1523         root_cache = &active_node->root_cache;
1524         inode_cache = &active_node->inode_cache;
1525         rec = active_node->current;
1526         rec->found_dir_item = 1;
1527
1528         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1529         total = btrfs_item_size_nr(eb, slot);
1530         while (cur < total) {
1531                 nritems++;
1532                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1533                 name_len = btrfs_dir_name_len(eb, di);
1534                 data_len = btrfs_dir_data_len(eb, di);
1535                 filetype = btrfs_dir_type(eb, di);
1536
1537                 rec->found_size += name_len;
1538                 if (name_len <= BTRFS_NAME_LEN) {
1539                         len = name_len;
1540                         error = 0;
1541                 } else {
1542                         len = BTRFS_NAME_LEN;
1543                         error = REF_ERR_NAME_TOO_LONG;
1544                 }
1545                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1546
1547                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1548                         add_inode_backref(inode_cache, location.objectid,
1549                                           key->objectid, key->offset, namebuf,
1550                                           len, filetype, key->type, error);
1551                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1552                         add_inode_backref(root_cache, location.objectid,
1553                                           key->objectid, key->offset,
1554                                           namebuf, len, filetype,
1555                                           key->type, error);
1556                 } else {
1557                         fprintf(stderr, "invalid location in dir item %u\n",
1558                                 location.type);
1559                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1560                                           key->objectid, key->offset, namebuf,
1561                                           len, filetype, key->type, error);
1562                 }
1563
1564                 len = sizeof(*di) + name_len + data_len;
1565                 di = (struct btrfs_dir_item *)((char *)di + len);
1566                 cur += len;
1567         }
1568         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1569                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1570
1571         return 0;
1572 }
1573
1574 static int process_inode_ref(struct extent_buffer *eb,
1575                              int slot, struct btrfs_key *key,
1576                              struct shared_node *active_node)
1577 {
1578         u32 total;
1579         u32 cur = 0;
1580         u32 len;
1581         u32 name_len;
1582         u64 index;
1583         int error;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_inode_ref *ref;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         inode_cache = &active_node->inode_cache;
1589
1590         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1591         total = btrfs_item_size_nr(eb, slot);
1592         while (cur < total) {
1593                 name_len = btrfs_inode_ref_name_len(eb, ref);
1594                 index = btrfs_inode_ref_index(eb, ref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, key->offset,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*ref) + name_len;
1607                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611 }
1612
1613 static int process_inode_extref(struct extent_buffer *eb,
1614                                 int slot, struct btrfs_key *key,
1615                                 struct shared_node *active_node)
1616 {
1617         u32 total;
1618         u32 cur = 0;
1619         u32 len;
1620         u32 name_len;
1621         u64 index;
1622         u64 parent;
1623         int error;
1624         struct cache_tree *inode_cache;
1625         struct btrfs_inode_extref *extref;
1626         char namebuf[BTRFS_NAME_LEN];
1627
1628         inode_cache = &active_node->inode_cache;
1629
1630         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631         total = btrfs_item_size_nr(eb, slot);
1632         while (cur < total) {
1633                 name_len = btrfs_inode_extref_name_len(eb, extref);
1634                 index = btrfs_inode_extref_index(eb, extref);
1635                 parent = btrfs_inode_extref_parent(eb, extref);
1636                 if (name_len <= BTRFS_NAME_LEN) {
1637                         len = name_len;
1638                         error = 0;
1639                 } else {
1640                         len = BTRFS_NAME_LEN;
1641                         error = REF_ERR_NAME_TOO_LONG;
1642                 }
1643                 read_extent_buffer(eb, namebuf,
1644                                    (unsigned long)(extref + 1), len);
1645                 add_inode_backref(inode_cache, key->objectid, parent,
1646                                   index, namebuf, len, 0, key->type, error);
1647
1648                 len = sizeof(*extref) + name_len;
1649                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1650                 cur += len;
1651         }
1652         return 0;
1653
1654 }
1655
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657                             u64 len, u64 *found)
1658 {
1659         struct btrfs_key key;
1660         struct btrfs_path path;
1661         struct extent_buffer *leaf;
1662         int ret;
1663         size_t size;
1664         *found = 0;
1665         u64 csum_end;
1666         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667
1668         btrfs_init_path(&path);
1669
1670         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671         key.offset = start;
1672         key.type = BTRFS_EXTENT_CSUM_KEY;
1673
1674         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1675                                 &key, &path, 0, 0);
1676         if (ret < 0)
1677                 goto out;
1678         if (ret > 0 && path.slots[0] > 0) {
1679                 leaf = path.nodes[0];
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682                     key.type == BTRFS_EXTENT_CSUM_KEY)
1683                         path.slots[0]--;
1684         }
1685
1686         while (len > 0) {
1687                 leaf = path.nodes[0];
1688                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1690                         if (ret > 0)
1691                                 break;
1692                         else if (ret < 0)
1693                                 goto out;
1694                         leaf = path.nodes[0];
1695                 }
1696
1697                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699                     key.type != BTRFS_EXTENT_CSUM_KEY)
1700                         break;
1701
1702                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703                 if (key.offset >= start + len)
1704                         break;
1705
1706                 if (key.offset > start)
1707                         start = key.offset;
1708
1709                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1711                 if (csum_end > start) {
1712                         size = min(csum_end - start, len);
1713                         len -= size;
1714                         start += size;
1715                         *found += size;
1716                 }
1717
1718                 path.slots[0]++;
1719         }
1720 out:
1721         btrfs_release_path(&path);
1722         if (ret < 0)
1723                 return ret;
1724         return 0;
1725 }
1726
1727 static int process_file_extent(struct btrfs_root *root,
1728                                 struct extent_buffer *eb,
1729                                 int slot, struct btrfs_key *key,
1730                                 struct shared_node *active_node)
1731 {
1732         struct inode_record *rec;
1733         struct btrfs_file_extent_item *fi;
1734         u64 num_bytes = 0;
1735         u64 disk_bytenr = 0;
1736         u64 extent_offset = 0;
1737         u64 mask = root->sectorsize - 1;
1738         int extent_type;
1739         int ret;
1740
1741         rec = active_node->current;
1742         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743         rec->found_file_extent = 1;
1744
1745         if (rec->extent_start == (u64)-1) {
1746                 rec->extent_start = key->offset;
1747                 rec->extent_end = key->offset;
1748         }
1749
1750         if (rec->extent_end > key->offset)
1751                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752         else if (rec->extent_end < key->offset) {
1753                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754                                            key->offset - rec->extent_end);
1755                 if (ret < 0)
1756                         return ret;
1757         }
1758
1759         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760         extent_type = btrfs_file_extent_type(eb, fi);
1761
1762         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1764                 if (num_bytes == 0)
1765                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766                 rec->found_size += num_bytes;
1767                 num_bytes = (num_bytes + mask) & ~mask;
1768         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772                 extent_offset = btrfs_file_extent_offset(eb, fi);
1773                 if (num_bytes == 0 || (num_bytes & mask))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (num_bytes + extent_offset >
1776                     btrfs_file_extent_ram_bytes(eb, fi))
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779                     (btrfs_file_extent_compression(eb, fi) ||
1780                      btrfs_file_extent_encryption(eb, fi) ||
1781                      btrfs_file_extent_other_encoding(eb, fi)))
1782                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783                 if (disk_bytenr > 0)
1784                         rec->found_size += num_bytes;
1785         } else {
1786                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787         }
1788         rec->extent_end = key->offset + num_bytes;
1789
1790         /*
1791          * The data reloc tree will copy full extents into its inode and then
1792          * copy the corresponding csums.  Because the extent it copied could be
1793          * a preallocated extent that hasn't been written to yet there may be no
1794          * csums to copy, ergo we won't have csums for our file extent.  This is
1795          * ok so just don't bother checking csums if the inode belongs to the
1796          * data reloc tree.
1797          */
1798         if (disk_bytenr > 0 &&
1799             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1800                 u64 found;
1801                 if (btrfs_file_extent_compression(eb, fi))
1802                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1803                 else
1804                         disk_bytenr += extent_offset;
1805
1806                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1807                 if (ret < 0)
1808                         return ret;
1809                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1810                         if (found > 0)
1811                                 rec->found_csum_item = 1;
1812                         if (found < num_bytes)
1813                                 rec->some_csum_missing = 1;
1814                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1815                         if (found > 0)
1816                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1817                 }
1818         }
1819         return 0;
1820 }
1821
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823                             struct walk_control *wc)
1824 {
1825         struct btrfs_key key;
1826         u32 nritems;
1827         int i;
1828         int ret = 0;
1829         struct cache_tree *inode_cache;
1830         struct shared_node *active_node;
1831
1832         if (wc->root_level == wc->active_node &&
1833             btrfs_root_refs(&root->root_item) == 0)
1834                 return 0;
1835
1836         active_node = wc->nodes[wc->active_node];
1837         inode_cache = &active_node->inode_cache;
1838         nritems = btrfs_header_nritems(eb);
1839         for (i = 0; i < nritems; i++) {
1840                 btrfs_item_key_to_cpu(eb, &key, i);
1841
1842                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1843                         continue;
1844                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1845                         continue;
1846
1847                 if (active_node->current == NULL ||
1848                     active_node->current->ino < key.objectid) {
1849                         if (active_node->current) {
1850                                 active_node->current->checked = 1;
1851                                 maybe_free_inode_rec(inode_cache,
1852                                                      active_node->current);
1853                         }
1854                         active_node->current = get_inode_rec(inode_cache,
1855                                                              key.objectid, 1);
1856                         BUG_ON(IS_ERR(active_node->current));
1857                 }
1858                 switch (key.type) {
1859                 case BTRFS_DIR_ITEM_KEY:
1860                 case BTRFS_DIR_INDEX_KEY:
1861                         ret = process_dir_item(root, eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_REF_KEY:
1864                         ret = process_inode_ref(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_INODE_EXTREF_KEY:
1867                         ret = process_inode_extref(eb, i, &key, active_node);
1868                         break;
1869                 case BTRFS_INODE_ITEM_KEY:
1870                         ret = process_inode_item(eb, i, &key, active_node);
1871                         break;
1872                 case BTRFS_EXTENT_DATA_KEY:
1873                         ret = process_file_extent(root, eb, i, &key,
1874                                                   active_node);
1875                         break;
1876                 default:
1877                         break;
1878                 };
1879         }
1880         return ret;
1881 }
1882
1883 static void reada_walk_down(struct btrfs_root *root,
1884                             struct extent_buffer *node, int slot)
1885 {
1886         u64 bytenr;
1887         u64 ptr_gen;
1888         u32 nritems;
1889         u32 blocksize;
1890         int i;
1891         int level;
1892
1893         level = btrfs_header_level(node);
1894         if (level != 1)
1895                 return;
1896
1897         nritems = btrfs_header_nritems(node);
1898         blocksize = root->nodesize;
1899         for (i = slot; i < nritems; i++) {
1900                 bytenr = btrfs_node_blockptr(node, i);
1901                 ptr_gen = btrfs_node_ptr_generation(node, i);
1902                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1903         }
1904 }
1905
1906 /*
1907  * Check the child node/leaf by the following condition:
1908  * 1. the first item key of the node/leaf should be the same with the one
1909  *    in parent.
1910  * 2. block in parent node should match the child node/leaf.
1911  * 3. generation of parent node and child's header should be consistent.
1912  *
1913  * Or the child node/leaf pointed by the key in parent is not valid.
1914  *
1915  * We hope to check leaf owner too, but since subvol may share leaves,
1916  * which makes leaf owner check not so strong, key check should be
1917  * sufficient enough for that case.
1918  */
1919 static int check_child_node(struct btrfs_root *root,
1920                             struct extent_buffer *parent, int slot,
1921                             struct extent_buffer *child)
1922 {
1923         struct btrfs_key parent_key;
1924         struct btrfs_key child_key;
1925         int ret = 0;
1926
1927         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1928         if (btrfs_header_level(child) == 0)
1929                 btrfs_item_key_to_cpu(child, &child_key, 0);
1930         else
1931                 btrfs_node_key_to_cpu(child, &child_key, 0);
1932
1933         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1934                 ret = -EINVAL;
1935                 fprintf(stderr,
1936                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1937                         parent_key.objectid, parent_key.type, parent_key.offset,
1938                         child_key.objectid, child_key.type, child_key.offset);
1939         }
1940         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1941                 ret = -EINVAL;
1942                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1943                         btrfs_node_blockptr(parent, slot),
1944                         btrfs_header_bytenr(child));
1945         }
1946         if (btrfs_node_ptr_generation(parent, slot) !=
1947             btrfs_header_generation(child)) {
1948                 ret = -EINVAL;
1949                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1950                         btrfs_header_generation(child),
1951                         btrfs_node_ptr_generation(parent, slot));
1952         }
1953         return ret;
1954 }
1955
1956 struct node_refs {
1957         u64 bytenr[BTRFS_MAX_LEVEL];
1958         u64 refs[BTRFS_MAX_LEVEL];
1959 };
1960
1961 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1962                           struct walk_control *wc, int *level,
1963                           struct node_refs *nrefs)
1964 {
1965         enum btrfs_tree_block_status status;
1966         u64 bytenr;
1967         u64 ptr_gen;
1968         struct extent_buffer *next;
1969         struct extent_buffer *cur;
1970         u32 blocksize;
1971         int ret, err = 0;
1972         u64 refs;
1973
1974         WARN_ON(*level < 0);
1975         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1976
1977         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1978                 refs = nrefs->refs[*level];
1979                 ret = 0;
1980         } else {
1981                 ret = btrfs_lookup_extent_info(NULL, root,
1982                                        path->nodes[*level]->start,
1983                                        *level, 1, &refs, NULL);
1984                 if (ret < 0) {
1985                         err = ret;
1986                         goto out;
1987                 }
1988                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1989                 nrefs->refs[*level] = refs;
1990         }
1991
1992         if (refs > 1) {
1993                 ret = enter_shared_node(root, path->nodes[*level]->start,
1994                                         refs, wc, *level);
1995                 if (ret > 0) {
1996                         err = ret;
1997                         goto out;
1998                 }
1999         }
2000
2001         while (*level >= 0) {
2002                 WARN_ON(*level < 0);
2003                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2004                 cur = path->nodes[*level];
2005
2006                 if (btrfs_header_level(cur) != *level)
2007                         WARN_ON(1);
2008
2009                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2010                         break;
2011                 if (*level == 0) {
2012                         ret = process_one_leaf(root, cur, wc);
2013                         if (ret < 0)
2014                                 err = ret;
2015                         break;
2016                 }
2017                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2018                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2019                 blocksize = root->nodesize;
2020
2021                 if (bytenr == nrefs->bytenr[*level - 1]) {
2022                         refs = nrefs->refs[*level - 1];
2023                 } else {
2024                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2025                                         *level - 1, 1, &refs, NULL);
2026                         if (ret < 0) {
2027                                 refs = 0;
2028                         } else {
2029                                 nrefs->bytenr[*level - 1] = bytenr;
2030                                 nrefs->refs[*level - 1] = refs;
2031                         }
2032                 }
2033
2034                 if (refs > 1) {
2035                         ret = enter_shared_node(root, bytenr, refs,
2036                                                 wc, *level - 1);
2037                         if (ret > 0) {
2038                                 path->slots[*level]++;
2039                                 continue;
2040                         }
2041                 }
2042
2043                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2044                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2045                         free_extent_buffer(next);
2046                         reada_walk_down(root, cur, path->slots[*level]);
2047                         next = read_tree_block(root, bytenr, blocksize,
2048                                                ptr_gen);
2049                         if (!extent_buffer_uptodate(next)) {
2050                                 struct btrfs_key node_key;
2051
2052                                 btrfs_node_key_to_cpu(path->nodes[*level],
2053                                                       &node_key,
2054                                                       path->slots[*level]);
2055                                 btrfs_add_corrupt_extent_record(root->fs_info,
2056                                                 &node_key,
2057                                                 path->nodes[*level]->start,
2058                                                 root->nodesize, *level);
2059                                 err = -EIO;
2060                                 goto out;
2061                         }
2062                 }
2063
2064                 ret = check_child_node(root, cur, path->slots[*level], next);
2065                 if (ret) {
2066                         err = ret;
2067                         goto out;
2068                 }
2069
2070                 if (btrfs_is_leaf(next))
2071                         status = btrfs_check_leaf(root, NULL, next);
2072                 else
2073                         status = btrfs_check_node(root, NULL, next);
2074                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2075                         free_extent_buffer(next);
2076                         err = -EIO;
2077                         goto out;
2078                 }
2079
2080                 *level = *level - 1;
2081                 free_extent_buffer(path->nodes[*level]);
2082                 path->nodes[*level] = next;
2083                 path->slots[*level] = 0;
2084         }
2085 out:
2086         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2087         return err;
2088 }
2089
2090 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2091                         struct walk_control *wc, int *level)
2092 {
2093         int i;
2094         struct extent_buffer *leaf;
2095
2096         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2097                 leaf = path->nodes[i];
2098                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2099                         path->slots[i]++;
2100                         *level = i;
2101                         return 0;
2102                 } else {
2103                         free_extent_buffer(path->nodes[*level]);
2104                         path->nodes[*level] = NULL;
2105                         BUG_ON(*level > wc->active_node);
2106                         if (*level == wc->active_node)
2107                                 leave_shared_node(root, wc, *level);
2108                         *level = i + 1;
2109                 }
2110         }
2111         return 1;
2112 }
2113
2114 static int check_root_dir(struct inode_record *rec)
2115 {
2116         struct inode_backref *backref;
2117         int ret = -1;
2118
2119         if (!rec->found_inode_item || rec->errors)
2120                 goto out;
2121         if (rec->nlink != 1 || rec->found_link != 0)
2122                 goto out;
2123         if (list_empty(&rec->backrefs))
2124                 goto out;
2125         backref = to_inode_backref(rec->backrefs.next);
2126         if (!backref->found_inode_ref)
2127                 goto out;
2128         if (backref->index != 0 || backref->namelen != 2 ||
2129             memcmp(backref->name, "..", 2))
2130                 goto out;
2131         if (backref->found_dir_index || backref->found_dir_item)
2132                 goto out;
2133         ret = 0;
2134 out:
2135         return ret;
2136 }
2137
2138 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2139                               struct btrfs_root *root, struct btrfs_path *path,
2140                               struct inode_record *rec)
2141 {
2142         struct btrfs_inode_item *ei;
2143         struct btrfs_key key;
2144         int ret;
2145
2146         key.objectid = rec->ino;
2147         key.type = BTRFS_INODE_ITEM_KEY;
2148         key.offset = (u64)-1;
2149
2150         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2151         if (ret < 0)
2152                 goto out;
2153         if (ret) {
2154                 if (!path->slots[0]) {
2155                         ret = -ENOENT;
2156                         goto out;
2157                 }
2158                 path->slots[0]--;
2159                 ret = 0;
2160         }
2161         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2162         if (key.objectid != rec->ino) {
2163                 ret = -ENOENT;
2164                 goto out;
2165         }
2166
2167         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2168                             struct btrfs_inode_item);
2169         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2170         btrfs_mark_buffer_dirty(path->nodes[0]);
2171         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2172         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2173                root->root_key.objectid);
2174 out:
2175         btrfs_release_path(path);
2176         return ret;
2177 }
2178
2179 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2180                                     struct btrfs_root *root,
2181                                     struct btrfs_path *path,
2182                                     struct inode_record *rec)
2183 {
2184         int ret;
2185
2186         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2187         btrfs_release_path(path);
2188         if (!ret)
2189                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2190         return ret;
2191 }
2192
2193 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2194                                struct btrfs_root *root,
2195                                struct btrfs_path *path,
2196                                struct inode_record *rec)
2197 {
2198         struct btrfs_inode_item *ei;
2199         struct btrfs_key key;
2200         int ret = 0;
2201
2202         key.objectid = rec->ino;
2203         key.type = BTRFS_INODE_ITEM_KEY;
2204         key.offset = 0;
2205
2206         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2207         if (ret) {
2208                 if (ret > 0)
2209                         ret = -ENOENT;
2210                 goto out;
2211         }
2212
2213         /* Since ret == 0, no need to check anything */
2214         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2215                             struct btrfs_inode_item);
2216         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2217         btrfs_mark_buffer_dirty(path->nodes[0]);
2218         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2219         printf("reset nbytes for ino %llu root %llu\n",
2220                rec->ino, root->root_key.objectid);
2221 out:
2222         btrfs_release_path(path);
2223         return ret;
2224 }
2225
2226 static int add_missing_dir_index(struct btrfs_root *root,
2227                                  struct cache_tree *inode_cache,
2228                                  struct inode_record *rec,
2229                                  struct inode_backref *backref)
2230 {
2231         struct btrfs_path *path;
2232         struct btrfs_trans_handle *trans;
2233         struct btrfs_dir_item *dir_item;
2234         struct extent_buffer *leaf;
2235         struct btrfs_key key;
2236         struct btrfs_disk_key disk_key;
2237         struct inode_record *dir_rec;
2238         unsigned long name_ptr;
2239         u32 data_size = sizeof(*dir_item) + backref->namelen;
2240         int ret;
2241
2242         path = btrfs_alloc_path();
2243         if (!path)
2244                 return -ENOMEM;
2245
2246         trans = btrfs_start_transaction(root, 1);
2247         if (IS_ERR(trans)) {
2248                 btrfs_free_path(path);
2249                 return PTR_ERR(trans);
2250         }
2251
2252         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2253                 (unsigned long long)rec->ino);
2254         key.objectid = backref->dir;
2255         key.type = BTRFS_DIR_INDEX_KEY;
2256         key.offset = backref->index;
2257
2258         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2259         BUG_ON(ret);
2260
2261         leaf = path->nodes[0];
2262         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2263
2264         disk_key.objectid = cpu_to_le64(rec->ino);
2265         disk_key.type = BTRFS_INODE_ITEM_KEY;
2266         disk_key.offset = 0;
2267
2268         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2269         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2270         btrfs_set_dir_data_len(leaf, dir_item, 0);
2271         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2272         name_ptr = (unsigned long)(dir_item + 1);
2273         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2274         btrfs_mark_buffer_dirty(leaf);
2275         btrfs_free_path(path);
2276         btrfs_commit_transaction(trans, root);
2277
2278         backref->found_dir_index = 1;
2279         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2280         BUG_ON(IS_ERR(dir_rec));
2281         if (!dir_rec)
2282                 return 0;
2283         dir_rec->found_size += backref->namelen;
2284         if (dir_rec->found_size == dir_rec->isize &&
2285             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2286                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2287         if (dir_rec->found_size != dir_rec->isize)
2288                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2289
2290         return 0;
2291 }
2292
2293 static int delete_dir_index(struct btrfs_root *root,
2294                             struct cache_tree *inode_cache,
2295                             struct inode_record *rec,
2296                             struct inode_backref *backref)
2297 {
2298         struct btrfs_trans_handle *trans;
2299         struct btrfs_dir_item *di;
2300         struct btrfs_path *path;
2301         int ret = 0;
2302
2303         path = btrfs_alloc_path();
2304         if (!path)
2305                 return -ENOMEM;
2306
2307         trans = btrfs_start_transaction(root, 1);
2308         if (IS_ERR(trans)) {
2309                 btrfs_free_path(path);
2310                 return PTR_ERR(trans);
2311         }
2312
2313
2314         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2315                 (unsigned long long)backref->dir,
2316                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2317                 (unsigned long long)root->objectid);
2318
2319         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2320                                     backref->name, backref->namelen,
2321                                     backref->index, -1);
2322         if (IS_ERR(di)) {
2323                 ret = PTR_ERR(di);
2324                 btrfs_free_path(path);
2325                 btrfs_commit_transaction(trans, root);
2326                 if (ret == -ENOENT)
2327                         return 0;
2328                 return ret;
2329         }
2330
2331         if (!di)
2332                 ret = btrfs_del_item(trans, root, path);
2333         else
2334                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2335         BUG_ON(ret);
2336         btrfs_free_path(path);
2337         btrfs_commit_transaction(trans, root);
2338         return ret;
2339 }
2340
2341 static int create_inode_item(struct btrfs_root *root,
2342                              struct inode_record *rec,
2343                              struct inode_backref *backref, int root_dir)
2344 {
2345         struct btrfs_trans_handle *trans;
2346         struct btrfs_inode_item inode_item;
2347         time_t now = time(NULL);
2348         int ret;
2349
2350         trans = btrfs_start_transaction(root, 1);
2351         if (IS_ERR(trans)) {
2352                 ret = PTR_ERR(trans);
2353                 return ret;
2354         }
2355
2356         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2357                 "be incomplete, please check permissions and content after "
2358                 "the fsck completes.\n", (unsigned long long)root->objectid,
2359                 (unsigned long long)rec->ino);
2360
2361         memset(&inode_item, 0, sizeof(inode_item));
2362         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2363         if (root_dir)
2364                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2365         else
2366                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2367         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2368         if (rec->found_dir_item) {
2369                 if (rec->found_file_extent)
2370                         fprintf(stderr, "root %llu inode %llu has both a dir "
2371                                 "item and extents, unsure if it is a dir or a "
2372                                 "regular file so setting it as a directory\n",
2373                                 (unsigned long long)root->objectid,
2374                                 (unsigned long long)rec->ino);
2375                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2376                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2377         } else if (!rec->found_dir_item) {
2378                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2379                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2380         }
2381         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2382         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2383         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2384         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2385         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2386         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2387         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2388         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2389
2390         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2391         BUG_ON(ret);
2392         btrfs_commit_transaction(trans, root);
2393         return 0;
2394 }
2395
2396 static int repair_inode_backrefs(struct btrfs_root *root,
2397                                  struct inode_record *rec,
2398                                  struct cache_tree *inode_cache,
2399                                  int delete)
2400 {
2401         struct inode_backref *tmp, *backref;
2402         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2403         int ret = 0;
2404         int repaired = 0;
2405
2406         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2407                 if (!delete && rec->ino == root_dirid) {
2408                         if (!rec->found_inode_item) {
2409                                 ret = create_inode_item(root, rec, backref, 1);
2410                                 if (ret)
2411                                         break;
2412                                 repaired++;
2413                         }
2414                 }
2415
2416                 /* Index 0 for root dir's are special, don't mess with it */
2417                 if (rec->ino == root_dirid && backref->index == 0)
2418                         continue;
2419
2420                 if (delete &&
2421                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2422                      (backref->found_dir_index && backref->found_inode_ref &&
2423                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2424                         ret = delete_dir_index(root, inode_cache, rec, backref);
2425                         if (ret)
2426                                 break;
2427                         repaired++;
2428                         list_del(&backref->list);
2429                         free(backref);
2430                 }
2431
2432                 if (!delete && !backref->found_dir_index &&
2433                     backref->found_dir_item && backref->found_inode_ref) {
2434                         ret = add_missing_dir_index(root, inode_cache, rec,
2435                                                     backref);
2436                         if (ret)
2437                                 break;
2438                         repaired++;
2439                         if (backref->found_dir_item &&
2440                             backref->found_dir_index &&
2441                             backref->found_dir_index) {
2442                                 if (!backref->errors &&
2443                                     backref->found_inode_ref) {
2444                                         list_del(&backref->list);
2445                                         free(backref);
2446                                 }
2447                         }
2448                 }
2449
2450                 if (!delete && (!backref->found_dir_index &&
2451                                 !backref->found_dir_item &&
2452                                 backref->found_inode_ref)) {
2453                         struct btrfs_trans_handle *trans;
2454                         struct btrfs_key location;
2455
2456                         ret = check_dir_conflict(root, backref->name,
2457                                                  backref->namelen,
2458                                                  backref->dir,
2459                                                  backref->index);
2460                         if (ret) {
2461                                 /*
2462                                  * let nlink fixing routine to handle it,
2463                                  * which can do it better.
2464                                  */
2465                                 ret = 0;
2466                                 break;
2467                         }
2468                         location.objectid = rec->ino;
2469                         location.type = BTRFS_INODE_ITEM_KEY;
2470                         location.offset = 0;
2471
2472                         trans = btrfs_start_transaction(root, 1);
2473                         if (IS_ERR(trans)) {
2474                                 ret = PTR_ERR(trans);
2475                                 break;
2476                         }
2477                         fprintf(stderr, "adding missing dir index/item pair "
2478                                 "for inode %llu\n",
2479                                 (unsigned long long)rec->ino);
2480                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2481                                                     backref->namelen,
2482                                                     backref->dir, &location,
2483                                                     imode_to_type(rec->imode),
2484                                                     backref->index);
2485                         BUG_ON(ret);
2486                         btrfs_commit_transaction(trans, root);
2487                         repaired++;
2488                 }
2489
2490                 if (!delete && (backref->found_inode_ref &&
2491                                 backref->found_dir_index &&
2492                                 backref->found_dir_item &&
2493                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2494                                 !rec->found_inode_item)) {
2495                         ret = create_inode_item(root, rec, backref, 0);
2496                         if (ret)
2497                                 break;
2498                         repaired++;
2499                 }
2500
2501         }
2502         return ret ? ret : repaired;
2503 }
2504
2505 /*
2506  * To determine the file type for nlink/inode_item repair
2507  *
2508  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2509  * Return -ENOENT if file type is not found.
2510  */
2511 static int find_file_type(struct inode_record *rec, u8 *type)
2512 {
2513         struct inode_backref *backref;
2514
2515         /* For inode item recovered case */
2516         if (rec->found_inode_item) {
2517                 *type = imode_to_type(rec->imode);
2518                 return 0;
2519         }
2520
2521         list_for_each_entry(backref, &rec->backrefs, list) {
2522                 if (backref->found_dir_index || backref->found_dir_item) {
2523                         *type = backref->filetype;
2524                         return 0;
2525                 }
2526         }
2527         return -ENOENT;
2528 }
2529
2530 /*
2531  * To determine the file name for nlink repair
2532  *
2533  * Return 0 if file name is found, set name and namelen.
2534  * Return -ENOENT if file name is not found.
2535  */
2536 static int find_file_name(struct inode_record *rec,
2537                           char *name, int *namelen)
2538 {
2539         struct inode_backref *backref;
2540
2541         list_for_each_entry(backref, &rec->backrefs, list) {
2542                 if (backref->found_dir_index || backref->found_dir_item ||
2543                     backref->found_inode_ref) {
2544                         memcpy(name, backref->name, backref->namelen);
2545                         *namelen = backref->namelen;
2546                         return 0;
2547                 }
2548         }
2549         return -ENOENT;
2550 }
2551
2552 /* Reset the nlink of the inode to the correct one */
2553 static int reset_nlink(struct btrfs_trans_handle *trans,
2554                        struct btrfs_root *root,
2555                        struct btrfs_path *path,
2556                        struct inode_record *rec)
2557 {
2558         struct inode_backref *backref;
2559         struct inode_backref *tmp;
2560         struct btrfs_key key;
2561         struct btrfs_inode_item *inode_item;
2562         int ret = 0;
2563
2564         /* We don't believe this either, reset it and iterate backref */
2565         rec->found_link = 0;
2566
2567         /* Remove all backref including the valid ones */
2568         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2569                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2570                                    backref->index, backref->name,
2571                                    backref->namelen, 0);
2572                 if (ret < 0)
2573                         goto out;
2574
2575                 /* remove invalid backref, so it won't be added back */
2576                 if (!(backref->found_dir_index &&
2577                       backref->found_dir_item &&
2578                       backref->found_inode_ref)) {
2579                         list_del(&backref->list);
2580                         free(backref);
2581                 } else {
2582                         rec->found_link++;
2583                 }
2584         }
2585
2586         /* Set nlink to 0 */
2587         key.objectid = rec->ino;
2588         key.type = BTRFS_INODE_ITEM_KEY;
2589         key.offset = 0;
2590         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2591         if (ret < 0)
2592                 goto out;
2593         if (ret > 0) {
2594                 ret = -ENOENT;
2595                 goto out;
2596         }
2597         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2598                                     struct btrfs_inode_item);
2599         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2600         btrfs_mark_buffer_dirty(path->nodes[0]);
2601         btrfs_release_path(path);
2602
2603         /*
2604          * Add back valid inode_ref/dir_item/dir_index,
2605          * add_link() will handle the nlink inc, so new nlink must be correct
2606          */
2607         list_for_each_entry(backref, &rec->backrefs, list) {
2608                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2609                                      backref->name, backref->namelen,
2610                                      backref->filetype, &backref->index, 1);
2611                 if (ret < 0)
2612                         goto out;
2613         }
2614 out:
2615         btrfs_release_path(path);
2616         return ret;
2617 }
2618
2619 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2620                                struct btrfs_root *root,
2621                                struct btrfs_path *path,
2622                                struct inode_record *rec)
2623 {
2624         char *dir_name = "lost+found";
2625         char namebuf[BTRFS_NAME_LEN] = {0};
2626         u64 lost_found_ino;
2627         u32 mode = 0700;
2628         u8 type = 0;
2629         int namelen = 0;
2630         int name_recovered = 0;
2631         int type_recovered = 0;
2632         int ret = 0;
2633
2634         /*
2635          * Get file name and type first before these invalid inode ref
2636          * are deleted by remove_all_invalid_backref()
2637          */
2638         name_recovered = !find_file_name(rec, namebuf, &namelen);
2639         type_recovered = !find_file_type(rec, &type);
2640
2641         if (!name_recovered) {
2642                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2643                        rec->ino, rec->ino);
2644                 namelen = count_digits(rec->ino);
2645                 sprintf(namebuf, "%llu", rec->ino);
2646                 name_recovered = 1;
2647         }
2648         if (!type_recovered) {
2649                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2650                        rec->ino);
2651                 type = BTRFS_FT_REG_FILE;
2652                 type_recovered = 1;
2653         }
2654
2655         ret = reset_nlink(trans, root, path, rec);
2656         if (ret < 0) {
2657                 fprintf(stderr,
2658                         "Failed to reset nlink for inode %llu: %s\n",
2659                         rec->ino, strerror(-ret));
2660                 goto out;
2661         }
2662
2663         if (rec->found_link == 0) {
2664                 lost_found_ino = root->highest_inode;
2665                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2666                         ret = -EOVERFLOW;
2667                         goto out;
2668                 }
2669                 lost_found_ino++;
2670                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2671                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2672                                   mode);
2673                 if (ret < 0) {
2674                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2675                                 dir_name, strerror(-ret));
2676                         goto out;
2677                 }
2678                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2679                                      namebuf, namelen, type, NULL, 1);
2680                 /*
2681                  * Add ".INO" suffix several times to handle case where
2682                  * "FILENAME.INO" is already taken by another file.
2683                  */
2684                 while (ret == -EEXIST) {
2685                         /*
2686                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2687                          */
2688                         if (namelen + count_digits(rec->ino) + 1 >
2689                             BTRFS_NAME_LEN) {
2690                                 ret = -EFBIG;
2691                                 goto out;
2692                         }
2693                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2694                                  ".%llu", rec->ino);
2695                         namelen += count_digits(rec->ino) + 1;
2696                         ret = btrfs_add_link(trans, root, rec->ino,
2697                                              lost_found_ino, namebuf,
2698                                              namelen, type, NULL, 1);
2699                 }
2700                 if (ret < 0) {
2701                         fprintf(stderr,
2702                                 "Failed to link the inode %llu to %s dir: %s\n",
2703                                 rec->ino, dir_name, strerror(-ret));
2704                         goto out;
2705                 }
2706                 /*
2707                  * Just increase the found_link, don't actually add the
2708                  * backref. This will make things easier and this inode
2709                  * record will be freed after the repair is done.
2710                  * So fsck will not report problem about this inode.
2711                  */
2712                 rec->found_link++;
2713                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2714                        namelen, namebuf, dir_name);
2715         }
2716         printf("Fixed the nlink of inode %llu\n", rec->ino);
2717 out:
2718         /*
2719          * Clear the flag anyway, or we will loop forever for the same inode
2720          * as it will not be removed from the bad inode list and the dead loop
2721          * happens.
2722          */
2723         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2724         btrfs_release_path(path);
2725         return ret;
2726 }
2727
2728 /*
2729  * Check if there is any normal(reg or prealloc) file extent for given
2730  * ino.
2731  * This is used to determine the file type when neither its dir_index/item or
2732  * inode_item exists.
2733  *
2734  * This will *NOT* report error, if any error happens, just consider it does
2735  * not have any normal file extent.
2736  */
2737 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2738 {
2739         struct btrfs_path *path;
2740         struct btrfs_key key;
2741         struct btrfs_key found_key;
2742         struct btrfs_file_extent_item *fi;
2743         u8 type;
2744         int ret = 0;
2745
2746         path = btrfs_alloc_path();
2747         if (!path)
2748                 goto out;
2749         key.objectid = ino;
2750         key.type = BTRFS_EXTENT_DATA_KEY;
2751         key.offset = 0;
2752
2753         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2754         if (ret < 0) {
2755                 ret = 0;
2756                 goto out;
2757         }
2758         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2759                 ret = btrfs_next_leaf(root, path);
2760                 if (ret) {
2761                         ret = 0;
2762                         goto out;
2763                 }
2764         }
2765         while (1) {
2766                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2767                                       path->slots[0]);
2768                 if (found_key.objectid != ino ||
2769                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2770                         break;
2771                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2772                                     struct btrfs_file_extent_item);
2773                 type = btrfs_file_extent_type(path->nodes[0], fi);
2774                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2775                         ret = 1;
2776                         goto out;
2777                 }
2778         }
2779 out:
2780         btrfs_free_path(path);
2781         return ret;
2782 }
2783
2784 static u32 btrfs_type_to_imode(u8 type)
2785 {
2786         static u32 imode_by_btrfs_type[] = {
2787                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2788                 [BTRFS_FT_DIR]          = S_IFDIR,
2789                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2790                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2791                 [BTRFS_FT_FIFO]         = S_IFIFO,
2792                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2793                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2794         };
2795
2796         return imode_by_btrfs_type[(type)];
2797 }
2798
2799 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2800                                 struct btrfs_root *root,
2801                                 struct btrfs_path *path,
2802                                 struct inode_record *rec)
2803 {
2804         u8 filetype;
2805         u32 mode = 0700;
2806         int type_recovered = 0;
2807         int ret = 0;
2808
2809         printf("Trying to rebuild inode:%llu\n", rec->ino);
2810
2811         type_recovered = !find_file_type(rec, &filetype);
2812
2813         /*
2814          * Try to determine inode type if type not found.
2815          *
2816          * For found regular file extent, it must be FILE.
2817          * For found dir_item/index, it must be DIR.
2818          *
2819          * For undetermined one, use FILE as fallback.
2820          *
2821          * TODO:
2822          * 1. If found backref(inode_index/item is already handled) to it,
2823          *    it must be DIR.
2824          *    Need new inode-inode ref structure to allow search for that.
2825          */
2826         if (!type_recovered) {
2827                 if (rec->found_file_extent &&
2828                     find_normal_file_extent(root, rec->ino)) {
2829                         type_recovered = 1;
2830                         filetype = BTRFS_FT_REG_FILE;
2831                 } else if (rec->found_dir_item) {
2832                         type_recovered = 1;
2833                         filetype = BTRFS_FT_DIR;
2834                 } else if (!list_empty(&rec->orphan_extents)) {
2835                         type_recovered = 1;
2836                         filetype = BTRFS_FT_REG_FILE;
2837                 } else{
2838                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2839                                rec->ino);
2840                         type_recovered = 1;
2841                         filetype = BTRFS_FT_REG_FILE;
2842                 }
2843         }
2844
2845         ret = btrfs_new_inode(trans, root, rec->ino,
2846                               mode | btrfs_type_to_imode(filetype));
2847         if (ret < 0)
2848                 goto out;
2849
2850         /*
2851          * Here inode rebuild is done, we only rebuild the inode item,
2852          * don't repair the nlink(like move to lost+found).
2853          * That is the job of nlink repair.
2854          *
2855          * We just fill the record and return
2856          */
2857         rec->found_dir_item = 1;
2858         rec->imode = mode | btrfs_type_to_imode(filetype);
2859         rec->nlink = 0;
2860         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2861         /* Ensure the inode_nlinks repair function will be called */
2862         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2863 out:
2864         return ret;
2865 }
2866
2867 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2868                                       struct btrfs_root *root,
2869                                       struct btrfs_path *path,
2870                                       struct inode_record *rec)
2871 {
2872         struct orphan_data_extent *orphan;
2873         struct orphan_data_extent *tmp;
2874         int ret = 0;
2875
2876         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2877                 /*
2878                  * Check for conflicting file extents
2879                  *
2880                  * Here we don't know whether the extents is compressed or not,
2881                  * so we can only assume it not compressed nor data offset,
2882                  * and use its disk_len as extent length.
2883                  */
2884                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2885                                        orphan->offset, orphan->disk_len, 0);
2886                 btrfs_release_path(path);
2887                 if (ret < 0)
2888                         goto out;
2889                 if (!ret) {
2890                         fprintf(stderr,
2891                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2892                                 orphan->disk_bytenr, orphan->disk_len);
2893                         ret = btrfs_free_extent(trans,
2894                                         root->fs_info->extent_root,
2895                                         orphan->disk_bytenr, orphan->disk_len,
2896                                         0, root->objectid, orphan->objectid,
2897                                         orphan->offset);
2898                         if (ret < 0)
2899                                 goto out;
2900                 }
2901                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2902                                 orphan->offset, orphan->disk_bytenr,
2903                                 orphan->disk_len, orphan->disk_len);
2904                 if (ret < 0)
2905                         goto out;
2906
2907                 /* Update file size info */
2908                 rec->found_size += orphan->disk_len;
2909                 if (rec->found_size == rec->nbytes)
2910                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2911
2912                 /* Update the file extent hole info too */
2913                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2914                                            orphan->disk_len);
2915                 if (ret < 0)
2916                         goto out;
2917                 if (RB_EMPTY_ROOT(&rec->holes))
2918                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2919
2920                 list_del(&orphan->list);
2921                 free(orphan);
2922         }
2923         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2924 out:
2925         return ret;
2926 }
2927
2928 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2929                                         struct btrfs_root *root,
2930                                         struct btrfs_path *path,
2931                                         struct inode_record *rec)
2932 {
2933         struct rb_node *node;
2934         struct file_extent_hole *hole;
2935         int found = 0;
2936         int ret = 0;
2937
2938         node = rb_first(&rec->holes);
2939
2940         while (node) {
2941                 found = 1;
2942                 hole = rb_entry(node, struct file_extent_hole, node);
2943                 ret = btrfs_punch_hole(trans, root, rec->ino,
2944                                        hole->start, hole->len);
2945                 if (ret < 0)
2946                         goto out;
2947                 ret = del_file_extent_hole(&rec->holes, hole->start,
2948                                            hole->len);
2949                 if (ret < 0)
2950                         goto out;
2951                 if (RB_EMPTY_ROOT(&rec->holes))
2952                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2953                 node = rb_first(&rec->holes);
2954         }
2955         /* special case for a file losing all its file extent */
2956         if (!found) {
2957                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2958                                        round_up(rec->isize, root->sectorsize));
2959                 if (ret < 0)
2960                         goto out;
2961         }
2962         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2963                rec->ino, root->objectid);
2964 out:
2965         return ret;
2966 }
2967
2968 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2969 {
2970         struct btrfs_trans_handle *trans;
2971         struct btrfs_path *path;
2972         int ret = 0;
2973
2974         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2975                              I_ERR_NO_ORPHAN_ITEM |
2976                              I_ERR_LINK_COUNT_WRONG |
2977                              I_ERR_NO_INODE_ITEM |
2978                              I_ERR_FILE_EXTENT_ORPHAN |
2979                              I_ERR_FILE_EXTENT_DISCOUNT|
2980                              I_ERR_FILE_NBYTES_WRONG)))
2981                 return rec->errors;
2982
2983         path = btrfs_alloc_path();
2984         if (!path)
2985                 return -ENOMEM;
2986
2987         /*
2988          * For nlink repair, it may create a dir and add link, so
2989          * 2 for parent(256)'s dir_index and dir_item
2990          * 2 for lost+found dir's inode_item and inode_ref
2991          * 1 for the new inode_ref of the file
2992          * 2 for lost+found dir's dir_index and dir_item for the file
2993          */
2994         trans = btrfs_start_transaction(root, 7);
2995         if (IS_ERR(trans)) {
2996                 btrfs_free_path(path);
2997                 return PTR_ERR(trans);
2998         }
2999
3000         if (rec->errors & I_ERR_NO_INODE_ITEM)
3001                 ret = repair_inode_no_item(trans, root, path, rec);
3002         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3003                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3004         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3005                 ret = repair_inode_discount_extent(trans, root, path, rec);
3006         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3007                 ret = repair_inode_isize(trans, root, path, rec);
3008         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3009                 ret = repair_inode_orphan_item(trans, root, path, rec);
3010         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3011                 ret = repair_inode_nlinks(trans, root, path, rec);
3012         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3013                 ret = repair_inode_nbytes(trans, root, path, rec);
3014         btrfs_commit_transaction(trans, root);
3015         btrfs_free_path(path);
3016         return ret;
3017 }
3018
3019 static int check_inode_recs(struct btrfs_root *root,
3020                             struct cache_tree *inode_cache)
3021 {
3022         struct cache_extent *cache;
3023         struct ptr_node *node;
3024         struct inode_record *rec;
3025         struct inode_backref *backref;
3026         int stage = 0;
3027         int ret = 0;
3028         int err = 0;
3029         u64 error = 0;
3030         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3031
3032         if (btrfs_root_refs(&root->root_item) == 0) {
3033                 if (!cache_tree_empty(inode_cache))
3034                         fprintf(stderr, "warning line %d\n", __LINE__);
3035                 return 0;
3036         }
3037
3038         /*
3039          * We need to record the highest inode number for later 'lost+found'
3040          * dir creation.
3041          * We must select an ino not used/referred by any existing inode, or
3042          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3043          * this may cause 'lost+found' dir has wrong nlinks.
3044          */
3045         cache = last_cache_extent(inode_cache);
3046         if (cache) {
3047                 node = container_of(cache, struct ptr_node, cache);
3048                 rec = node->data;
3049                 if (rec->ino > root->highest_inode)
3050                         root->highest_inode = rec->ino;
3051         }
3052
3053         /*
3054          * We need to repair backrefs first because we could change some of the
3055          * errors in the inode recs.
3056          *
3057          * We also need to go through and delete invalid backrefs first and then
3058          * add the correct ones second.  We do this because we may get EEXIST
3059          * when adding back the correct index because we hadn't yet deleted the
3060          * invalid index.
3061          *
3062          * For example, if we were missing a dir index then the directories
3063          * isize would be wrong, so if we fixed the isize to what we thought it
3064          * would be and then fixed the backref we'd still have a invalid fs, so
3065          * we need to add back the dir index and then check to see if the isize
3066          * is still wrong.
3067          */
3068         while (stage < 3) {
3069                 stage++;
3070                 if (stage == 3 && !err)
3071                         break;
3072
3073                 cache = search_cache_extent(inode_cache, 0);
3074                 while (repair && cache) {
3075                         node = container_of(cache, struct ptr_node, cache);
3076                         rec = node->data;
3077                         cache = next_cache_extent(cache);
3078
3079                         /* Need to free everything up and rescan */
3080                         if (stage == 3) {
3081                                 remove_cache_extent(inode_cache, &node->cache);
3082                                 free(node);
3083                                 free_inode_rec(rec);
3084                                 continue;
3085                         }
3086
3087                         if (list_empty(&rec->backrefs))
3088                                 continue;
3089
3090                         ret = repair_inode_backrefs(root, rec, inode_cache,
3091                                                     stage == 1);
3092                         if (ret < 0) {
3093                                 err = ret;
3094                                 stage = 2;
3095                                 break;
3096                         } if (ret > 0) {
3097                                 err = -EAGAIN;
3098                         }
3099                 }
3100         }
3101         if (err)
3102                 return err;
3103
3104         rec = get_inode_rec(inode_cache, root_dirid, 0);
3105         BUG_ON(IS_ERR(rec));
3106         if (rec) {
3107                 ret = check_root_dir(rec);
3108                 if (ret) {
3109                         fprintf(stderr, "root %llu root dir %llu error\n",
3110                                 (unsigned long long)root->root_key.objectid,
3111                                 (unsigned long long)root_dirid);
3112                         print_inode_error(root, rec);
3113                         error++;
3114                 }
3115         } else {
3116                 if (repair) {
3117                         struct btrfs_trans_handle *trans;
3118
3119                         trans = btrfs_start_transaction(root, 1);
3120                         if (IS_ERR(trans)) {
3121                                 err = PTR_ERR(trans);
3122                                 return err;
3123                         }
3124
3125                         fprintf(stderr,
3126                                 "root %llu missing its root dir, recreating\n",
3127                                 (unsigned long long)root->objectid);
3128
3129                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3130                         BUG_ON(ret);
3131
3132                         btrfs_commit_transaction(trans, root);
3133                         return -EAGAIN;
3134                 }
3135
3136                 fprintf(stderr, "root %llu root dir %llu not found\n",
3137                         (unsigned long long)root->root_key.objectid,
3138                         (unsigned long long)root_dirid);
3139         }
3140
3141         while (1) {
3142                 cache = search_cache_extent(inode_cache, 0);
3143                 if (!cache)
3144                         break;
3145                 node = container_of(cache, struct ptr_node, cache);
3146                 rec = node->data;
3147                 remove_cache_extent(inode_cache, &node->cache);
3148                 free(node);
3149                 if (rec->ino == root_dirid ||
3150                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3151                         free_inode_rec(rec);
3152                         continue;
3153                 }
3154
3155                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3156                         ret = check_orphan_item(root, rec->ino);
3157                         if (ret == 0)
3158                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3159                         if (can_free_inode_rec(rec)) {
3160                                 free_inode_rec(rec);
3161                                 continue;
3162                         }
3163                 }
3164
3165                 if (!rec->found_inode_item)
3166                         rec->errors |= I_ERR_NO_INODE_ITEM;
3167                 if (rec->found_link != rec->nlink)
3168                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3169                 if (repair) {
3170                         ret = try_repair_inode(root, rec);
3171                         if (ret == 0 && can_free_inode_rec(rec)) {
3172                                 free_inode_rec(rec);
3173                                 continue;
3174                         }
3175                         ret = 0;
3176                 }
3177
3178                 if (!(repair && ret == 0))
3179                         error++;
3180                 print_inode_error(root, rec);
3181                 list_for_each_entry(backref, &rec->backrefs, list) {
3182                         if (!backref->found_dir_item)
3183                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3184                         if (!backref->found_dir_index)
3185                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3186                         if (!backref->found_inode_ref)
3187                                 backref->errors |= REF_ERR_NO_INODE_REF;
3188                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3189                                 " namelen %u name %s filetype %d errors %x",
3190                                 (unsigned long long)backref->dir,
3191                                 (unsigned long long)backref->index,
3192                                 backref->namelen, backref->name,
3193                                 backref->filetype, backref->errors);
3194                         print_ref_error(backref->errors);
3195                 }
3196                 free_inode_rec(rec);
3197         }
3198         return (error > 0) ? -1 : 0;
3199 }
3200
3201 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3202                                         u64 objectid)
3203 {
3204         struct cache_extent *cache;
3205         struct root_record *rec = NULL;
3206         int ret;
3207
3208         cache = lookup_cache_extent(root_cache, objectid, 1);
3209         if (cache) {
3210                 rec = container_of(cache, struct root_record, cache);
3211         } else {
3212                 rec = calloc(1, sizeof(*rec));
3213                 if (!rec)
3214                         return ERR_PTR(-ENOMEM);
3215                 rec->objectid = objectid;
3216                 INIT_LIST_HEAD(&rec->backrefs);
3217                 rec->cache.start = objectid;
3218                 rec->cache.size = 1;
3219
3220                 ret = insert_cache_extent(root_cache, &rec->cache);
3221                 if (ret)
3222                         return ERR_PTR(-EEXIST);
3223         }
3224         return rec;
3225 }
3226
3227 static struct root_backref *get_root_backref(struct root_record *rec,
3228                                              u64 ref_root, u64 dir, u64 index,
3229                                              const char *name, int namelen)
3230 {
3231         struct root_backref *backref;
3232
3233         list_for_each_entry(backref, &rec->backrefs, list) {
3234                 if (backref->ref_root != ref_root || backref->dir != dir ||
3235                     backref->namelen != namelen)
3236                         continue;
3237                 if (memcmp(name, backref->name, namelen))
3238                         continue;
3239                 return backref;
3240         }
3241
3242         backref = calloc(1, sizeof(*backref) + namelen + 1);
3243         if (!backref)
3244                 return NULL;
3245         backref->ref_root = ref_root;
3246         backref->dir = dir;
3247         backref->index = index;
3248         backref->namelen = namelen;
3249         memcpy(backref->name, name, namelen);
3250         backref->name[namelen] = '\0';
3251         list_add_tail(&backref->list, &rec->backrefs);
3252         return backref;
3253 }
3254
3255 static void free_root_record(struct cache_extent *cache)
3256 {
3257         struct root_record *rec;
3258         struct root_backref *backref;
3259
3260         rec = container_of(cache, struct root_record, cache);
3261         while (!list_empty(&rec->backrefs)) {
3262                 backref = to_root_backref(rec->backrefs.next);
3263                 list_del(&backref->list);
3264                 free(backref);
3265         }
3266
3267         kfree(rec);
3268 }
3269
3270 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3271
3272 static int add_root_backref(struct cache_tree *root_cache,
3273                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3274                             const char *name, int namelen,
3275                             int item_type, int errors)
3276 {
3277         struct root_record *rec;
3278         struct root_backref *backref;
3279
3280         rec = get_root_rec(root_cache, root_id);
3281         BUG_ON(IS_ERR(rec));
3282         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3283         BUG_ON(!backref);
3284
3285         backref->errors |= errors;
3286
3287         if (item_type != BTRFS_DIR_ITEM_KEY) {
3288                 if (backref->found_dir_index || backref->found_back_ref ||
3289                     backref->found_forward_ref) {
3290                         if (backref->index != index)
3291                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3292                 } else {
3293                         backref->index = index;
3294                 }
3295         }
3296
3297         if (item_type == BTRFS_DIR_ITEM_KEY) {
3298                 if (backref->found_forward_ref)
3299                         rec->found_ref++;
3300                 backref->found_dir_item = 1;
3301         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3302                 backref->found_dir_index = 1;
3303         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3304                 if (backref->found_forward_ref)
3305                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3306                 else if (backref->found_dir_item)
3307                         rec->found_ref++;
3308                 backref->found_forward_ref = 1;
3309         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3310                 if (backref->found_back_ref)
3311                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3312                 backref->found_back_ref = 1;
3313         } else {
3314                 BUG_ON(1);
3315         }
3316
3317         if (backref->found_forward_ref && backref->found_dir_item)
3318                 backref->reachable = 1;
3319         return 0;
3320 }
3321
3322 static int merge_root_recs(struct btrfs_root *root,
3323                            struct cache_tree *src_cache,
3324                            struct cache_tree *dst_cache)
3325 {
3326         struct cache_extent *cache;
3327         struct ptr_node *node;
3328         struct inode_record *rec;
3329         struct inode_backref *backref;
3330         int ret = 0;
3331
3332         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3333                 free_inode_recs_tree(src_cache);
3334                 return 0;
3335         }
3336
3337         while (1) {
3338                 cache = search_cache_extent(src_cache, 0);
3339                 if (!cache)
3340                         break;
3341                 node = container_of(cache, struct ptr_node, cache);
3342                 rec = node->data;
3343                 remove_cache_extent(src_cache, &node->cache);
3344                 free(node);
3345
3346                 ret = is_child_root(root, root->objectid, rec->ino);
3347                 if (ret < 0)
3348                         break;
3349                 else if (ret == 0)
3350                         goto skip;
3351
3352                 list_for_each_entry(backref, &rec->backrefs, list) {
3353                         BUG_ON(backref->found_inode_ref);
3354                         if (backref->found_dir_item)
3355                                 add_root_backref(dst_cache, rec->ino,
3356                                         root->root_key.objectid, backref->dir,
3357                                         backref->index, backref->name,
3358                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3359                                         backref->errors);
3360                         if (backref->found_dir_index)
3361                                 add_root_backref(dst_cache, rec->ino,
3362                                         root->root_key.objectid, backref->dir,
3363                                         backref->index, backref->name,
3364                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3365                                         backref->errors);
3366                 }
3367 skip:
3368                 free_inode_rec(rec);
3369         }
3370         if (ret < 0)
3371                 return ret;
3372         return 0;
3373 }
3374
3375 static int check_root_refs(struct btrfs_root *root,
3376                            struct cache_tree *root_cache)
3377 {
3378         struct root_record *rec;
3379         struct root_record *ref_root;
3380         struct root_backref *backref;
3381         struct cache_extent *cache;
3382         int loop = 1;
3383         int ret;
3384         int error;
3385         int errors = 0;
3386
3387         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3388         BUG_ON(IS_ERR(rec));
3389         rec->found_ref = 1;
3390
3391         /* fixme: this can not detect circular references */
3392         while (loop) {
3393                 loop = 0;
3394                 cache = search_cache_extent(root_cache, 0);
3395                 while (1) {
3396                         if (!cache)
3397                                 break;
3398                         rec = container_of(cache, struct root_record, cache);
3399                         cache = next_cache_extent(cache);
3400
3401                         if (rec->found_ref == 0)
3402                                 continue;
3403
3404                         list_for_each_entry(backref, &rec->backrefs, list) {
3405                                 if (!backref->reachable)
3406                                         continue;
3407
3408                                 ref_root = get_root_rec(root_cache,
3409                                                         backref->ref_root);
3410                                 BUG_ON(IS_ERR(ref_root));
3411                                 if (ref_root->found_ref > 0)
3412                                         continue;
3413
3414                                 backref->reachable = 0;
3415                                 rec->found_ref--;
3416                                 if (rec->found_ref == 0)
3417                                         loop = 1;
3418                         }
3419                 }
3420         }
3421
3422         cache = search_cache_extent(root_cache, 0);
3423         while (1) {
3424                 if (!cache)
3425                         break;
3426                 rec = container_of(cache, struct root_record, cache);
3427                 cache = next_cache_extent(cache);
3428
3429                 if (rec->found_ref == 0 &&
3430                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3431                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3432                         ret = check_orphan_item(root->fs_info->tree_root,
3433                                                 rec->objectid);
3434                         if (ret == 0)
3435                                 continue;
3436
3437                         /*
3438                          * If we don't have a root item then we likely just have
3439                          * a dir item in a snapshot for this root but no actual
3440                          * ref key or anything so it's meaningless.
3441                          */
3442                         if (!rec->found_root_item)
3443                                 continue;
3444                         errors++;
3445                         fprintf(stderr, "fs tree %llu not referenced\n",
3446                                 (unsigned long long)rec->objectid);
3447                 }
3448
3449                 error = 0;
3450                 if (rec->found_ref > 0 && !rec->found_root_item)
3451                         error = 1;
3452                 list_for_each_entry(backref, &rec->backrefs, list) {
3453                         if (!backref->found_dir_item)
3454                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3455                         if (!backref->found_dir_index)
3456                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3457                         if (!backref->found_back_ref)
3458                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3459                         if (!backref->found_forward_ref)
3460                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3461                         if (backref->reachable && backref->errors)
3462                                 error = 1;
3463                 }
3464                 if (!error)
3465                         continue;
3466
3467                 errors++;
3468                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3469                         (unsigned long long)rec->objectid, rec->found_ref,
3470                          rec->found_root_item ? "" : "not found");
3471
3472                 list_for_each_entry(backref, &rec->backrefs, list) {
3473                         if (!backref->reachable)
3474                                 continue;
3475                         if (!backref->errors && rec->found_root_item)
3476                                 continue;
3477                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3478                                 " index %llu namelen %u name %s errors %x\n",
3479                                 (unsigned long long)backref->ref_root,
3480                                 (unsigned long long)backref->dir,
3481                                 (unsigned long long)backref->index,
3482                                 backref->namelen, backref->name,
3483                                 backref->errors);
3484                         print_ref_error(backref->errors);
3485                 }
3486         }
3487         return errors > 0 ? 1 : 0;
3488 }
3489
3490 static int process_root_ref(struct extent_buffer *eb, int slot,
3491                             struct btrfs_key *key,
3492                             struct cache_tree *root_cache)
3493 {
3494         u64 dirid;
3495         u64 index;
3496         u32 len;
3497         u32 name_len;
3498         struct btrfs_root_ref *ref;
3499         char namebuf[BTRFS_NAME_LEN];
3500         int error;
3501
3502         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3503
3504         dirid = btrfs_root_ref_dirid(eb, ref);
3505         index = btrfs_root_ref_sequence(eb, ref);
3506         name_len = btrfs_root_ref_name_len(eb, ref);
3507
3508         if (name_len <= BTRFS_NAME_LEN) {
3509                 len = name_len;
3510                 error = 0;
3511         } else {
3512                 len = BTRFS_NAME_LEN;
3513                 error = REF_ERR_NAME_TOO_LONG;
3514         }
3515         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3516
3517         if (key->type == BTRFS_ROOT_REF_KEY) {
3518                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3519                                  index, namebuf, len, key->type, error);
3520         } else {
3521                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3522                                  index, namebuf, len, key->type, error);
3523         }
3524         return 0;
3525 }
3526
3527 static void free_corrupt_block(struct cache_extent *cache)
3528 {
3529         struct btrfs_corrupt_block *corrupt;
3530
3531         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3532         free(corrupt);
3533 }
3534
3535 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3536
3537 /*
3538  * Repair the btree of the given root.
3539  *
3540  * The fix is to remove the node key in corrupt_blocks cache_tree.
3541  * and rebalance the tree.
3542  * After the fix, the btree should be writeable.
3543  */
3544 static int repair_btree(struct btrfs_root *root,
3545                         struct cache_tree *corrupt_blocks)
3546 {
3547         struct btrfs_trans_handle *trans;
3548         struct btrfs_path *path;
3549         struct btrfs_corrupt_block *corrupt;
3550         struct cache_extent *cache;
3551         struct btrfs_key key;
3552         u64 offset;
3553         int level;
3554         int ret = 0;
3555
3556         if (cache_tree_empty(corrupt_blocks))
3557                 return 0;
3558
3559         path = btrfs_alloc_path();
3560         if (!path)
3561                 return -ENOMEM;
3562
3563         trans = btrfs_start_transaction(root, 1);
3564         if (IS_ERR(trans)) {
3565                 ret = PTR_ERR(trans);
3566                 fprintf(stderr, "Error starting transaction: %s\n",
3567                         strerror(-ret));
3568                 goto out_free_path;
3569         }
3570         cache = first_cache_extent(corrupt_blocks);
3571         while (cache) {
3572                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3573                                        cache);
3574                 level = corrupt->level;
3575                 path->lowest_level = level;
3576                 key.objectid = corrupt->key.objectid;
3577                 key.type = corrupt->key.type;
3578                 key.offset = corrupt->key.offset;
3579
3580                 /*
3581                  * Here we don't want to do any tree balance, since it may
3582                  * cause a balance with corrupted brother leaf/node,
3583                  * so ins_len set to 0 here.
3584                  * Balance will be done after all corrupt node/leaf is deleted.
3585                  */
3586                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3587                 if (ret < 0)
3588                         goto out;
3589                 offset = btrfs_node_blockptr(path->nodes[level],
3590                                              path->slots[level]);
3591
3592                 /* Remove the ptr */
3593                 ret = btrfs_del_ptr(trans, root, path, level,
3594                                     path->slots[level]);
3595                 if (ret < 0)
3596                         goto out;
3597                 /*
3598                  * Remove the corresponding extent
3599                  * return value is not concerned.
3600                  */
3601                 btrfs_release_path(path);
3602                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3603                                         0, root->root_key.objectid,
3604                                         level - 1, 0);
3605                 cache = next_cache_extent(cache);
3606         }
3607
3608         /* Balance the btree using btrfs_search_slot() */
3609         cache = first_cache_extent(corrupt_blocks);
3610         while (cache) {
3611                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3612                                        cache);
3613                 memcpy(&key, &corrupt->key, sizeof(key));
3614                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3615                 if (ret < 0)
3616                         goto out;
3617                 /* return will always >0 since it won't find the item */
3618                 ret = 0;
3619                 btrfs_release_path(path);
3620                 cache = next_cache_extent(cache);
3621         }
3622 out:
3623         btrfs_commit_transaction(trans, root);
3624 out_free_path:
3625         btrfs_free_path(path);
3626         return ret;
3627 }
3628
3629 static int check_fs_root(struct btrfs_root *root,
3630                          struct cache_tree *root_cache,
3631                          struct walk_control *wc)
3632 {
3633         int ret = 0;
3634         int err = 0;
3635         int wret;
3636         int level;
3637         struct btrfs_path path;
3638         struct shared_node root_node;
3639         struct root_record *rec;
3640         struct btrfs_root_item *root_item = &root->root_item;
3641         struct cache_tree corrupt_blocks;
3642         struct orphan_data_extent *orphan;
3643         struct orphan_data_extent *tmp;
3644         enum btrfs_tree_block_status status;
3645         struct node_refs nrefs;
3646
3647         /*
3648          * Reuse the corrupt_block cache tree to record corrupted tree block
3649          *
3650          * Unlike the usage in extent tree check, here we do it in a per
3651          * fs/subvol tree base.
3652          */
3653         cache_tree_init(&corrupt_blocks);
3654         root->fs_info->corrupt_blocks = &corrupt_blocks;
3655
3656         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3657                 rec = get_root_rec(root_cache, root->root_key.objectid);
3658                 BUG_ON(IS_ERR(rec));
3659                 if (btrfs_root_refs(root_item) > 0)
3660                         rec->found_root_item = 1;
3661         }
3662
3663         btrfs_init_path(&path);
3664         memset(&root_node, 0, sizeof(root_node));
3665         cache_tree_init(&root_node.root_cache);
3666         cache_tree_init(&root_node.inode_cache);
3667         memset(&nrefs, 0, sizeof(nrefs));
3668
3669         /* Move the orphan extent record to corresponding inode_record */
3670         list_for_each_entry_safe(orphan, tmp,
3671                                  &root->orphan_data_extents, list) {
3672                 struct inode_record *inode;
3673
3674                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3675                                       1);
3676                 BUG_ON(IS_ERR(inode));
3677                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3678                 list_move(&orphan->list, &inode->orphan_extents);
3679         }
3680
3681         level = btrfs_header_level(root->node);
3682         memset(wc->nodes, 0, sizeof(wc->nodes));
3683         wc->nodes[level] = &root_node;
3684         wc->active_node = level;
3685         wc->root_level = level;
3686
3687         /* We may not have checked the root block, lets do that now */
3688         if (btrfs_is_leaf(root->node))
3689                 status = btrfs_check_leaf(root, NULL, root->node);
3690         else
3691                 status = btrfs_check_node(root, NULL, root->node);
3692         if (status != BTRFS_TREE_BLOCK_CLEAN)
3693                 return -EIO;
3694
3695         if (btrfs_root_refs(root_item) > 0 ||
3696             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3697                 path.nodes[level] = root->node;
3698                 extent_buffer_get(root->node);
3699                 path.slots[level] = 0;
3700         } else {
3701                 struct btrfs_key key;
3702                 struct btrfs_disk_key found_key;
3703
3704                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3705                 level = root_item->drop_level;
3706                 path.lowest_level = level;
3707                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3708                 if (wret < 0)
3709                         goto skip_walking;
3710                 btrfs_node_key(path.nodes[level], &found_key,
3711                                 path.slots[level]);
3712                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3713                                         sizeof(found_key)));
3714         }
3715
3716         while (1) {
3717                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3718                 if (wret < 0)
3719                         ret = wret;
3720                 if (wret != 0)
3721                         break;
3722
3723                 wret = walk_up_tree(root, &path, wc, &level);
3724                 if (wret < 0)
3725                         ret = wret;
3726                 if (wret != 0)
3727                         break;
3728         }
3729 skip_walking:
3730         btrfs_release_path(&path);
3731
3732         if (!cache_tree_empty(&corrupt_blocks)) {
3733                 struct cache_extent *cache;
3734                 struct btrfs_corrupt_block *corrupt;
3735
3736                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3737                        root->root_key.objectid);
3738                 cache = first_cache_extent(&corrupt_blocks);
3739                 while (cache) {
3740                         corrupt = container_of(cache,
3741                                                struct btrfs_corrupt_block,
3742                                                cache);
3743                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3744                                cache->start, corrupt->level,
3745                                corrupt->key.objectid, corrupt->key.type,
3746                                corrupt->key.offset);
3747                         cache = next_cache_extent(cache);
3748                 }
3749                 if (repair) {
3750                         printf("Try to repair the btree for root %llu\n",
3751                                root->root_key.objectid);
3752                         ret = repair_btree(root, &corrupt_blocks);
3753                         if (ret < 0)
3754                                 fprintf(stderr, "Failed to repair btree: %s\n",
3755                                         strerror(-ret));
3756                         if (!ret)
3757                                 printf("Btree for root %llu is fixed\n",
3758                                        root->root_key.objectid);
3759                 }
3760         }
3761
3762         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3763         if (err < 0)
3764                 ret = err;
3765
3766         if (root_node.current) {
3767                 root_node.current->checked = 1;
3768                 maybe_free_inode_rec(&root_node.inode_cache,
3769                                 root_node.current);
3770         }
3771
3772         err = check_inode_recs(root, &root_node.inode_cache);
3773         if (!ret)
3774                 ret = err;
3775
3776         free_corrupt_blocks_tree(&corrupt_blocks);
3777         root->fs_info->corrupt_blocks = NULL;
3778         free_orphan_data_extents(&root->orphan_data_extents);
3779         return ret;
3780 }
3781
3782 static int fs_root_objectid(u64 objectid)
3783 {
3784         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3785             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3786                 return 1;
3787         return is_fstree(objectid);
3788 }
3789
3790 static int check_fs_roots(struct btrfs_root *root,
3791                           struct cache_tree *root_cache)
3792 {
3793         struct btrfs_path path;
3794         struct btrfs_key key;
3795         struct walk_control wc;
3796         struct extent_buffer *leaf, *tree_node;
3797         struct btrfs_root *tmp_root;
3798         struct btrfs_root *tree_root = root->fs_info->tree_root;
3799         int ret;
3800         int err = 0;
3801
3802         if (ctx.progress_enabled) {
3803                 ctx.tp = TASK_FS_ROOTS;
3804                 task_start(ctx.info);
3805         }
3806
3807         /*
3808          * Just in case we made any changes to the extent tree that weren't
3809          * reflected into the free space cache yet.
3810          */
3811         if (repair)
3812                 reset_cached_block_groups(root->fs_info);
3813         memset(&wc, 0, sizeof(wc));
3814         cache_tree_init(&wc.shared);
3815         btrfs_init_path(&path);
3816
3817 again:
3818         key.offset = 0;
3819         key.objectid = 0;
3820         key.type = BTRFS_ROOT_ITEM_KEY;
3821         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3822         if (ret < 0) {
3823                 err = 1;
3824                 goto out;
3825         }
3826         tree_node = tree_root->node;
3827         while (1) {
3828                 if (tree_node != tree_root->node) {
3829                         free_root_recs_tree(root_cache);
3830                         btrfs_release_path(&path);
3831                         goto again;
3832                 }
3833                 leaf = path.nodes[0];
3834                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3835                         ret = btrfs_next_leaf(tree_root, &path);
3836                         if (ret) {
3837                                 if (ret < 0)
3838                                         err = 1;
3839                                 break;
3840                         }
3841                         leaf = path.nodes[0];
3842                 }
3843                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3844                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3845                     fs_root_objectid(key.objectid)) {
3846                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3847                                 tmp_root = btrfs_read_fs_root_no_cache(
3848                                                 root->fs_info, &key);
3849                         } else {
3850                                 key.offset = (u64)-1;
3851                                 tmp_root = btrfs_read_fs_root(
3852                                                 root->fs_info, &key);
3853                         }
3854                         if (IS_ERR(tmp_root)) {
3855                                 err = 1;
3856                                 goto next;
3857                         }
3858                         ret = check_fs_root(tmp_root, root_cache, &wc);
3859                         if (ret == -EAGAIN) {
3860                                 free_root_recs_tree(root_cache);
3861                                 btrfs_release_path(&path);
3862                                 goto again;
3863                         }
3864                         if (ret)
3865                                 err = 1;
3866                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3867                                 btrfs_free_fs_root(tmp_root);
3868                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3869                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3870                         process_root_ref(leaf, path.slots[0], &key,
3871                                          root_cache);
3872                 }
3873 next:
3874                 path.slots[0]++;
3875         }
3876 out:
3877         btrfs_release_path(&path);
3878         if (err)
3879                 free_extent_cache_tree(&wc.shared);
3880         if (!cache_tree_empty(&wc.shared))
3881                 fprintf(stderr, "warning line %d\n", __LINE__);
3882
3883         task_stop(ctx.info);
3884
3885         return err;
3886 }
3887
3888 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3889 {
3890         struct rb_node *n;
3891         struct extent_backref *back;
3892         struct tree_backref *tback;
3893         struct data_backref *dback;
3894         u64 found = 0;
3895         int err = 0;
3896
3897         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3898                 back = rb_node_to_extent_backref(n);
3899                 if (!back->found_extent_tree) {
3900                         err = 1;
3901                         if (!print_errs)
3902                                 goto out;
3903                         if (back->is_data) {
3904                                 dback = to_data_backref(back);
3905                                 fprintf(stderr, "Backref %llu %s %llu"
3906                                         " owner %llu offset %llu num_refs %lu"
3907                                         " not found in extent tree\n",
3908                                         (unsigned long long)rec->start,
3909                                         back->full_backref ?
3910                                         "parent" : "root",
3911                                         back->full_backref ?
3912                                         (unsigned long long)dback->parent:
3913                                         (unsigned long long)dback->root,
3914                                         (unsigned long long)dback->owner,
3915                                         (unsigned long long)dback->offset,
3916                                         (unsigned long)dback->num_refs);
3917                         } else {
3918                                 tback = to_tree_backref(back);
3919                                 fprintf(stderr, "Backref %llu parent %llu"
3920                                         " root %llu not found in extent tree\n",
3921                                         (unsigned long long)rec->start,
3922                                         (unsigned long long)tback->parent,
3923                                         (unsigned long long)tback->root);
3924                         }
3925                 }
3926                 if (!back->is_data && !back->found_ref) {
3927                         err = 1;
3928                         if (!print_errs)
3929                                 goto out;
3930                         tback = to_tree_backref(back);
3931                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3932                                 (unsigned long long)rec->start,
3933                                 back->full_backref ? "parent" : "root",
3934                                 back->full_backref ?
3935                                 (unsigned long long)tback->parent :
3936                                 (unsigned long long)tback->root, back);
3937                 }
3938                 if (back->is_data) {
3939                         dback = to_data_backref(back);
3940                         if (dback->found_ref != dback->num_refs) {
3941                                 err = 1;
3942                                 if (!print_errs)
3943                                         goto out;
3944                                 fprintf(stderr, "Incorrect local backref count"
3945                                         " on %llu %s %llu owner %llu"
3946                                         " offset %llu found %u wanted %u back %p\n",
3947                                         (unsigned long long)rec->start,
3948                                         back->full_backref ?
3949                                         "parent" : "root",
3950                                         back->full_backref ?
3951                                         (unsigned long long)dback->parent:
3952                                         (unsigned long long)dback->root,
3953                                         (unsigned long long)dback->owner,
3954                                         (unsigned long long)dback->offset,
3955                                         dback->found_ref, dback->num_refs, back);
3956                         }
3957                         if (dback->disk_bytenr != rec->start) {
3958                                 err = 1;
3959                                 if (!print_errs)
3960                                         goto out;
3961                                 fprintf(stderr, "Backref disk bytenr does not"
3962                                         " match extent record, bytenr=%llu, "
3963                                         "ref bytenr=%llu\n",
3964                                         (unsigned long long)rec->start,
3965                                         (unsigned long long)dback->disk_bytenr);
3966                         }
3967
3968                         if (dback->bytes != rec->nr) {
3969                                 err = 1;
3970                                 if (!print_errs)
3971                                         goto out;
3972                                 fprintf(stderr, "Backref bytes do not match "
3973                                         "extent backref, bytenr=%llu, ref "
3974                                         "bytes=%llu, backref bytes=%llu\n",
3975                                         (unsigned long long)rec->start,
3976                                         (unsigned long long)rec->nr,
3977                                         (unsigned long long)dback->bytes);
3978                         }
3979                 }
3980                 if (!back->is_data) {
3981                         found += 1;
3982                 } else {
3983                         dback = to_data_backref(back);
3984                         found += dback->found_ref;
3985                 }
3986         }
3987         if (found != rec->refs) {
3988                 err = 1;
3989                 if (!print_errs)
3990                         goto out;
3991                 fprintf(stderr, "Incorrect global backref count "
3992                         "on %llu found %llu wanted %llu\n",
3993                         (unsigned long long)rec->start,
3994                         (unsigned long long)found,
3995                         (unsigned long long)rec->refs);
3996         }
3997 out:
3998         return err;
3999 }
4000
4001 static void __free_one_backref(struct rb_node *node)
4002 {
4003         struct extent_backref *back = rb_node_to_extent_backref(node);
4004
4005         free(back);
4006 }
4007
4008 static void free_all_extent_backrefs(struct extent_record *rec)
4009 {
4010         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4011 }
4012
4013 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4014                                      struct cache_tree *extent_cache)
4015 {
4016         struct cache_extent *cache;
4017         struct extent_record *rec;
4018
4019         while (1) {
4020                 cache = first_cache_extent(extent_cache);
4021                 if (!cache)
4022                         break;
4023                 rec = container_of(cache, struct extent_record, cache);
4024                 remove_cache_extent(extent_cache, cache);
4025                 free_all_extent_backrefs(rec);
4026                 free(rec);
4027         }
4028 }
4029
4030 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4031                                  struct extent_record *rec)
4032 {
4033         if (rec->content_checked && rec->owner_ref_checked &&
4034             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4035             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4036             !rec->bad_full_backref && !rec->crossing_stripes &&
4037             !rec->wrong_chunk_type) {
4038                 remove_cache_extent(extent_cache, &rec->cache);
4039                 free_all_extent_backrefs(rec);
4040                 list_del_init(&rec->list);
4041                 free(rec);
4042         }
4043         return 0;
4044 }
4045
4046 static int check_owner_ref(struct btrfs_root *root,
4047                             struct extent_record *rec,
4048                             struct extent_buffer *buf)
4049 {
4050         struct extent_backref *node, *tmp;
4051         struct tree_backref *back;
4052         struct btrfs_root *ref_root;
4053         struct btrfs_key key;
4054         struct btrfs_path path;
4055         struct extent_buffer *parent;
4056         int level;
4057         int found = 0;
4058         int ret;
4059
4060         rbtree_postorder_for_each_entry_safe(node, tmp,
4061                                              &rec->backref_tree, node) {
4062                 if (node->is_data)
4063                         continue;
4064                 if (!node->found_ref)
4065                         continue;
4066                 if (node->full_backref)
4067                         continue;
4068                 back = to_tree_backref(node);
4069                 if (btrfs_header_owner(buf) == back->root)
4070                         return 0;
4071         }
4072         BUG_ON(rec->is_root);
4073
4074         /* try to find the block by search corresponding fs tree */
4075         key.objectid = btrfs_header_owner(buf);
4076         key.type = BTRFS_ROOT_ITEM_KEY;
4077         key.offset = (u64)-1;
4078
4079         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4080         if (IS_ERR(ref_root))
4081                 return 1;
4082
4083         level = btrfs_header_level(buf);
4084         if (level == 0)
4085                 btrfs_item_key_to_cpu(buf, &key, 0);
4086         else
4087                 btrfs_node_key_to_cpu(buf, &key, 0);
4088
4089         btrfs_init_path(&path);
4090         path.lowest_level = level + 1;
4091         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4092         if (ret < 0)
4093                 return 0;
4094
4095         parent = path.nodes[level + 1];
4096         if (parent && buf->start == btrfs_node_blockptr(parent,
4097                                                         path.slots[level + 1]))
4098                 found = 1;
4099
4100         btrfs_release_path(&path);
4101         return found ? 0 : 1;
4102 }
4103
4104 static int is_extent_tree_record(struct extent_record *rec)
4105 {
4106         struct extent_backref *ref, *tmp;
4107         struct tree_backref *back;
4108         int is_extent = 0;
4109
4110         rbtree_postorder_for_each_entry_safe(ref, tmp,
4111                                              &rec->backref_tree, node) {
4112                 if (ref->is_data)
4113                         return 0;
4114                 back = to_tree_backref(ref);
4115                 if (ref->full_backref)
4116                         return 0;
4117                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4118                         is_extent = 1;
4119         }
4120         return is_extent;
4121 }
4122
4123
4124 static int record_bad_block_io(struct btrfs_fs_info *info,
4125                                struct cache_tree *extent_cache,
4126                                u64 start, u64 len)
4127 {
4128         struct extent_record *rec;
4129         struct cache_extent *cache;
4130         struct btrfs_key key;
4131
4132         cache = lookup_cache_extent(extent_cache, start, len);
4133         if (!cache)
4134                 return 0;
4135
4136         rec = container_of(cache, struct extent_record, cache);
4137         if (!is_extent_tree_record(rec))
4138                 return 0;
4139
4140         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4141         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4142 }
4143
4144 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4145                        struct extent_buffer *buf, int slot)
4146 {
4147         if (btrfs_header_level(buf)) {
4148                 struct btrfs_key_ptr ptr1, ptr2;
4149
4150                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4151                                    sizeof(struct btrfs_key_ptr));
4152                 read_extent_buffer(buf, &ptr2,
4153                                    btrfs_node_key_ptr_offset(slot + 1),
4154                                    sizeof(struct btrfs_key_ptr));
4155                 write_extent_buffer(buf, &ptr1,
4156                                     btrfs_node_key_ptr_offset(slot + 1),
4157                                     sizeof(struct btrfs_key_ptr));
4158                 write_extent_buffer(buf, &ptr2,
4159                                     btrfs_node_key_ptr_offset(slot),
4160                                     sizeof(struct btrfs_key_ptr));
4161                 if (slot == 0) {
4162                         struct btrfs_disk_key key;
4163                         btrfs_node_key(buf, &key, 0);
4164                         btrfs_fixup_low_keys(root, path, &key,
4165                                              btrfs_header_level(buf) + 1);
4166                 }
4167         } else {
4168                 struct btrfs_item *item1, *item2;
4169                 struct btrfs_key k1, k2;
4170                 char *item1_data, *item2_data;
4171                 u32 item1_offset, item2_offset, item1_size, item2_size;
4172
4173                 item1 = btrfs_item_nr(slot);
4174                 item2 = btrfs_item_nr(slot + 1);
4175                 btrfs_item_key_to_cpu(buf, &k1, slot);
4176                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4177                 item1_offset = btrfs_item_offset(buf, item1);
4178                 item2_offset = btrfs_item_offset(buf, item2);
4179                 item1_size = btrfs_item_size(buf, item1);
4180                 item2_size = btrfs_item_size(buf, item2);
4181
4182                 item1_data = malloc(item1_size);
4183                 if (!item1_data)
4184                         return -ENOMEM;
4185                 item2_data = malloc(item2_size);
4186                 if (!item2_data) {
4187                         free(item1_data);
4188                         return -ENOMEM;
4189                 }
4190
4191                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4192                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4193
4194                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4195                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4196                 free(item1_data);
4197                 free(item2_data);
4198
4199                 btrfs_set_item_offset(buf, item1, item2_offset);
4200                 btrfs_set_item_offset(buf, item2, item1_offset);
4201                 btrfs_set_item_size(buf, item1, item2_size);
4202                 btrfs_set_item_size(buf, item2, item1_size);
4203
4204                 path->slots[0] = slot;
4205                 btrfs_set_item_key_unsafe(root, path, &k2);
4206                 path->slots[0] = slot + 1;
4207                 btrfs_set_item_key_unsafe(root, path, &k1);
4208         }
4209         return 0;
4210 }
4211
4212 static int fix_key_order(struct btrfs_trans_handle *trans,
4213                          struct btrfs_root *root,
4214                          struct btrfs_path *path)
4215 {
4216         struct extent_buffer *buf;
4217         struct btrfs_key k1, k2;
4218         int i;
4219         int level = path->lowest_level;
4220         int ret = -EIO;
4221
4222         buf = path->nodes[level];
4223         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4224                 if (level) {
4225                         btrfs_node_key_to_cpu(buf, &k1, i);
4226                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4227                 } else {
4228                         btrfs_item_key_to_cpu(buf, &k1, i);
4229                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4230                 }
4231                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4232                         continue;
4233                 ret = swap_values(root, path, buf, i);
4234                 if (ret)
4235                         break;
4236                 btrfs_mark_buffer_dirty(buf);
4237                 i = 0;
4238         }
4239         return ret;
4240 }
4241
4242 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4243                              struct btrfs_root *root,
4244                              struct btrfs_path *path,
4245                              struct extent_buffer *buf, int slot)
4246 {
4247         struct btrfs_key key;
4248         int nritems = btrfs_header_nritems(buf);
4249
4250         btrfs_item_key_to_cpu(buf, &key, slot);
4251
4252         /* These are all the keys we can deal with missing. */
4253         if (key.type != BTRFS_DIR_INDEX_KEY &&
4254             key.type != BTRFS_EXTENT_ITEM_KEY &&
4255             key.type != BTRFS_METADATA_ITEM_KEY &&
4256             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4257             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4258                 return -1;
4259
4260         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4261                (unsigned long long)key.objectid, key.type,
4262                (unsigned long long)key.offset, slot, buf->start);
4263         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4264                               btrfs_item_nr_offset(slot + 1),
4265                               sizeof(struct btrfs_item) *
4266                               (nritems - slot - 1));
4267         btrfs_set_header_nritems(buf, nritems - 1);
4268         if (slot == 0) {
4269                 struct btrfs_disk_key disk_key;
4270
4271                 btrfs_item_key(buf, &disk_key, 0);
4272                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4273         }
4274         btrfs_mark_buffer_dirty(buf);
4275         return 0;
4276 }
4277
4278 static int fix_item_offset(struct btrfs_trans_handle *trans,
4279                            struct btrfs_root *root,
4280                            struct btrfs_path *path)
4281 {
4282         struct extent_buffer *buf;
4283         int i;
4284         int ret = 0;
4285
4286         /* We should only get this for leaves */
4287         BUG_ON(path->lowest_level);
4288         buf = path->nodes[0];
4289 again:
4290         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4291                 unsigned int shift = 0, offset;
4292
4293                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4294                     BTRFS_LEAF_DATA_SIZE(root)) {
4295                         if (btrfs_item_end_nr(buf, i) >
4296                             BTRFS_LEAF_DATA_SIZE(root)) {
4297                                 ret = delete_bogus_item(trans, root, path,
4298                                                         buf, i);
4299                                 if (!ret)
4300                                         goto again;
4301                                 fprintf(stderr, "item is off the end of the "
4302                                         "leaf, can't fix\n");
4303                                 ret = -EIO;
4304                                 break;
4305                         }
4306                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4307                                 btrfs_item_end_nr(buf, i);
4308                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4309                            btrfs_item_offset_nr(buf, i - 1)) {
4310                         if (btrfs_item_end_nr(buf, i) >
4311                             btrfs_item_offset_nr(buf, i - 1)) {
4312                                 ret = delete_bogus_item(trans, root, path,
4313                                                         buf, i);
4314                                 if (!ret)
4315                                         goto again;
4316                                 fprintf(stderr, "items overlap, can't fix\n");
4317                                 ret = -EIO;
4318                                 break;
4319                         }
4320                         shift = btrfs_item_offset_nr(buf, i - 1) -
4321                                 btrfs_item_end_nr(buf, i);
4322                 }
4323                 if (!shift)
4324                         continue;
4325
4326                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4327                        i, shift, (unsigned long long)buf->start);
4328                 offset = btrfs_item_offset_nr(buf, i);
4329                 memmove_extent_buffer(buf,
4330                                       btrfs_leaf_data(buf) + offset + shift,
4331                                       btrfs_leaf_data(buf) + offset,
4332                                       btrfs_item_size_nr(buf, i));
4333                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4334                                       offset + shift);
4335                 btrfs_mark_buffer_dirty(buf);
4336         }
4337
4338         /*
4339          * We may have moved things, in which case we want to exit so we don't
4340          * write those changes out.  Once we have proper abort functionality in
4341          * progs this can be changed to something nicer.
4342          */
4343         BUG_ON(ret);
4344         return ret;
4345 }
4346
4347 /*
4348  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4349  * then just return -EIO.
4350  */
4351 static int try_to_fix_bad_block(struct btrfs_root *root,
4352                                 struct extent_buffer *buf,
4353                                 enum btrfs_tree_block_status status)
4354 {
4355         struct btrfs_trans_handle *trans;
4356         struct ulist *roots;
4357         struct ulist_node *node;
4358         struct btrfs_root *search_root;
4359         struct btrfs_path *path;
4360         struct ulist_iterator iter;
4361         struct btrfs_key root_key, key;
4362         int ret;
4363
4364         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4365             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4366                 return -EIO;
4367
4368         path = btrfs_alloc_path();
4369         if (!path)
4370                 return -EIO;
4371
4372         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4373                                    0, &roots);
4374         if (ret) {
4375                 btrfs_free_path(path);
4376                 return -EIO;
4377         }
4378
4379         ULIST_ITER_INIT(&iter);
4380         while ((node = ulist_next(roots, &iter))) {
4381                 root_key.objectid = node->val;
4382                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4383                 root_key.offset = (u64)-1;
4384
4385                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4386                 if (IS_ERR(root)) {
4387                         ret = -EIO;
4388                         break;
4389                 }
4390
4391
4392                 trans = btrfs_start_transaction(search_root, 0);
4393                 if (IS_ERR(trans)) {
4394                         ret = PTR_ERR(trans);
4395                         break;
4396                 }
4397
4398                 path->lowest_level = btrfs_header_level(buf);
4399                 path->skip_check_block = 1;
4400                 if (path->lowest_level)
4401                         btrfs_node_key_to_cpu(buf, &key, 0);
4402                 else
4403                         btrfs_item_key_to_cpu(buf, &key, 0);
4404                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4405                 if (ret) {
4406                         ret = -EIO;
4407                         btrfs_commit_transaction(trans, search_root);
4408                         break;
4409                 }
4410                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4411                         ret = fix_key_order(trans, search_root, path);
4412                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4413                         ret = fix_item_offset(trans, search_root, path);
4414                 if (ret) {
4415                         btrfs_commit_transaction(trans, search_root);
4416                         break;
4417                 }
4418                 btrfs_release_path(path);
4419                 btrfs_commit_transaction(trans, search_root);
4420         }
4421         ulist_free(roots);
4422         btrfs_free_path(path);
4423         return ret;
4424 }
4425
4426 static int check_block(struct btrfs_root *root,
4427                        struct cache_tree *extent_cache,
4428                        struct extent_buffer *buf, u64 flags)
4429 {
4430         struct extent_record *rec;
4431         struct cache_extent *cache;
4432         struct btrfs_key key;
4433         enum btrfs_tree_block_status status;
4434         int ret = 0;
4435         int level;
4436
4437         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4438         if (!cache)
4439                 return 1;
4440         rec = container_of(cache, struct extent_record, cache);
4441         rec->generation = btrfs_header_generation(buf);
4442
4443         level = btrfs_header_level(buf);
4444         if (btrfs_header_nritems(buf) > 0) {
4445
4446                 if (level == 0)
4447                         btrfs_item_key_to_cpu(buf, &key, 0);
4448                 else
4449                         btrfs_node_key_to_cpu(buf, &key, 0);
4450
4451                 rec->info_objectid = key.objectid;
4452         }
4453         rec->info_level = level;
4454
4455         if (btrfs_is_leaf(buf))
4456                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4457         else
4458                 status = btrfs_check_node(root, &rec->parent_key, buf);
4459
4460         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4461                 if (repair)
4462                         status = try_to_fix_bad_block(root, buf, status);
4463                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4464                         ret = -EIO;
4465                         fprintf(stderr, "bad block %llu\n",
4466                                 (unsigned long long)buf->start);
4467                 } else {
4468                         /*
4469                          * Signal to callers we need to start the scan over
4470                          * again since we'll have cowed blocks.
4471                          */
4472                         ret = -EAGAIN;
4473                 }
4474         } else {
4475                 rec->content_checked = 1;
4476                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4477                         rec->owner_ref_checked = 1;
4478                 else {
4479                         ret = check_owner_ref(root, rec, buf);
4480                         if (!ret)
4481                                 rec->owner_ref_checked = 1;
4482                 }
4483         }
4484         if (!ret)
4485                 maybe_free_extent_rec(extent_cache, rec);
4486         return ret;
4487 }
4488
4489
4490 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4491                                                 u64 parent, u64 root)
4492 {
4493         struct rb_node *node;
4494         struct tree_backref *back = NULL;
4495         struct tree_backref match = {
4496                 .node = {
4497                         .is_data = 0,
4498                 },
4499         };
4500
4501         if (parent) {
4502                 match.parent = parent;
4503                 match.node.full_backref = 1;
4504         } else {
4505                 match.root = root;
4506         }
4507
4508         node = rb_search(&rec->backref_tree, &match.node.node,
4509                          (rb_compare_keys)compare_extent_backref, NULL);
4510         if (node)
4511                 back = to_tree_backref(rb_node_to_extent_backref(node));
4512
4513         return back;
4514 }
4515
4516 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4517                                                 u64 parent, u64 root)
4518 {
4519         struct tree_backref *ref = malloc(sizeof(*ref));
4520
4521         if (!ref)
4522                 return NULL;
4523         memset(&ref->node, 0, sizeof(ref->node));
4524         if (parent > 0) {
4525                 ref->parent = parent;
4526                 ref->node.full_backref = 1;
4527         } else {
4528                 ref->root = root;
4529                 ref->node.full_backref = 0;
4530         }
4531         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4532
4533         return ref;
4534 }
4535
4536 static struct data_backref *find_data_backref(struct extent_record *rec,
4537                                                 u64 parent, u64 root,
4538                                                 u64 owner, u64 offset,
4539                                                 int found_ref,
4540                                                 u64 disk_bytenr, u64 bytes)
4541 {
4542         struct rb_node *node;
4543         struct data_backref *back = NULL;
4544         struct data_backref match = {
4545                 .node = {
4546                         .is_data = 1,
4547                 },
4548                 .owner = owner,
4549                 .offset = offset,
4550                 .bytes = bytes,
4551                 .found_ref = found_ref,
4552                 .disk_bytenr = disk_bytenr,
4553         };
4554
4555         if (parent) {
4556                 match.parent = parent;
4557                 match.node.full_backref = 1;
4558         } else {
4559                 match.root = root;
4560         }
4561
4562         node = rb_search(&rec->backref_tree, &match.node.node,
4563                          (rb_compare_keys)compare_extent_backref, NULL);
4564         if (node)
4565                 back = to_data_backref(rb_node_to_extent_backref(node));
4566
4567         return back;
4568 }
4569
4570 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4571                                                 u64 parent, u64 root,
4572                                                 u64 owner, u64 offset,
4573                                                 u64 max_size)
4574 {
4575         struct data_backref *ref = malloc(sizeof(*ref));
4576
4577         if (!ref)
4578                 return NULL;
4579         memset(&ref->node, 0, sizeof(ref->node));
4580         ref->node.is_data = 1;
4581
4582         if (parent > 0) {
4583                 ref->parent = parent;
4584                 ref->owner = 0;
4585                 ref->offset = 0;
4586                 ref->node.full_backref = 1;
4587         } else {
4588                 ref->root = root;
4589                 ref->owner = owner;
4590                 ref->offset = offset;
4591                 ref->node.full_backref = 0;
4592         }
4593         ref->bytes = max_size;
4594         ref->found_ref = 0;
4595         ref->num_refs = 0;
4596         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4597         if (max_size > rec->max_size)
4598                 rec->max_size = max_size;
4599         return ref;
4600 }
4601
4602 /* Check if the type of extent matches with its chunk */
4603 static void check_extent_type(struct extent_record *rec)
4604 {
4605         struct btrfs_block_group_cache *bg_cache;
4606
4607         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4608         if (!bg_cache)
4609                 return;
4610
4611         /* data extent, check chunk directly*/
4612         if (!rec->metadata) {
4613                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4614                         rec->wrong_chunk_type = 1;
4615                 return;
4616         }
4617
4618         /* metadata extent, check the obvious case first */
4619         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4620                                  BTRFS_BLOCK_GROUP_METADATA))) {
4621                 rec->wrong_chunk_type = 1;
4622                 return;
4623         }
4624
4625         /*
4626          * Check SYSTEM extent, as it's also marked as metadata, we can only
4627          * make sure it's a SYSTEM extent by its backref
4628          */
4629         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4630                 struct extent_backref *node;
4631                 struct tree_backref *tback;
4632                 u64 bg_type;
4633
4634                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4635                 if (node->is_data) {
4636                         /* tree block shouldn't have data backref */
4637                         rec->wrong_chunk_type = 1;
4638                         return;
4639                 }
4640                 tback = container_of(node, struct tree_backref, node);
4641
4642                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4643                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4644                 else
4645                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4646                 if (!(bg_cache->flags & bg_type))
4647                         rec->wrong_chunk_type = 1;
4648         }
4649 }
4650
4651 /*
4652  * Allocate a new extent record, fill default values from @tmpl and insert int
4653  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4654  * the cache, otherwise it fails.
4655  */
4656 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4657                 struct extent_record *tmpl)
4658 {
4659         struct extent_record *rec;
4660         int ret = 0;
4661
4662         rec = malloc(sizeof(*rec));
4663         if (!rec)
4664                 return -ENOMEM;
4665         rec->start = tmpl->start;
4666         rec->max_size = tmpl->max_size;
4667         rec->nr = max(tmpl->nr, tmpl->max_size);
4668         rec->found_rec = tmpl->found_rec;
4669         rec->content_checked = tmpl->content_checked;
4670         rec->owner_ref_checked = tmpl->owner_ref_checked;
4671         rec->num_duplicates = 0;
4672         rec->metadata = tmpl->metadata;
4673         rec->flag_block_full_backref = FLAG_UNSET;
4674         rec->bad_full_backref = 0;
4675         rec->crossing_stripes = 0;
4676         rec->wrong_chunk_type = 0;
4677         rec->is_root = tmpl->is_root;
4678         rec->refs = tmpl->refs;
4679         rec->extent_item_refs = tmpl->extent_item_refs;
4680         rec->parent_generation = tmpl->parent_generation;
4681         INIT_LIST_HEAD(&rec->backrefs);
4682         INIT_LIST_HEAD(&rec->dups);
4683         INIT_LIST_HEAD(&rec->list);
4684         rec->backref_tree = RB_ROOT;
4685         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4686         rec->cache.start = tmpl->start;
4687         rec->cache.size = tmpl->nr;
4688         ret = insert_cache_extent(extent_cache, &rec->cache);
4689         BUG_ON(ret);
4690         bytes_used += rec->nr;
4691
4692         if (tmpl->metadata)
4693                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4694                                 global_info->tree_root->nodesize);
4695         check_extent_type(rec);
4696         return ret;
4697 }
4698
4699 /*
4700  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4701  * some are hints:
4702  * - refs              - if found, increase refs
4703  * - is_root           - if found, set
4704  * - content_checked   - if found, set
4705  * - owner_ref_checked - if found, set
4706  *
4707  * If not found, create a new one, initialize and insert.
4708  */
4709 static int add_extent_rec(struct cache_tree *extent_cache,
4710                 struct extent_record *tmpl)
4711 {
4712         struct extent_record *rec;
4713         struct cache_extent *cache;
4714         int ret = 0;
4715         int dup = 0;
4716
4717         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4718         if (cache) {
4719                 rec = container_of(cache, struct extent_record, cache);
4720                 if (tmpl->refs)
4721                         rec->refs++;
4722                 if (rec->nr == 1)
4723                         rec->nr = max(tmpl->nr, tmpl->max_size);
4724
4725                 /*
4726                  * We need to make sure to reset nr to whatever the extent
4727                  * record says was the real size, this way we can compare it to
4728                  * the backrefs.
4729                  */
4730                 if (tmpl->found_rec) {
4731                         if (tmpl->start != rec->start || rec->found_rec) {
4732                                 struct extent_record *tmp;
4733
4734                                 dup = 1;
4735                                 if (list_empty(&rec->list))
4736                                         list_add_tail(&rec->list,
4737                                                       &duplicate_extents);
4738
4739                                 /*
4740                                  * We have to do this song and dance in case we
4741                                  * find an extent record that falls inside of
4742                                  * our current extent record but does not have
4743                                  * the same objectid.
4744                                  */
4745                                 tmp = malloc(sizeof(*tmp));
4746                                 if (!tmp)
4747                                         return -ENOMEM;
4748                                 tmp->start = tmpl->start;
4749                                 tmp->max_size = tmpl->max_size;
4750                                 tmp->nr = tmpl->nr;
4751                                 tmp->found_rec = 1;
4752                                 tmp->metadata = tmpl->metadata;
4753                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4754                                 INIT_LIST_HEAD(&tmp->list);
4755                                 list_add_tail(&tmp->list, &rec->dups);
4756                                 rec->num_duplicates++;
4757                         } else {
4758                                 rec->nr = tmpl->nr;
4759                                 rec->found_rec = 1;
4760                         }
4761                 }
4762
4763                 if (tmpl->extent_item_refs && !dup) {
4764                         if (rec->extent_item_refs) {
4765                                 fprintf(stderr, "block %llu rec "
4766                                         "extent_item_refs %llu, passed %llu\n",
4767                                         (unsigned long long)tmpl->start,
4768                                         (unsigned long long)
4769                                                         rec->extent_item_refs,
4770                                         (unsigned long long)tmpl->extent_item_refs);
4771                         }
4772                         rec->extent_item_refs = tmpl->extent_item_refs;
4773                 }
4774                 if (tmpl->is_root)
4775                         rec->is_root = 1;
4776                 if (tmpl->content_checked)
4777                         rec->content_checked = 1;
4778                 if (tmpl->owner_ref_checked)
4779                         rec->owner_ref_checked = 1;
4780                 memcpy(&rec->parent_key, &tmpl->parent_key,
4781                                 sizeof(tmpl->parent_key));
4782                 if (tmpl->parent_generation)
4783                         rec->parent_generation = tmpl->parent_generation;
4784                 if (rec->max_size < tmpl->max_size)
4785                         rec->max_size = tmpl->max_size;
4786
4787                 /*
4788                  * A metadata extent can't cross stripe_len boundary, otherwise
4789                  * kernel scrub won't be able to handle it.
4790                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4791                  * it.
4792                  */
4793                 if (tmpl->metadata)
4794                         rec->crossing_stripes = check_crossing_stripes(
4795                                 rec->start, global_info->tree_root->nodesize);
4796                 check_extent_type(rec);
4797                 maybe_free_extent_rec(extent_cache, rec);
4798                 return ret;
4799         }
4800
4801         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4802
4803         return ret;
4804 }
4805
4806 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4807                             u64 parent, u64 root, int found_ref)
4808 {
4809         struct extent_record *rec;
4810         struct tree_backref *back;
4811         struct cache_extent *cache;
4812
4813         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4814         if (!cache) {
4815                 struct extent_record tmpl;
4816
4817                 memset(&tmpl, 0, sizeof(tmpl));
4818                 tmpl.start = bytenr;
4819                 tmpl.nr = 1;
4820                 tmpl.metadata = 1;
4821
4822                 add_extent_rec_nolookup(extent_cache, &tmpl);
4823
4824                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4825                 if (!cache)
4826                         abort();
4827         }
4828
4829         rec = container_of(cache, struct extent_record, cache);
4830         if (rec->start != bytenr) {
4831                 abort();
4832         }
4833
4834         back = find_tree_backref(rec, parent, root);
4835         if (!back) {
4836                 back = alloc_tree_backref(rec, parent, root);
4837                 BUG_ON(!back);
4838         }
4839
4840         if (found_ref) {
4841                 if (back->node.found_ref) {
4842                         fprintf(stderr, "Extent back ref already exists "
4843                                 "for %llu parent %llu root %llu \n",
4844                                 (unsigned long long)bytenr,
4845                                 (unsigned long long)parent,
4846                                 (unsigned long long)root);
4847                 }
4848                 back->node.found_ref = 1;
4849         } else {
4850                 if (back->node.found_extent_tree) {
4851                         fprintf(stderr, "Extent back ref already exists "
4852                                 "for %llu parent %llu root %llu \n",
4853                                 (unsigned long long)bytenr,
4854                                 (unsigned long long)parent,
4855                                 (unsigned long long)root);
4856                 }
4857                 back->node.found_extent_tree = 1;
4858         }
4859         check_extent_type(rec);
4860         maybe_free_extent_rec(extent_cache, rec);
4861         return 0;
4862 }
4863
4864 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4865                             u64 parent, u64 root, u64 owner, u64 offset,
4866                             u32 num_refs, int found_ref, u64 max_size)
4867 {
4868         struct extent_record *rec;
4869         struct data_backref *back;
4870         struct cache_extent *cache;
4871
4872         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4873         if (!cache) {
4874                 struct extent_record tmpl;
4875
4876                 memset(&tmpl, 0, sizeof(tmpl));
4877                 tmpl.start = bytenr;
4878                 tmpl.nr = 1;
4879                 tmpl.max_size = max_size;
4880
4881                 add_extent_rec_nolookup(extent_cache, &tmpl);
4882
4883                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4884                 if (!cache)
4885                         abort();
4886         }
4887
4888         rec = container_of(cache, struct extent_record, cache);
4889         if (rec->max_size < max_size)
4890                 rec->max_size = max_size;
4891
4892         /*
4893          * If found_ref is set then max_size is the real size and must match the
4894          * existing refs.  So if we have already found a ref then we need to
4895          * make sure that this ref matches the existing one, otherwise we need
4896          * to add a new backref so we can notice that the backrefs don't match
4897          * and we need to figure out who is telling the truth.  This is to
4898          * account for that awful fsync bug I introduced where we'd end up with
4899          * a btrfs_file_extent_item that would have its length include multiple
4900          * prealloc extents or point inside of a prealloc extent.
4901          */
4902         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4903                                  bytenr, max_size);
4904         if (!back) {
4905                 back = alloc_data_backref(rec, parent, root, owner, offset,
4906                                           max_size);
4907                 BUG_ON(!back);
4908         }
4909
4910         if (found_ref) {
4911                 BUG_ON(num_refs != 1);
4912                 if (back->node.found_ref)
4913                         BUG_ON(back->bytes != max_size);
4914                 back->node.found_ref = 1;
4915                 back->found_ref += 1;
4916                 back->bytes = max_size;
4917                 back->disk_bytenr = bytenr;
4918                 rec->refs += 1;
4919                 rec->content_checked = 1;
4920                 rec->owner_ref_checked = 1;
4921         } else {
4922                 if (back->node.found_extent_tree) {
4923                         fprintf(stderr, "Extent back ref already exists "
4924                                 "for %llu parent %llu root %llu "
4925                                 "owner %llu offset %llu num_refs %lu\n",
4926                                 (unsigned long long)bytenr,
4927                                 (unsigned long long)parent,
4928                                 (unsigned long long)root,
4929                                 (unsigned long long)owner,
4930                                 (unsigned long long)offset,
4931                                 (unsigned long)num_refs);
4932                 }
4933                 back->num_refs = num_refs;
4934                 back->node.found_extent_tree = 1;
4935         }
4936         maybe_free_extent_rec(extent_cache, rec);
4937         return 0;
4938 }
4939
4940 static int add_pending(struct cache_tree *pending,
4941                        struct cache_tree *seen, u64 bytenr, u32 size)
4942 {
4943         int ret;
4944         ret = add_cache_extent(seen, bytenr, size);
4945         if (ret)
4946                 return ret;
4947         add_cache_extent(pending, bytenr, size);
4948         return 0;
4949 }
4950
4951 static int pick_next_pending(struct cache_tree *pending,
4952                         struct cache_tree *reada,
4953                         struct cache_tree *nodes,
4954                         u64 last, struct block_info *bits, int bits_nr,
4955                         int *reada_bits)
4956 {
4957         unsigned long node_start = last;
4958         struct cache_extent *cache;
4959         int ret;
4960
4961         cache = search_cache_extent(reada, 0);
4962         if (cache) {
4963                 bits[0].start = cache->start;
4964                 bits[0].size = cache->size;
4965                 *reada_bits = 1;
4966                 return 1;
4967         }
4968         *reada_bits = 0;
4969         if (node_start > 32768)
4970                 node_start -= 32768;
4971
4972         cache = search_cache_extent(nodes, node_start);
4973         if (!cache)
4974                 cache = search_cache_extent(nodes, 0);
4975
4976         if (!cache) {
4977                  cache = search_cache_extent(pending, 0);
4978                  if (!cache)
4979                          return 0;
4980                  ret = 0;
4981                  do {
4982                          bits[ret].start = cache->start;
4983                          bits[ret].size = cache->size;
4984                          cache = next_cache_extent(cache);
4985                          ret++;
4986                  } while (cache && ret < bits_nr);
4987                  return ret;
4988         }
4989
4990         ret = 0;
4991         do {
4992                 bits[ret].start = cache->start;
4993                 bits[ret].size = cache->size;
4994                 cache = next_cache_extent(cache);
4995                 ret++;
4996         } while (cache && ret < bits_nr);
4997
4998         if (bits_nr - ret > 8) {
4999                 u64 lookup = bits[0].start + bits[0].size;
5000                 struct cache_extent *next;
5001                 next = search_cache_extent(pending, lookup);
5002                 while(next) {
5003                         if (next->start - lookup > 32768)
5004                                 break;
5005                         bits[ret].start = next->start;
5006                         bits[ret].size = next->size;
5007                         lookup = next->start + next->size;
5008                         ret++;
5009                         if (ret == bits_nr)
5010                                 break;
5011                         next = next_cache_extent(next);
5012                         if (!next)
5013                                 break;
5014                 }
5015         }
5016         return ret;
5017 }
5018
5019 static void free_chunk_record(struct cache_extent *cache)
5020 {
5021         struct chunk_record *rec;
5022
5023         rec = container_of(cache, struct chunk_record, cache);
5024         list_del_init(&rec->list);
5025         list_del_init(&rec->dextents);
5026         free(rec);
5027 }
5028
5029 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5030 {
5031         cache_tree_free_extents(chunk_cache, free_chunk_record);
5032 }
5033
5034 static void free_device_record(struct rb_node *node)
5035 {
5036         struct device_record *rec;
5037
5038         rec = container_of(node, struct device_record, node);
5039         free(rec);
5040 }
5041
5042 FREE_RB_BASED_TREE(device_cache, free_device_record);
5043
5044 int insert_block_group_record(struct block_group_tree *tree,
5045                               struct block_group_record *bg_rec)
5046 {
5047         int ret;
5048
5049         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5050         if (ret)
5051                 return ret;
5052
5053         list_add_tail(&bg_rec->list, &tree->block_groups);
5054         return 0;
5055 }
5056
5057 static void free_block_group_record(struct cache_extent *cache)
5058 {
5059         struct block_group_record *rec;
5060
5061         rec = container_of(cache, struct block_group_record, cache);
5062         list_del_init(&rec->list);
5063         free(rec);
5064 }
5065
5066 void free_block_group_tree(struct block_group_tree *tree)
5067 {
5068         cache_tree_free_extents(&tree->tree, free_block_group_record);
5069 }
5070
5071 int insert_device_extent_record(struct device_extent_tree *tree,
5072                                 struct device_extent_record *de_rec)
5073 {
5074         int ret;
5075
5076         /*
5077          * Device extent is a bit different from the other extents, because
5078          * the extents which belong to the different devices may have the
5079          * same start and size, so we need use the special extent cache
5080          * search/insert functions.
5081          */
5082         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5083         if (ret)
5084                 return ret;
5085
5086         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5087         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5088         return 0;
5089 }
5090
5091 static void free_device_extent_record(struct cache_extent *cache)
5092 {
5093         struct device_extent_record *rec;
5094
5095         rec = container_of(cache, struct device_extent_record, cache);
5096         if (!list_empty(&rec->chunk_list))
5097                 list_del_init(&rec->chunk_list);
5098         if (!list_empty(&rec->device_list))
5099                 list_del_init(&rec->device_list);
5100         free(rec);
5101 }
5102
5103 void free_device_extent_tree(struct device_extent_tree *tree)
5104 {
5105         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5106 }
5107
5108 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5109 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5110                                  struct extent_buffer *leaf, int slot)
5111 {
5112         struct btrfs_extent_ref_v0 *ref0;
5113         struct btrfs_key key;
5114
5115         btrfs_item_key_to_cpu(leaf, &key, slot);
5116         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5117         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5118                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5119         } else {
5120                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5121                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5122         }
5123         return 0;
5124 }
5125 #endif
5126
5127 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5128                                             struct btrfs_key *key,
5129                                             int slot)
5130 {
5131         struct btrfs_chunk *ptr;
5132         struct chunk_record *rec;
5133         int num_stripes, i;
5134
5135         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5136         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5137
5138         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5139         if (!rec) {
5140                 fprintf(stderr, "memory allocation failed\n");
5141                 exit(-1);
5142         }
5143
5144         INIT_LIST_HEAD(&rec->list);
5145         INIT_LIST_HEAD(&rec->dextents);
5146         rec->bg_rec = NULL;
5147
5148         rec->cache.start = key->offset;
5149         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5150
5151         rec->generation = btrfs_header_generation(leaf);
5152
5153         rec->objectid = key->objectid;
5154         rec->type = key->type;
5155         rec->offset = key->offset;
5156
5157         rec->length = rec->cache.size;
5158         rec->owner = btrfs_chunk_owner(leaf, ptr);
5159         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5160         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5161         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5162         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5163         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5164         rec->num_stripes = num_stripes;
5165         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5166
5167         for (i = 0; i < rec->num_stripes; ++i) {
5168                 rec->stripes[i].devid =
5169                         btrfs_stripe_devid_nr(leaf, ptr, i);
5170                 rec->stripes[i].offset =
5171                         btrfs_stripe_offset_nr(leaf, ptr, i);
5172                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5173                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5174                                 BTRFS_UUID_SIZE);
5175         }
5176
5177         return rec;
5178 }
5179
5180 static int process_chunk_item(struct cache_tree *chunk_cache,
5181                               struct btrfs_key *key, struct extent_buffer *eb,
5182                               int slot)
5183 {
5184         struct chunk_record *rec;
5185         int ret = 0;
5186
5187         rec = btrfs_new_chunk_record(eb, key, slot);
5188         ret = insert_cache_extent(chunk_cache, &rec->cache);
5189         if (ret) {
5190                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5191                         rec->offset, rec->length);
5192                 free(rec);
5193         }
5194
5195         return ret;
5196 }
5197
5198 static int process_device_item(struct rb_root *dev_cache,
5199                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5200 {
5201         struct btrfs_dev_item *ptr;
5202         struct device_record *rec;
5203         int ret = 0;
5204
5205         ptr = btrfs_item_ptr(eb,
5206                 slot, struct btrfs_dev_item);
5207
5208         rec = malloc(sizeof(*rec));
5209         if (!rec) {
5210                 fprintf(stderr, "memory allocation failed\n");
5211                 return -ENOMEM;
5212         }
5213
5214         rec->devid = key->offset;
5215         rec->generation = btrfs_header_generation(eb);
5216
5217         rec->objectid = key->objectid;
5218         rec->type = key->type;
5219         rec->offset = key->offset;
5220
5221         rec->devid = btrfs_device_id(eb, ptr);
5222         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5223         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5224
5225         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5226         if (ret) {
5227                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5228                 free(rec);
5229         }
5230
5231         return ret;
5232 }
5233
5234 struct block_group_record *
5235 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5236                              int slot)
5237 {
5238         struct btrfs_block_group_item *ptr;
5239         struct block_group_record *rec;
5240
5241         rec = calloc(1, sizeof(*rec));
5242         if (!rec) {
5243                 fprintf(stderr, "memory allocation failed\n");
5244                 exit(-1);
5245         }
5246
5247         rec->cache.start = key->objectid;
5248         rec->cache.size = key->offset;
5249
5250         rec->generation = btrfs_header_generation(leaf);
5251
5252         rec->objectid = key->objectid;
5253         rec->type = key->type;
5254         rec->offset = key->offset;
5255
5256         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5257         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5258
5259         INIT_LIST_HEAD(&rec->list);
5260
5261         return rec;
5262 }
5263
5264 static int process_block_group_item(struct block_group_tree *block_group_cache,
5265                                     struct btrfs_key *key,
5266                                     struct extent_buffer *eb, int slot)
5267 {
5268         struct block_group_record *rec;
5269         int ret = 0;
5270
5271         rec = btrfs_new_block_group_record(eb, key, slot);
5272         ret = insert_block_group_record(block_group_cache, rec);
5273         if (ret) {
5274                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5275                         rec->objectid, rec->offset);
5276                 free(rec);
5277         }
5278
5279         return ret;
5280 }
5281
5282 struct device_extent_record *
5283 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5284                                struct btrfs_key *key, int slot)
5285 {
5286         struct device_extent_record *rec;
5287         struct btrfs_dev_extent *ptr;
5288
5289         rec = calloc(1, sizeof(*rec));
5290         if (!rec) {
5291                 fprintf(stderr, "memory allocation failed\n");
5292                 exit(-1);
5293         }
5294
5295         rec->cache.objectid = key->objectid;
5296         rec->cache.start = key->offset;
5297
5298         rec->generation = btrfs_header_generation(leaf);
5299
5300         rec->objectid = key->objectid;
5301         rec->type = key->type;
5302         rec->offset = key->offset;
5303
5304         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5305         rec->chunk_objecteid =
5306                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5307         rec->chunk_offset =
5308                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5309         rec->length = btrfs_dev_extent_length(leaf, ptr);
5310         rec->cache.size = rec->length;
5311
5312         INIT_LIST_HEAD(&rec->chunk_list);
5313         INIT_LIST_HEAD(&rec->device_list);
5314
5315         return rec;
5316 }
5317
5318 static int
5319 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5320                            struct btrfs_key *key, struct extent_buffer *eb,
5321                            int slot)
5322 {
5323         struct device_extent_record *rec;
5324         int ret;
5325
5326         rec = btrfs_new_device_extent_record(eb, key, slot);
5327         ret = insert_device_extent_record(dev_extent_cache, rec);
5328         if (ret) {
5329                 fprintf(stderr,
5330                         "Device extent[%llu, %llu, %llu] existed.\n",
5331                         rec->objectid, rec->offset, rec->length);
5332                 free(rec);
5333         }
5334
5335         return ret;
5336 }
5337
5338 static int process_extent_item(struct btrfs_root *root,
5339                                struct cache_tree *extent_cache,
5340                                struct extent_buffer *eb, int slot)
5341 {
5342         struct btrfs_extent_item *ei;
5343         struct btrfs_extent_inline_ref *iref;
5344         struct btrfs_extent_data_ref *dref;
5345         struct btrfs_shared_data_ref *sref;
5346         struct btrfs_key key;
5347         struct extent_record tmpl;
5348         unsigned long end;
5349         unsigned long ptr;
5350         int type;
5351         u32 item_size = btrfs_item_size_nr(eb, slot);
5352         u64 refs = 0;
5353         u64 offset;
5354         u64 num_bytes;
5355         int metadata = 0;
5356
5357         btrfs_item_key_to_cpu(eb, &key, slot);
5358
5359         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5360                 metadata = 1;
5361                 num_bytes = root->nodesize;
5362         } else {
5363                 num_bytes = key.offset;
5364         }
5365
5366         if (item_size < sizeof(*ei)) {
5367 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5368                 struct btrfs_extent_item_v0 *ei0;
5369                 BUG_ON(item_size != sizeof(*ei0));
5370                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5371                 refs = btrfs_extent_refs_v0(eb, ei0);
5372 #else
5373                 BUG();
5374 #endif
5375                 memset(&tmpl, 0, sizeof(tmpl));
5376                 tmpl.start = key.objectid;
5377                 tmpl.nr = num_bytes;
5378                 tmpl.extent_item_refs = refs;
5379                 tmpl.metadata = metadata;
5380                 tmpl.found_rec = 1;
5381                 tmpl.max_size = num_bytes;
5382
5383                 return add_extent_rec(extent_cache, &tmpl);
5384         }
5385
5386         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5387         refs = btrfs_extent_refs(eb, ei);
5388         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5389                 metadata = 1;
5390         else
5391                 metadata = 0;
5392
5393         memset(&tmpl, 0, sizeof(tmpl));
5394         tmpl.start = key.objectid;
5395         tmpl.nr = num_bytes;
5396         tmpl.extent_item_refs = refs;
5397         tmpl.metadata = metadata;
5398         tmpl.found_rec = 1;
5399         tmpl.max_size = num_bytes;
5400         add_extent_rec(extent_cache, &tmpl);
5401
5402         ptr = (unsigned long)(ei + 1);
5403         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5404             key.type == BTRFS_EXTENT_ITEM_KEY)
5405                 ptr += sizeof(struct btrfs_tree_block_info);
5406
5407         end = (unsigned long)ei + item_size;
5408         while (ptr < end) {
5409                 iref = (struct btrfs_extent_inline_ref *)ptr;
5410                 type = btrfs_extent_inline_ref_type(eb, iref);
5411                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5412                 switch (type) {
5413                 case BTRFS_TREE_BLOCK_REF_KEY:
5414                         add_tree_backref(extent_cache, key.objectid,
5415                                          0, offset, 0);
5416                         break;
5417                 case BTRFS_SHARED_BLOCK_REF_KEY:
5418                         add_tree_backref(extent_cache, key.objectid,
5419                                          offset, 0, 0);
5420                         break;
5421                 case BTRFS_EXTENT_DATA_REF_KEY:
5422                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5423                         add_data_backref(extent_cache, key.objectid, 0,
5424                                         btrfs_extent_data_ref_root(eb, dref),
5425                                         btrfs_extent_data_ref_objectid(eb,
5426                                                                        dref),
5427                                         btrfs_extent_data_ref_offset(eb, dref),
5428                                         btrfs_extent_data_ref_count(eb, dref),
5429                                         0, num_bytes);
5430                         break;
5431                 case BTRFS_SHARED_DATA_REF_KEY:
5432                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5433                         add_data_backref(extent_cache, key.objectid, offset,
5434                                         0, 0, 0,
5435                                         btrfs_shared_data_ref_count(eb, sref),
5436                                         0, num_bytes);
5437                         break;
5438                 default:
5439                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5440                                 key.objectid, key.type, num_bytes);
5441                         goto out;
5442                 }
5443                 ptr += btrfs_extent_inline_ref_size(type);
5444         }
5445         WARN_ON(ptr > end);
5446 out:
5447         return 0;
5448 }
5449
5450 static int check_cache_range(struct btrfs_root *root,
5451                              struct btrfs_block_group_cache *cache,
5452                              u64 offset, u64 bytes)
5453 {
5454         struct btrfs_free_space *entry;
5455         u64 *logical;
5456         u64 bytenr;
5457         int stripe_len;
5458         int i, nr, ret;
5459
5460         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5461                 bytenr = btrfs_sb_offset(i);
5462                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5463                                        cache->key.objectid, bytenr, 0,
5464                                        &logical, &nr, &stripe_len);
5465                 if (ret)
5466                         return ret;
5467
5468                 while (nr--) {
5469                         if (logical[nr] + stripe_len <= offset)
5470                                 continue;
5471                         if (offset + bytes <= logical[nr])
5472                                 continue;
5473                         if (logical[nr] == offset) {
5474                                 if (stripe_len >= bytes) {
5475                                         kfree(logical);
5476                                         return 0;
5477                                 }
5478                                 bytes -= stripe_len;
5479                                 offset += stripe_len;
5480                         } else if (logical[nr] < offset) {
5481                                 if (logical[nr] + stripe_len >=
5482                                     offset + bytes) {
5483                                         kfree(logical);
5484                                         return 0;
5485                                 }
5486                                 bytes = (offset + bytes) -
5487                                         (logical[nr] + stripe_len);
5488                                 offset = logical[nr] + stripe_len;
5489                         } else {
5490                                 /*
5491                                  * Could be tricky, the super may land in the
5492                                  * middle of the area we're checking.  First
5493                                  * check the easiest case, it's at the end.
5494                                  */
5495                                 if (logical[nr] + stripe_len >=
5496                                     bytes + offset) {
5497                                         bytes = logical[nr] - offset;
5498                                         continue;
5499                                 }
5500
5501                                 /* Check the left side */
5502                                 ret = check_cache_range(root, cache,
5503                                                         offset,
5504                                                         logical[nr] - offset);
5505                                 if (ret) {
5506                                         kfree(logical);
5507                                         return ret;
5508                                 }
5509
5510                                 /* Now we continue with the right side */
5511                                 bytes = (offset + bytes) -
5512                                         (logical[nr] + stripe_len);
5513                                 offset = logical[nr] + stripe_len;
5514                         }
5515                 }
5516
5517                 kfree(logical);
5518         }
5519
5520         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5521         if (!entry) {
5522                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5523                         offset, offset+bytes);
5524                 return -EINVAL;
5525         }
5526
5527         if (entry->offset != offset) {
5528                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5529                         entry->offset);
5530                 return -EINVAL;
5531         }
5532
5533         if (entry->bytes != bytes) {
5534                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5535                         bytes, entry->bytes, offset);
5536                 return -EINVAL;
5537         }
5538
5539         unlink_free_space(cache->free_space_ctl, entry);
5540         free(entry);
5541         return 0;
5542 }
5543
5544 static int verify_space_cache(struct btrfs_root *root,
5545                               struct btrfs_block_group_cache *cache)
5546 {
5547         struct btrfs_path *path;
5548         struct extent_buffer *leaf;
5549         struct btrfs_key key;
5550         u64 last;
5551         int ret = 0;
5552
5553         path = btrfs_alloc_path();
5554         if (!path)
5555                 return -ENOMEM;
5556
5557         root = root->fs_info->extent_root;
5558
5559         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5560
5561         key.objectid = last;
5562         key.offset = 0;
5563         key.type = BTRFS_EXTENT_ITEM_KEY;
5564
5565         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5566         if (ret < 0)
5567                 goto out;
5568         ret = 0;
5569         while (1) {
5570                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5571                         ret = btrfs_next_leaf(root, path);
5572                         if (ret < 0)
5573                                 goto out;
5574                         if (ret > 0) {
5575                                 ret = 0;
5576                                 break;
5577                         }
5578                 }
5579                 leaf = path->nodes[0];
5580                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5581                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5582                         break;
5583                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5584                     key.type != BTRFS_METADATA_ITEM_KEY) {
5585                         path->slots[0]++;
5586                         continue;
5587                 }
5588
5589                 if (last == key.objectid) {
5590                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5591                                 last = key.objectid + key.offset;
5592                         else
5593                                 last = key.objectid + root->nodesize;
5594                         path->slots[0]++;
5595                         continue;
5596                 }
5597
5598                 ret = check_cache_range(root, cache, last,
5599                                         key.objectid - last);
5600                 if (ret)
5601                         break;
5602                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5603                         last = key.objectid + key.offset;
5604                 else
5605                         last = key.objectid + root->nodesize;
5606                 path->slots[0]++;
5607         }
5608
5609         if (last < cache->key.objectid + cache->key.offset)
5610                 ret = check_cache_range(root, cache, last,
5611                                         cache->key.objectid +
5612                                         cache->key.offset - last);
5613
5614 out:
5615         btrfs_free_path(path);
5616
5617         if (!ret &&
5618             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5619                 fprintf(stderr, "There are still entries left in the space "
5620                         "cache\n");
5621                 ret = -EINVAL;
5622         }
5623
5624         return ret;
5625 }
5626
5627 static int check_space_cache(struct btrfs_root *root)
5628 {
5629         struct btrfs_block_group_cache *cache;
5630         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5631         int ret;
5632         int error = 0;
5633
5634         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5635             btrfs_super_generation(root->fs_info->super_copy) !=
5636             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5637                 printf("cache and super generation don't match, space cache "
5638                        "will be invalidated\n");
5639                 return 0;
5640         }
5641
5642         if (ctx.progress_enabled) {
5643                 ctx.tp = TASK_FREE_SPACE;
5644                 task_start(ctx.info);
5645         }
5646
5647         while (1) {
5648                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5649                 if (!cache)
5650                         break;
5651
5652                 start = cache->key.objectid + cache->key.offset;
5653                 if (!cache->free_space_ctl) {
5654                         if (btrfs_init_free_space_ctl(cache,
5655                                                       root->sectorsize)) {
5656                                 ret = -ENOMEM;
5657                                 break;
5658                         }
5659                 } else {
5660                         btrfs_remove_free_space_cache(cache);
5661                 }
5662
5663                 if (btrfs_fs_compat_ro(root->fs_info,
5664                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5665                         ret = exclude_super_stripes(root, cache);
5666                         if (ret) {
5667                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5668                                         strerror(-ret));
5669                                 error++;
5670                                 continue;
5671                         }
5672                         ret = load_free_space_tree(root->fs_info, cache);
5673                         free_excluded_extents(root, cache);
5674                         if (ret < 0) {
5675                                 fprintf(stderr, "could not load free space tree: %s\n",
5676                                         strerror(-ret));
5677                                 error++;
5678                                 continue;
5679                         }
5680                         error += ret;
5681                 } else {
5682                         ret = load_free_space_cache(root->fs_info, cache);
5683                         if (!ret)
5684                                 continue;
5685                 }
5686
5687                 ret = verify_space_cache(root, cache);
5688                 if (ret) {
5689                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5690                                 cache->key.objectid);
5691                         error++;
5692                 }
5693         }
5694
5695         task_stop(ctx.info);
5696
5697         return error ? -EINVAL : 0;
5698 }
5699
5700 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5701                         u64 num_bytes, unsigned long leaf_offset,
5702                         struct extent_buffer *eb) {
5703
5704         u64 offset = 0;
5705         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5706         char *data;
5707         unsigned long csum_offset;
5708         u32 csum;
5709         u32 csum_expected;
5710         u64 read_len;
5711         u64 data_checked = 0;
5712         u64 tmp;
5713         int ret = 0;
5714         int mirror;
5715         int num_copies;
5716
5717         if (num_bytes % root->sectorsize)
5718                 return -EINVAL;
5719
5720         data = malloc(num_bytes);
5721         if (!data)
5722                 return -ENOMEM;
5723
5724         while (offset < num_bytes) {
5725                 mirror = 0;
5726 again:
5727                 read_len = num_bytes - offset;
5728                 /* read as much space once a time */
5729                 ret = read_extent_data(root, data + offset,
5730                                 bytenr + offset, &read_len, mirror);
5731                 if (ret)
5732                         goto out;
5733                 data_checked = 0;
5734                 /* verify every 4k data's checksum */
5735                 while (data_checked < read_len) {
5736                         csum = ~(u32)0;
5737                         tmp = offset + data_checked;
5738
5739                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5740                                                csum, root->sectorsize);
5741                         btrfs_csum_final(csum, (char *)&csum);
5742
5743                         csum_offset = leaf_offset +
5744                                  tmp / root->sectorsize * csum_size;
5745                         read_extent_buffer(eb, (char *)&csum_expected,
5746                                            csum_offset, csum_size);
5747                         /* try another mirror */
5748                         if (csum != csum_expected) {
5749                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5750                                                 mirror, bytenr + tmp,
5751                                                 csum, csum_expected);
5752                                 num_copies = btrfs_num_copies(
5753                                                 &root->fs_info->mapping_tree,
5754                                                 bytenr, num_bytes);
5755                                 if (mirror < num_copies - 1) {
5756                                         mirror += 1;
5757                                         goto again;
5758                                 }
5759                         }
5760                         data_checked += root->sectorsize;
5761                 }
5762                 offset += read_len;
5763         }
5764 out:
5765         free(data);
5766         return ret;
5767 }
5768
5769 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5770                                u64 num_bytes)
5771 {
5772         struct btrfs_path *path;
5773         struct extent_buffer *leaf;
5774         struct btrfs_key key;
5775         int ret;
5776
5777         path = btrfs_alloc_path();
5778         if (!path) {
5779                 fprintf(stderr, "Error allocating path\n");
5780                 return -ENOMEM;
5781         }
5782
5783         key.objectid = bytenr;
5784         key.type = BTRFS_EXTENT_ITEM_KEY;
5785         key.offset = (u64)-1;
5786
5787 again:
5788         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5789                                 0, 0);
5790         if (ret < 0) {
5791                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5792                 btrfs_free_path(path);
5793                 return ret;
5794         } else if (ret) {
5795                 if (path->slots[0] > 0) {
5796                         path->slots[0]--;
5797                 } else {
5798                         ret = btrfs_prev_leaf(root, path);
5799                         if (ret < 0) {
5800                                 goto out;
5801                         } else if (ret > 0) {
5802                                 ret = 0;
5803                                 goto out;
5804                         }
5805                 }
5806         }
5807
5808         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5809
5810         /*
5811          * Block group items come before extent items if they have the same
5812          * bytenr, so walk back one more just in case.  Dear future traveller,
5813          * first congrats on mastering time travel.  Now if it's not too much
5814          * trouble could you go back to 2006 and tell Chris to make the
5815          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5816          * EXTENT_ITEM_KEY please?
5817          */
5818         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5819                 if (path->slots[0] > 0) {
5820                         path->slots[0]--;
5821                 } else {
5822                         ret = btrfs_prev_leaf(root, path);
5823                         if (ret < 0) {
5824                                 goto out;
5825                         } else if (ret > 0) {
5826                                 ret = 0;
5827                                 goto out;
5828                         }
5829                 }
5830                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5831         }
5832
5833         while (num_bytes) {
5834                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5835                         ret = btrfs_next_leaf(root, path);
5836                         if (ret < 0) {
5837                                 fprintf(stderr, "Error going to next leaf "
5838                                         "%d\n", ret);
5839                                 btrfs_free_path(path);
5840                                 return ret;
5841                         } else if (ret) {
5842                                 break;
5843                         }
5844                 }
5845                 leaf = path->nodes[0];
5846                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5847                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5848                         path->slots[0]++;
5849                         continue;
5850                 }
5851                 if (key.objectid + key.offset < bytenr) {
5852                         path->slots[0]++;
5853                         continue;
5854                 }
5855                 if (key.objectid > bytenr + num_bytes)
5856                         break;
5857
5858                 if (key.objectid == bytenr) {
5859                         if (key.offset >= num_bytes) {
5860                                 num_bytes = 0;
5861                                 break;
5862                         }
5863                         num_bytes -= key.offset;
5864                         bytenr += key.offset;
5865                 } else if (key.objectid < bytenr) {
5866                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5867                                 num_bytes = 0;
5868                                 break;
5869                         }
5870                         num_bytes = (bytenr + num_bytes) -
5871                                 (key.objectid + key.offset);
5872                         bytenr = key.objectid + key.offset;
5873                 } else {
5874                         if (key.objectid + key.offset < bytenr + num_bytes) {
5875                                 u64 new_start = key.objectid + key.offset;
5876                                 u64 new_bytes = bytenr + num_bytes - new_start;
5877
5878                                 /*
5879                                  * Weird case, the extent is in the middle of
5880                                  * our range, we'll have to search one side
5881                                  * and then the other.  Not sure if this happens
5882                                  * in real life, but no harm in coding it up
5883                                  * anyway just in case.
5884                                  */
5885                                 btrfs_release_path(path);
5886                                 ret = check_extent_exists(root, new_start,
5887                                                           new_bytes);
5888                                 if (ret) {
5889                                         fprintf(stderr, "Right section didn't "
5890                                                 "have a record\n");
5891                                         break;
5892                                 }
5893                                 num_bytes = key.objectid - bytenr;
5894                                 goto again;
5895                         }
5896                         num_bytes = key.objectid - bytenr;
5897                 }
5898                 path->slots[0]++;
5899         }
5900         ret = 0;
5901
5902 out:
5903         if (num_bytes && !ret) {
5904                 fprintf(stderr, "There are no extents for csum range "
5905                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5906                 ret = 1;
5907         }
5908
5909         btrfs_free_path(path);
5910         return ret;
5911 }
5912
5913 static int check_csums(struct btrfs_root *root)
5914 {
5915         struct btrfs_path *path;
5916         struct extent_buffer *leaf;
5917         struct btrfs_key key;
5918         u64 offset = 0, num_bytes = 0;
5919         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5920         int errors = 0;
5921         int ret;
5922         u64 data_len;
5923         unsigned long leaf_offset;
5924
5925         root = root->fs_info->csum_root;
5926         if (!extent_buffer_uptodate(root->node)) {
5927                 fprintf(stderr, "No valid csum tree found\n");
5928                 return -ENOENT;
5929         }
5930
5931         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5932         key.type = BTRFS_EXTENT_CSUM_KEY;
5933         key.offset = 0;
5934
5935         path = btrfs_alloc_path();
5936         if (!path)
5937                 return -ENOMEM;
5938
5939         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5940         if (ret < 0) {
5941                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5942                 btrfs_free_path(path);
5943                 return ret;
5944         }
5945
5946         if (ret > 0 && path->slots[0])
5947                 path->slots[0]--;
5948         ret = 0;
5949
5950         while (1) {
5951                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5952                         ret = btrfs_next_leaf(root, path);
5953                         if (ret < 0) {
5954                                 fprintf(stderr, "Error going to next leaf "
5955                                         "%d\n", ret);
5956                                 break;
5957                         }
5958                         if (ret)
5959                                 break;
5960                 }
5961                 leaf = path->nodes[0];
5962
5963                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5964                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5965                         path->slots[0]++;
5966                         continue;
5967                 }
5968
5969                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5970                               csum_size) * root->sectorsize;
5971                 if (!check_data_csum)
5972                         goto skip_csum_check;
5973                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5974                 ret = check_extent_csums(root, key.offset, data_len,
5975                                          leaf_offset, leaf);
5976                 if (ret)
5977                         break;
5978 skip_csum_check:
5979                 if (!num_bytes) {
5980                         offset = key.offset;
5981                 } else if (key.offset != offset + num_bytes) {
5982                         ret = check_extent_exists(root, offset, num_bytes);
5983                         if (ret) {
5984                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5985                                         "there is no extent record\n",
5986                                         offset, offset+num_bytes);
5987                                 errors++;
5988                         }
5989                         offset = key.offset;
5990                         num_bytes = 0;
5991                 }
5992                 num_bytes += data_len;
5993                 path->slots[0]++;
5994         }
5995
5996         btrfs_free_path(path);
5997         return errors;
5998 }
5999
6000 static int is_dropped_key(struct btrfs_key *key,
6001                           struct btrfs_key *drop_key) {
6002         if (key->objectid < drop_key->objectid)
6003                 return 1;
6004         else if (key->objectid == drop_key->objectid) {
6005                 if (key->type < drop_key->type)
6006                         return 1;
6007                 else if (key->type == drop_key->type) {
6008                         if (key->offset < drop_key->offset)
6009                                 return 1;
6010                 }
6011         }
6012         return 0;
6013 }
6014
6015 /*
6016  * Here are the rules for FULL_BACKREF.
6017  *
6018  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6019  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6020  *      FULL_BACKREF set.
6021  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6022  *    if it happened after the relocation occurred since we'll have dropped the
6023  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6024  *    have no real way to know for sure.
6025  *
6026  * We process the blocks one root at a time, and we start from the lowest root
6027  * objectid and go to the highest.  So we can just lookup the owner backref for
6028  * the record and if we don't find it then we know it doesn't exist and we have
6029  * a FULL BACKREF.
6030  *
6031  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6032  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6033  * be set or not and then we can check later once we've gathered all the refs.
6034  */
6035 static int calc_extent_flag(struct btrfs_root *root,
6036                            struct cache_tree *extent_cache,
6037                            struct extent_buffer *buf,
6038                            struct root_item_record *ri,
6039                            u64 *flags)
6040 {
6041         struct extent_record *rec;
6042         struct cache_extent *cache;
6043         struct tree_backref *tback;
6044         u64 owner = 0;
6045
6046         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6047         /* we have added this extent before */
6048         BUG_ON(!cache);
6049         rec = container_of(cache, struct extent_record, cache);
6050
6051         /*
6052          * Except file/reloc tree, we can not have
6053          * FULL BACKREF MODE
6054          */
6055         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6056                 goto normal;
6057         /*
6058          * root node
6059          */
6060         if (buf->start == ri->bytenr)
6061                 goto normal;
6062
6063         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6064                 goto full_backref;
6065
6066         owner = btrfs_header_owner(buf);
6067         if (owner == ri->objectid)
6068                 goto normal;
6069
6070         tback = find_tree_backref(rec, 0, owner);
6071         if (!tback)
6072                 goto full_backref;
6073 normal:
6074         *flags = 0;
6075         if (rec->flag_block_full_backref != FLAG_UNSET &&
6076             rec->flag_block_full_backref != 0)
6077                 rec->bad_full_backref = 1;
6078         return 0;
6079 full_backref:
6080         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6081         if (rec->flag_block_full_backref != FLAG_UNSET &&
6082             rec->flag_block_full_backref != 1)
6083                 rec->bad_full_backref = 1;
6084         return 0;
6085 }
6086
6087 static int run_next_block(struct btrfs_root *root,
6088                           struct block_info *bits,
6089                           int bits_nr,
6090                           u64 *last,
6091                           struct cache_tree *pending,
6092                           struct cache_tree *seen,
6093                           struct cache_tree *reada,
6094                           struct cache_tree *nodes,
6095                           struct cache_tree *extent_cache,
6096                           struct cache_tree *chunk_cache,
6097                           struct rb_root *dev_cache,
6098                           struct block_group_tree *block_group_cache,
6099                           struct device_extent_tree *dev_extent_cache,
6100                           struct root_item_record *ri)
6101 {
6102         struct extent_buffer *buf;
6103         struct extent_record *rec = NULL;
6104         u64 bytenr;
6105         u32 size;
6106         u64 parent;
6107         u64 owner;
6108         u64 flags;
6109         u64 ptr;
6110         u64 gen = 0;
6111         int ret = 0;
6112         int i;
6113         int nritems;
6114         struct btrfs_key key;
6115         struct cache_extent *cache;
6116         int reada_bits;
6117
6118         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6119                                     bits_nr, &reada_bits);
6120         if (nritems == 0)
6121                 return 1;
6122
6123         if (!reada_bits) {
6124                 for(i = 0; i < nritems; i++) {
6125                         ret = add_cache_extent(reada, bits[i].start,
6126                                                bits[i].size);
6127                         if (ret == -EEXIST)
6128                                 continue;
6129
6130                         /* fixme, get the parent transid */
6131                         readahead_tree_block(root, bits[i].start,
6132                                              bits[i].size, 0);
6133                 }
6134         }
6135         *last = bits[0].start;
6136         bytenr = bits[0].start;
6137         size = bits[0].size;
6138
6139         cache = lookup_cache_extent(pending, bytenr, size);
6140         if (cache) {
6141                 remove_cache_extent(pending, cache);
6142                 free(cache);
6143         }
6144         cache = lookup_cache_extent(reada, bytenr, size);
6145         if (cache) {
6146                 remove_cache_extent(reada, cache);
6147                 free(cache);
6148         }
6149         cache = lookup_cache_extent(nodes, bytenr, size);
6150         if (cache) {
6151                 remove_cache_extent(nodes, cache);
6152                 free(cache);
6153         }
6154         cache = lookup_cache_extent(extent_cache, bytenr, size);
6155         if (cache) {
6156                 rec = container_of(cache, struct extent_record, cache);
6157                 gen = rec->parent_generation;
6158         }
6159
6160         /* fixme, get the real parent transid */
6161         buf = read_tree_block(root, bytenr, size, gen);
6162         if (!extent_buffer_uptodate(buf)) {
6163                 record_bad_block_io(root->fs_info,
6164                                     extent_cache, bytenr, size);
6165                 goto out;
6166         }
6167
6168         nritems = btrfs_header_nritems(buf);
6169
6170         flags = 0;
6171         if (!init_extent_tree) {
6172                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6173                                        btrfs_header_level(buf), 1, NULL,
6174                                        &flags);
6175                 if (ret < 0) {
6176                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6177                         if (ret < 0) {
6178                                 fprintf(stderr, "Couldn't calc extent flags\n");
6179                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6180                         }
6181                 }
6182         } else {
6183                 flags = 0;
6184                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6185                 if (ret < 0) {
6186                         fprintf(stderr, "Couldn't calc extent flags\n");
6187                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6188                 }
6189         }
6190
6191         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6192                 if (ri != NULL &&
6193                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6194                     ri->objectid == btrfs_header_owner(buf)) {
6195                         /*
6196                          * Ok we got to this block from it's original owner and
6197                          * we have FULL_BACKREF set.  Relocation can leave
6198                          * converted blocks over so this is altogether possible,
6199                          * however it's not possible if the generation > the
6200                          * last snapshot, so check for this case.
6201                          */
6202                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6203                             btrfs_header_generation(buf) > ri->last_snapshot) {
6204                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6205                                 rec->bad_full_backref = 1;
6206                         }
6207                 }
6208         } else {
6209                 if (ri != NULL &&
6210                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6211                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6212                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6213                         rec->bad_full_backref = 1;
6214                 }
6215         }
6216
6217         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6218                 rec->flag_block_full_backref = 1;
6219                 parent = bytenr;
6220                 owner = 0;
6221         } else {
6222                 rec->flag_block_full_backref = 0;
6223                 parent = 0;
6224                 owner = btrfs_header_owner(buf);
6225         }
6226
6227         ret = check_block(root, extent_cache, buf, flags);
6228         if (ret)
6229                 goto out;
6230
6231         if (btrfs_is_leaf(buf)) {
6232                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6233                 for (i = 0; i < nritems; i++) {
6234                         struct btrfs_file_extent_item *fi;
6235                         btrfs_item_key_to_cpu(buf, &key, i);
6236                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6237                                 process_extent_item(root, extent_cache, buf,
6238                                                     i);
6239                                 continue;
6240                         }
6241                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6242                                 process_extent_item(root, extent_cache, buf,
6243                                                     i);
6244                                 continue;
6245                         }
6246                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6247                                 total_csum_bytes +=
6248                                         btrfs_item_size_nr(buf, i);
6249                                 continue;
6250                         }
6251                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6252                                 process_chunk_item(chunk_cache, &key, buf, i);
6253                                 continue;
6254                         }
6255                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6256                                 process_device_item(dev_cache, &key, buf, i);
6257                                 continue;
6258                         }
6259                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6260                                 process_block_group_item(block_group_cache,
6261                                         &key, buf, i);
6262                                 continue;
6263                         }
6264                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6265                                 process_device_extent_item(dev_extent_cache,
6266                                         &key, buf, i);
6267                                 continue;
6268
6269                         }
6270                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6271 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6272                                 process_extent_ref_v0(extent_cache, buf, i);
6273 #else
6274                                 BUG();
6275 #endif
6276                                 continue;
6277                         }
6278
6279                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6280                                 add_tree_backref(extent_cache, key.objectid, 0,
6281                                                  key.offset, 0);
6282                                 continue;
6283                         }
6284                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6285                                 add_tree_backref(extent_cache, key.objectid,
6286                                                  key.offset, 0, 0);
6287                                 continue;
6288                         }
6289                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6290                                 struct btrfs_extent_data_ref *ref;
6291                                 ref = btrfs_item_ptr(buf, i,
6292                                                 struct btrfs_extent_data_ref);
6293                                 add_data_backref(extent_cache,
6294                                         key.objectid, 0,
6295                                         btrfs_extent_data_ref_root(buf, ref),
6296                                         btrfs_extent_data_ref_objectid(buf,
6297                                                                        ref),
6298                                         btrfs_extent_data_ref_offset(buf, ref),
6299                                         btrfs_extent_data_ref_count(buf, ref),
6300                                         0, root->sectorsize);
6301                                 continue;
6302                         }
6303                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6304                                 struct btrfs_shared_data_ref *ref;
6305                                 ref = btrfs_item_ptr(buf, i,
6306                                                 struct btrfs_shared_data_ref);
6307                                 add_data_backref(extent_cache,
6308                                         key.objectid, key.offset, 0, 0, 0,
6309                                         btrfs_shared_data_ref_count(buf, ref),
6310                                         0, root->sectorsize);
6311                                 continue;
6312                         }
6313                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6314                                 struct bad_item *bad;
6315
6316                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6317                                         continue;
6318                                 if (!owner)
6319                                         continue;
6320                                 bad = malloc(sizeof(struct bad_item));
6321                                 if (!bad)
6322                                         continue;
6323                                 INIT_LIST_HEAD(&bad->list);
6324                                 memcpy(&bad->key, &key,
6325                                        sizeof(struct btrfs_key));
6326                                 bad->root_id = owner;
6327                                 list_add_tail(&bad->list, &delete_items);
6328                                 continue;
6329                         }
6330                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6331                                 continue;
6332                         fi = btrfs_item_ptr(buf, i,
6333                                             struct btrfs_file_extent_item);
6334                         if (btrfs_file_extent_type(buf, fi) ==
6335                             BTRFS_FILE_EXTENT_INLINE)
6336                                 continue;
6337                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6338                                 continue;
6339
6340                         data_bytes_allocated +=
6341                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6342                         if (data_bytes_allocated < root->sectorsize) {
6343                                 abort();
6344                         }
6345                         data_bytes_referenced +=
6346                                 btrfs_file_extent_num_bytes(buf, fi);
6347                         add_data_backref(extent_cache,
6348                                 btrfs_file_extent_disk_bytenr(buf, fi),
6349                                 parent, owner, key.objectid, key.offset -
6350                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6351                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6352                 }
6353         } else {
6354                 int level;
6355                 struct btrfs_key first_key;
6356
6357                 first_key.objectid = 0;
6358
6359                 if (nritems > 0)
6360                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6361                 level = btrfs_header_level(buf);
6362                 for (i = 0; i < nritems; i++) {
6363                         struct extent_record tmpl;
6364
6365                         ptr = btrfs_node_blockptr(buf, i);
6366                         size = root->nodesize;
6367                         btrfs_node_key_to_cpu(buf, &key, i);
6368                         if (ri != NULL) {
6369                                 if ((level == ri->drop_level)
6370                                     && is_dropped_key(&key, &ri->drop_key)) {
6371                                         continue;
6372                                 }
6373                         }
6374
6375                         memset(&tmpl, 0, sizeof(tmpl));
6376                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6377                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6378                         tmpl.start = ptr;
6379                         tmpl.nr = size;
6380                         tmpl.refs = 1;
6381                         tmpl.metadata = 1;
6382                         tmpl.max_size = size;
6383                         ret = add_extent_rec(extent_cache, &tmpl);
6384                         BUG_ON(ret);
6385
6386                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6387
6388                         if (level > 1) {
6389                                 add_pending(nodes, seen, ptr, size);
6390                         } else {
6391                                 add_pending(pending, seen, ptr, size);
6392                         }
6393                 }
6394                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6395                                       nritems) * sizeof(struct btrfs_key_ptr);
6396         }
6397         total_btree_bytes += buf->len;
6398         if (fs_root_objectid(btrfs_header_owner(buf)))
6399                 total_fs_tree_bytes += buf->len;
6400         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6401                 total_extent_tree_bytes += buf->len;
6402         if (!found_old_backref &&
6403             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6404             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6405             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6406                 found_old_backref = 1;
6407 out:
6408         free_extent_buffer(buf);
6409         return ret;
6410 }
6411
6412 static int add_root_to_pending(struct extent_buffer *buf,
6413                                struct cache_tree *extent_cache,
6414                                struct cache_tree *pending,
6415                                struct cache_tree *seen,
6416                                struct cache_tree *nodes,
6417                                u64 objectid)
6418 {
6419         struct extent_record tmpl;
6420
6421         if (btrfs_header_level(buf) > 0)
6422                 add_pending(nodes, seen, buf->start, buf->len);
6423         else
6424                 add_pending(pending, seen, buf->start, buf->len);
6425
6426         memset(&tmpl, 0, sizeof(tmpl));
6427         tmpl.start = buf->start;
6428         tmpl.nr = buf->len;
6429         tmpl.is_root = 1;
6430         tmpl.refs = 1;
6431         tmpl.metadata = 1;
6432         tmpl.max_size = buf->len;
6433         add_extent_rec(extent_cache, &tmpl);
6434
6435         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6436             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6437                 add_tree_backref(extent_cache, buf->start, buf->start,
6438                                  0, 1);
6439         else
6440                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6441         return 0;
6442 }
6443
6444 /* as we fix the tree, we might be deleting blocks that
6445  * we're tracking for repair.  This hook makes sure we
6446  * remove any backrefs for blocks as we are fixing them.
6447  */
6448 static int free_extent_hook(struct btrfs_trans_handle *trans,
6449                             struct btrfs_root *root,
6450                             u64 bytenr, u64 num_bytes, u64 parent,
6451                             u64 root_objectid, u64 owner, u64 offset,
6452                             int refs_to_drop)
6453 {
6454         struct extent_record *rec;
6455         struct cache_extent *cache;
6456         int is_data;
6457         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6458
6459         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6460         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6461         if (!cache)
6462                 return 0;
6463
6464         rec = container_of(cache, struct extent_record, cache);
6465         if (is_data) {
6466                 struct data_backref *back;
6467                 back = find_data_backref(rec, parent, root_objectid, owner,
6468                                          offset, 1, bytenr, num_bytes);
6469                 if (!back)
6470                         goto out;
6471                 if (back->node.found_ref) {
6472                         back->found_ref -= refs_to_drop;
6473                         if (rec->refs)
6474                                 rec->refs -= refs_to_drop;
6475                 }
6476                 if (back->node.found_extent_tree) {
6477                         back->num_refs -= refs_to_drop;
6478                         if (rec->extent_item_refs)
6479                                 rec->extent_item_refs -= refs_to_drop;
6480                 }
6481                 if (back->found_ref == 0)
6482                         back->node.found_ref = 0;
6483                 if (back->num_refs == 0)
6484                         back->node.found_extent_tree = 0;
6485
6486                 if (!back->node.found_extent_tree && back->node.found_ref) {
6487                         rb_erase(&back->node.node, &rec->backref_tree);
6488                         free(back);
6489                 }
6490         } else {
6491                 struct tree_backref *back;
6492                 back = find_tree_backref(rec, parent, root_objectid);
6493                 if (!back)
6494                         goto out;
6495                 if (back->node.found_ref) {
6496                         if (rec->refs)
6497                                 rec->refs--;
6498                         back->node.found_ref = 0;
6499                 }
6500                 if (back->node.found_extent_tree) {
6501                         if (rec->extent_item_refs)
6502                                 rec->extent_item_refs--;
6503                         back->node.found_extent_tree = 0;
6504                 }
6505                 if (!back->node.found_extent_tree && back->node.found_ref) {
6506                         rb_erase(&back->node.node, &rec->backref_tree);
6507                         free(back);
6508                 }
6509         }
6510         maybe_free_extent_rec(extent_cache, rec);
6511 out:
6512         return 0;
6513 }
6514
6515 static int delete_extent_records(struct btrfs_trans_handle *trans,
6516                                  struct btrfs_root *root,
6517                                  struct btrfs_path *path,
6518                                  u64 bytenr, u64 new_len)
6519 {
6520         struct btrfs_key key;
6521         struct btrfs_key found_key;
6522         struct extent_buffer *leaf;
6523         int ret;
6524         int slot;
6525
6526
6527         key.objectid = bytenr;
6528         key.type = (u8)-1;
6529         key.offset = (u64)-1;
6530
6531         while(1) {
6532                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6533                                         &key, path, 0, 1);
6534                 if (ret < 0)
6535                         break;
6536
6537                 if (ret > 0) {
6538                         ret = 0;
6539                         if (path->slots[0] == 0)
6540                                 break;
6541                         path->slots[0]--;
6542                 }
6543                 ret = 0;
6544
6545                 leaf = path->nodes[0];
6546                 slot = path->slots[0];
6547
6548                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6549                 if (found_key.objectid != bytenr)
6550                         break;
6551
6552                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6553                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6554                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6555                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6556                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6557                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6558                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6559                         btrfs_release_path(path);
6560                         if (found_key.type == 0) {
6561                                 if (found_key.offset == 0)
6562                                         break;
6563                                 key.offset = found_key.offset - 1;
6564                                 key.type = found_key.type;
6565                         }
6566                         key.type = found_key.type - 1;
6567                         key.offset = (u64)-1;
6568                         continue;
6569                 }
6570
6571                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6572                         found_key.objectid, found_key.type, found_key.offset);
6573
6574                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6575                 if (ret)
6576                         break;
6577                 btrfs_release_path(path);
6578
6579                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6580                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6581                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6582                                 found_key.offset : root->nodesize;
6583
6584                         ret = btrfs_update_block_group(trans, root, bytenr,
6585                                                        bytes, 0, 0);
6586                         if (ret)
6587                                 break;
6588                 }
6589         }
6590
6591         btrfs_release_path(path);
6592         return ret;
6593 }
6594
6595 /*
6596  * for a single backref, this will allocate a new extent
6597  * and add the backref to it.
6598  */
6599 static int record_extent(struct btrfs_trans_handle *trans,
6600                          struct btrfs_fs_info *info,
6601                          struct btrfs_path *path,
6602                          struct extent_record *rec,
6603                          struct extent_backref *back,
6604                          int allocated, u64 flags)
6605 {
6606         int ret;
6607         struct btrfs_root *extent_root = info->extent_root;
6608         struct extent_buffer *leaf;
6609         struct btrfs_key ins_key;
6610         struct btrfs_extent_item *ei;
6611         struct tree_backref *tback;
6612         struct data_backref *dback;
6613         struct btrfs_tree_block_info *bi;
6614
6615         if (!back->is_data)
6616                 rec->max_size = max_t(u64, rec->max_size,
6617                                     info->extent_root->nodesize);
6618
6619         if (!allocated) {
6620                 u32 item_size = sizeof(*ei);
6621
6622                 if (!back->is_data)
6623                         item_size += sizeof(*bi);
6624
6625                 ins_key.objectid = rec->start;
6626                 ins_key.offset = rec->max_size;
6627                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6628
6629                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6630                                         &ins_key, item_size);
6631                 if (ret)
6632                         goto fail;
6633
6634                 leaf = path->nodes[0];
6635                 ei = btrfs_item_ptr(leaf, path->slots[0],
6636                                     struct btrfs_extent_item);
6637
6638                 btrfs_set_extent_refs(leaf, ei, 0);
6639                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6640
6641                 if (back->is_data) {
6642                         btrfs_set_extent_flags(leaf, ei,
6643                                                BTRFS_EXTENT_FLAG_DATA);
6644                 } else {
6645                         struct btrfs_disk_key copy_key;;
6646
6647                         tback = to_tree_backref(back);
6648                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6649                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6650                                              sizeof(*bi));
6651
6652                         btrfs_set_disk_key_objectid(&copy_key,
6653                                                     rec->info_objectid);
6654                         btrfs_set_disk_key_type(&copy_key, 0);
6655                         btrfs_set_disk_key_offset(&copy_key, 0);
6656
6657                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6658                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6659
6660                         btrfs_set_extent_flags(leaf, ei,
6661                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6662                 }
6663
6664                 btrfs_mark_buffer_dirty(leaf);
6665                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6666                                                rec->max_size, 1, 0);
6667                 if (ret)
6668                         goto fail;
6669                 btrfs_release_path(path);
6670         }
6671
6672         if (back->is_data) {
6673                 u64 parent;
6674                 int i;
6675
6676                 dback = to_data_backref(back);
6677                 if (back->full_backref)
6678                         parent = dback->parent;
6679                 else
6680                         parent = 0;
6681
6682                 for (i = 0; i < dback->found_ref; i++) {
6683                         /* if parent != 0, we're doing a full backref
6684                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6685                          * just makes the backref allocator create a data
6686                          * backref
6687                          */
6688                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6689                                                    rec->start, rec->max_size,
6690                                                    parent,
6691                                                    dback->root,
6692                                                    parent ?
6693                                                    BTRFS_FIRST_FREE_OBJECTID :
6694                                                    dback->owner,
6695                                                    dback->offset);
6696                         if (ret)
6697                                 break;
6698                 }
6699                 fprintf(stderr, "adding new data backref"
6700                                 " on %llu %s %llu owner %llu"
6701                                 " offset %llu found %d\n",
6702                                 (unsigned long long)rec->start,
6703                                 back->full_backref ?
6704                                 "parent" : "root",
6705                                 back->full_backref ?
6706                                 (unsigned long long)parent :
6707                                 (unsigned long long)dback->root,
6708                                 (unsigned long long)dback->owner,
6709                                 (unsigned long long)dback->offset,
6710                                 dback->found_ref);
6711         } else {
6712                 u64 parent;
6713
6714                 tback = to_tree_backref(back);
6715                 if (back->full_backref)
6716                         parent = tback->parent;
6717                 else
6718                         parent = 0;
6719
6720                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6721                                            rec->start, rec->max_size,
6722                                            parent, tback->root, 0, 0);
6723                 fprintf(stderr, "adding new tree backref on "
6724                         "start %llu len %llu parent %llu root %llu\n",
6725                         rec->start, rec->max_size, parent, tback->root);
6726         }
6727 fail:
6728         btrfs_release_path(path);
6729         return ret;
6730 }
6731
6732 static struct extent_entry *find_entry(struct list_head *entries,
6733                                        u64 bytenr, u64 bytes)
6734 {
6735         struct extent_entry *entry = NULL;
6736
6737         list_for_each_entry(entry, entries, list) {
6738                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6739                         return entry;
6740         }
6741
6742         return NULL;
6743 }
6744
6745 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6746 {
6747         struct extent_entry *entry, *best = NULL, *prev = NULL;
6748
6749         list_for_each_entry(entry, entries, list) {
6750                 if (!prev) {
6751                         prev = entry;
6752                         continue;
6753                 }
6754
6755                 /*
6756                  * If there are as many broken entries as entries then we know
6757                  * not to trust this particular entry.
6758                  */
6759                 if (entry->broken == entry->count)
6760                         continue;
6761
6762                 /*
6763                  * If our current entry == best then we can't be sure our best
6764                  * is really the best, so we need to keep searching.
6765                  */
6766                 if (best && best->count == entry->count) {
6767                         prev = entry;
6768                         best = NULL;
6769                         continue;
6770                 }
6771
6772                 /* Prev == entry, not good enough, have to keep searching */
6773                 if (!prev->broken && prev->count == entry->count)
6774                         continue;
6775
6776                 if (!best)
6777                         best = (prev->count > entry->count) ? prev : entry;
6778                 else if (best->count < entry->count)
6779                         best = entry;
6780                 prev = entry;
6781         }
6782
6783         return best;
6784 }
6785
6786 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6787                       struct data_backref *dback, struct extent_entry *entry)
6788 {
6789         struct btrfs_trans_handle *trans;
6790         struct btrfs_root *root;
6791         struct btrfs_file_extent_item *fi;
6792         struct extent_buffer *leaf;
6793         struct btrfs_key key;
6794         u64 bytenr, bytes;
6795         int ret, err;
6796
6797         key.objectid = dback->root;
6798         key.type = BTRFS_ROOT_ITEM_KEY;
6799         key.offset = (u64)-1;
6800         root = btrfs_read_fs_root(info, &key);
6801         if (IS_ERR(root)) {
6802                 fprintf(stderr, "Couldn't find root for our ref\n");
6803                 return -EINVAL;
6804         }
6805
6806         /*
6807          * The backref points to the original offset of the extent if it was
6808          * split, so we need to search down to the offset we have and then walk
6809          * forward until we find the backref we're looking for.
6810          */
6811         key.objectid = dback->owner;
6812         key.type = BTRFS_EXTENT_DATA_KEY;
6813         key.offset = dback->offset;
6814         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6815         if (ret < 0) {
6816                 fprintf(stderr, "Error looking up ref %d\n", ret);
6817                 return ret;
6818         }
6819
6820         while (1) {
6821                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6822                         ret = btrfs_next_leaf(root, path);
6823                         if (ret) {
6824                                 fprintf(stderr, "Couldn't find our ref, next\n");
6825                                 return -EINVAL;
6826                         }
6827                 }
6828                 leaf = path->nodes[0];
6829                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6830                 if (key.objectid != dback->owner ||
6831                     key.type != BTRFS_EXTENT_DATA_KEY) {
6832                         fprintf(stderr, "Couldn't find our ref, search\n");
6833                         return -EINVAL;
6834                 }
6835                 fi = btrfs_item_ptr(leaf, path->slots[0],
6836                                     struct btrfs_file_extent_item);
6837                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6838                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6839
6840                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6841                         break;
6842                 path->slots[0]++;
6843         }
6844
6845         btrfs_release_path(path);
6846
6847         trans = btrfs_start_transaction(root, 1);
6848         if (IS_ERR(trans))
6849                 return PTR_ERR(trans);
6850
6851         /*
6852          * Ok we have the key of the file extent we want to fix, now we can cow
6853          * down to the thing and fix it.
6854          */
6855         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6856         if (ret < 0) {
6857                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6858                         key.objectid, key.type, key.offset, ret);
6859                 goto out;
6860         }
6861         if (ret > 0) {
6862                 fprintf(stderr, "Well that's odd, we just found this key "
6863                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6864                         key.offset);
6865                 ret = -EINVAL;
6866                 goto out;
6867         }
6868         leaf = path->nodes[0];
6869         fi = btrfs_item_ptr(leaf, path->slots[0],
6870                             struct btrfs_file_extent_item);
6871
6872         if (btrfs_file_extent_compression(leaf, fi) &&
6873             dback->disk_bytenr != entry->bytenr) {
6874                 fprintf(stderr, "Ref doesn't match the record start and is "
6875                         "compressed, please take a btrfs-image of this file "
6876                         "system and send it to a btrfs developer so they can "
6877                         "complete this functionality for bytenr %Lu\n",
6878                         dback->disk_bytenr);
6879                 ret = -EINVAL;
6880                 goto out;
6881         }
6882
6883         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6884                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6885         } else if (dback->disk_bytenr > entry->bytenr) {
6886                 u64 off_diff, offset;
6887
6888                 off_diff = dback->disk_bytenr - entry->bytenr;
6889                 offset = btrfs_file_extent_offset(leaf, fi);
6890                 if (dback->disk_bytenr + offset +
6891                     btrfs_file_extent_num_bytes(leaf, fi) >
6892                     entry->bytenr + entry->bytes) {
6893                         fprintf(stderr, "Ref is past the entry end, please "
6894                                 "take a btrfs-image of this file system and "
6895                                 "send it to a btrfs developer, ref %Lu\n",
6896                                 dback->disk_bytenr);
6897                         ret = -EINVAL;
6898                         goto out;
6899                 }
6900                 offset += off_diff;
6901                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6902                 btrfs_set_file_extent_offset(leaf, fi, offset);
6903         } else if (dback->disk_bytenr < entry->bytenr) {
6904                 u64 offset;
6905
6906                 offset = btrfs_file_extent_offset(leaf, fi);
6907                 if (dback->disk_bytenr + offset < entry->bytenr) {
6908                         fprintf(stderr, "Ref is before the entry start, please"
6909                                 " take a btrfs-image of this file system and "
6910                                 "send it to a btrfs developer, ref %Lu\n",
6911                                 dback->disk_bytenr);
6912                         ret = -EINVAL;
6913                         goto out;
6914                 }
6915
6916                 offset += dback->disk_bytenr;
6917                 offset -= entry->bytenr;
6918                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6919                 btrfs_set_file_extent_offset(leaf, fi, offset);
6920         }
6921
6922         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6923
6924         /*
6925          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6926          * only do this if we aren't using compression, otherwise it's a
6927          * trickier case.
6928          */
6929         if (!btrfs_file_extent_compression(leaf, fi))
6930                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6931         else
6932                 printf("ram bytes may be wrong?\n");
6933         btrfs_mark_buffer_dirty(leaf);
6934 out:
6935         err = btrfs_commit_transaction(trans, root);
6936         btrfs_release_path(path);
6937         return ret ? ret : err;
6938 }
6939
6940 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6941                            struct extent_record *rec)
6942 {
6943         struct extent_backref *back, *tmp;
6944         struct data_backref *dback;
6945         struct extent_entry *entry, *best = NULL;
6946         LIST_HEAD(entries);
6947         int nr_entries = 0;
6948         int broken_entries = 0;
6949         int ret = 0;
6950         short mismatch = 0;
6951
6952         /*
6953          * Metadata is easy and the backrefs should always agree on bytenr and
6954          * size, if not we've got bigger issues.
6955          */
6956         if (rec->metadata)
6957                 return 0;
6958
6959         rbtree_postorder_for_each_entry_safe(back, tmp,
6960                                              &rec->backref_tree, node) {
6961                 if (back->full_backref || !back->is_data)
6962                         continue;
6963
6964                 dback = to_data_backref(back);
6965
6966                 /*
6967                  * We only pay attention to backrefs that we found a real
6968                  * backref for.
6969                  */
6970                 if (dback->found_ref == 0)
6971                         continue;
6972
6973                 /*
6974                  * For now we only catch when the bytes don't match, not the
6975                  * bytenr.  We can easily do this at the same time, but I want
6976                  * to have a fs image to test on before we just add repair
6977                  * functionality willy-nilly so we know we won't screw up the
6978                  * repair.
6979                  */
6980
6981                 entry = find_entry(&entries, dback->disk_bytenr,
6982                                    dback->bytes);
6983                 if (!entry) {
6984                         entry = malloc(sizeof(struct extent_entry));
6985                         if (!entry) {
6986                                 ret = -ENOMEM;
6987                                 goto out;
6988                         }
6989                         memset(entry, 0, sizeof(*entry));
6990                         entry->bytenr = dback->disk_bytenr;
6991                         entry->bytes = dback->bytes;
6992                         list_add_tail(&entry->list, &entries);
6993                         nr_entries++;
6994                 }
6995
6996                 /*
6997                  * If we only have on entry we may think the entries agree when
6998                  * in reality they don't so we have to do some extra checking.
6999                  */
7000                 if (dback->disk_bytenr != rec->start ||
7001                     dback->bytes != rec->nr || back->broken)
7002                         mismatch = 1;
7003
7004                 if (back->broken) {
7005                         entry->broken++;
7006                         broken_entries++;
7007                 }
7008
7009                 entry->count++;
7010         }
7011
7012         /* Yay all the backrefs agree, carry on good sir */
7013         if (nr_entries <= 1 && !mismatch)
7014                 goto out;
7015
7016         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7017                 "%Lu\n", rec->start);
7018
7019         /*
7020          * First we want to see if the backrefs can agree amongst themselves who
7021          * is right, so figure out which one of the entries has the highest
7022          * count.
7023          */
7024         best = find_most_right_entry(&entries);
7025
7026         /*
7027          * Ok so we may have an even split between what the backrefs think, so
7028          * this is where we use the extent ref to see what it thinks.
7029          */
7030         if (!best) {
7031                 entry = find_entry(&entries, rec->start, rec->nr);
7032                 if (!entry && (!broken_entries || !rec->found_rec)) {
7033                         fprintf(stderr, "Backrefs don't agree with each other "
7034                                 "and extent record doesn't agree with anybody,"
7035                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7036                                 rec->start, rec->nr);
7037                         ret = -EINVAL;
7038                         goto out;
7039                 } else if (!entry) {
7040                         /*
7041                          * Ok our backrefs were broken, we'll assume this is the
7042                          * correct value and add an entry for this range.
7043                          */
7044                         entry = malloc(sizeof(struct extent_entry));
7045                         if (!entry) {
7046                                 ret = -ENOMEM;
7047                                 goto out;
7048                         }
7049                         memset(entry, 0, sizeof(*entry));
7050                         entry->bytenr = rec->start;
7051                         entry->bytes = rec->nr;
7052                         list_add_tail(&entry->list, &entries);
7053                         nr_entries++;
7054                 }
7055                 entry->count++;
7056                 best = find_most_right_entry(&entries);
7057                 if (!best) {
7058                         fprintf(stderr, "Backrefs and extent record evenly "
7059                                 "split on who is right, this is going to "
7060                                 "require user input to fix bytenr %Lu bytes "
7061                                 "%Lu\n", rec->start, rec->nr);
7062                         ret = -EINVAL;
7063                         goto out;
7064                 }
7065         }
7066
7067         /*
7068          * I don't think this can happen currently as we'll abort() if we catch
7069          * this case higher up, but in case somebody removes that we still can't
7070          * deal with it properly here yet, so just bail out of that's the case.
7071          */
7072         if (best->bytenr != rec->start) {
7073                 fprintf(stderr, "Extent start and backref starts don't match, "
7074                         "please use btrfs-image on this file system and send "
7075                         "it to a btrfs developer so they can make fsck fix "
7076                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7077                         rec->start, rec->nr);
7078                 ret = -EINVAL;
7079                 goto out;
7080         }
7081
7082         /*
7083          * Ok great we all agreed on an extent record, let's go find the real
7084          * references and fix up the ones that don't match.
7085          */
7086         rbtree_postorder_for_each_entry_safe(back, tmp,
7087                                              &rec->backref_tree, node) {
7088                 if (back->full_backref || !back->is_data)
7089                         continue;
7090
7091                 dback = to_data_backref(back);
7092
7093                 /*
7094                  * Still ignoring backrefs that don't have a real ref attached
7095                  * to them.
7096                  */
7097                 if (dback->found_ref == 0)
7098                         continue;
7099
7100                 if (dback->bytes == best->bytes &&
7101                     dback->disk_bytenr == best->bytenr)
7102                         continue;
7103
7104                 ret = repair_ref(info, path, dback, best);
7105                 if (ret)
7106                         goto out;
7107         }
7108
7109         /*
7110          * Ok we messed with the actual refs, which means we need to drop our
7111          * entire cache and go back and rescan.  I know this is a huge pain and
7112          * adds a lot of extra work, but it's the only way to be safe.  Once all
7113          * the backrefs agree we may not need to do anything to the extent
7114          * record itself.
7115          */
7116         ret = -EAGAIN;
7117 out:
7118         while (!list_empty(&entries)) {
7119                 entry = list_entry(entries.next, struct extent_entry, list);
7120                 list_del_init(&entry->list);
7121                 free(entry);
7122         }
7123         return ret;
7124 }
7125
7126 static int process_duplicates(struct btrfs_root *root,
7127                               struct cache_tree *extent_cache,
7128                               struct extent_record *rec)
7129 {
7130         struct extent_record *good, *tmp;
7131         struct cache_extent *cache;
7132         int ret;
7133
7134         /*
7135          * If we found a extent record for this extent then return, or if we
7136          * have more than one duplicate we are likely going to need to delete
7137          * something.
7138          */
7139         if (rec->found_rec || rec->num_duplicates > 1)
7140                 return 0;
7141
7142         /* Shouldn't happen but just in case */
7143         BUG_ON(!rec->num_duplicates);
7144
7145         /*
7146          * So this happens if we end up with a backref that doesn't match the
7147          * actual extent entry.  So either the backref is bad or the extent
7148          * entry is bad.  Either way we want to have the extent_record actually
7149          * reflect what we found in the extent_tree, so we need to take the
7150          * duplicate out and use that as the extent_record since the only way we
7151          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7152          */
7153         remove_cache_extent(extent_cache, &rec->cache);
7154
7155         good = to_extent_record(rec->dups.next);
7156         list_del_init(&good->list);
7157         INIT_LIST_HEAD(&good->backrefs);
7158         INIT_LIST_HEAD(&good->dups);
7159         good->cache.start = good->start;
7160         good->cache.size = good->nr;
7161         good->content_checked = 0;
7162         good->owner_ref_checked = 0;
7163         good->num_duplicates = 0;
7164         good->refs = rec->refs;
7165         list_splice_init(&rec->backrefs, &good->backrefs);
7166         while (1) {
7167                 cache = lookup_cache_extent(extent_cache, good->start,
7168                                             good->nr);
7169                 if (!cache)
7170                         break;
7171                 tmp = container_of(cache, struct extent_record, cache);
7172
7173                 /*
7174                  * If we find another overlapping extent and it's found_rec is
7175                  * set then it's a duplicate and we need to try and delete
7176                  * something.
7177                  */
7178                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7179                         if (list_empty(&good->list))
7180                                 list_add_tail(&good->list,
7181                                               &duplicate_extents);
7182                         good->num_duplicates += tmp->num_duplicates + 1;
7183                         list_splice_init(&tmp->dups, &good->dups);
7184                         list_del_init(&tmp->list);
7185                         list_add_tail(&tmp->list, &good->dups);
7186                         remove_cache_extent(extent_cache, &tmp->cache);
7187                         continue;
7188                 }
7189
7190                 /*
7191                  * Ok we have another non extent item backed extent rec, so lets
7192                  * just add it to this extent and carry on like we did above.
7193                  */
7194                 good->refs += tmp->refs;
7195                 list_splice_init(&tmp->backrefs, &good->backrefs);
7196                 remove_cache_extent(extent_cache, &tmp->cache);
7197                 free(tmp);
7198         }
7199         ret = insert_cache_extent(extent_cache, &good->cache);
7200         BUG_ON(ret);
7201         free(rec);
7202         return good->num_duplicates ? 0 : 1;
7203 }
7204
7205 static int delete_duplicate_records(struct btrfs_root *root,
7206                                     struct extent_record *rec)
7207 {
7208         struct btrfs_trans_handle *trans;
7209         LIST_HEAD(delete_list);
7210         struct btrfs_path *path;
7211         struct extent_record *tmp, *good, *n;
7212         int nr_del = 0;
7213         int ret = 0, err;
7214         struct btrfs_key key;
7215
7216         path = btrfs_alloc_path();
7217         if (!path) {
7218                 ret = -ENOMEM;
7219                 goto out;
7220         }
7221
7222         good = rec;
7223         /* Find the record that covers all of the duplicates. */
7224         list_for_each_entry(tmp, &rec->dups, list) {
7225                 if (good->start < tmp->start)
7226                         continue;
7227                 if (good->nr > tmp->nr)
7228                         continue;
7229
7230                 if (tmp->start + tmp->nr < good->start + good->nr) {
7231                         fprintf(stderr, "Ok we have overlapping extents that "
7232                                 "aren't completely covered by each other, this "
7233                                 "is going to require more careful thought.  "
7234                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7235                                 tmp->start, tmp->nr, good->start, good->nr);
7236                         abort();
7237                 }
7238                 good = tmp;
7239         }
7240
7241         if (good != rec)
7242                 list_add_tail(&rec->list, &delete_list);
7243
7244         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7245                 if (tmp == good)
7246                         continue;
7247                 list_move_tail(&tmp->list, &delete_list);
7248         }
7249
7250         root = root->fs_info->extent_root;
7251         trans = btrfs_start_transaction(root, 1);
7252         if (IS_ERR(trans)) {
7253                 ret = PTR_ERR(trans);
7254                 goto out;
7255         }
7256
7257         list_for_each_entry(tmp, &delete_list, list) {
7258                 if (tmp->found_rec == 0)
7259                         continue;
7260                 key.objectid = tmp->start;
7261                 key.type = BTRFS_EXTENT_ITEM_KEY;
7262                 key.offset = tmp->nr;
7263
7264                 /* Shouldn't happen but just in case */
7265                 if (tmp->metadata) {
7266                         fprintf(stderr, "Well this shouldn't happen, extent "
7267                                 "record overlaps but is metadata? "
7268                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7269                         abort();
7270                 }
7271
7272                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7273                 if (ret) {
7274                         if (ret > 0)
7275                                 ret = -EINVAL;
7276                         break;
7277                 }
7278                 ret = btrfs_del_item(trans, root, path);
7279                 if (ret)
7280                         break;
7281                 btrfs_release_path(path);
7282                 nr_del++;
7283         }
7284         err = btrfs_commit_transaction(trans, root);
7285         if (err && !ret)
7286                 ret = err;
7287 out:
7288         while (!list_empty(&delete_list)) {
7289                 tmp = to_extent_record(delete_list.next);
7290                 list_del_init(&tmp->list);
7291                 if (tmp == rec)
7292                         continue;
7293                 free(tmp);
7294         }
7295
7296         while (!list_empty(&rec->dups)) {
7297                 tmp = to_extent_record(rec->dups.next);
7298                 list_del_init(&tmp->list);
7299                 free(tmp);
7300         }
7301
7302         btrfs_free_path(path);
7303
7304         if (!ret && !nr_del)
7305                 rec->num_duplicates = 0;
7306
7307         return ret ? ret : nr_del;
7308 }
7309
7310 static int find_possible_backrefs(struct btrfs_fs_info *info,
7311                                   struct btrfs_path *path,
7312                                   struct cache_tree *extent_cache,
7313                                   struct extent_record *rec)
7314 {
7315         struct btrfs_root *root;
7316         struct extent_backref *back, *tmp;
7317         struct data_backref *dback;
7318         struct cache_extent *cache;
7319         struct btrfs_file_extent_item *fi;
7320         struct btrfs_key key;
7321         u64 bytenr, bytes;
7322         int ret;
7323
7324         rbtree_postorder_for_each_entry_safe(back, tmp,
7325                                              &rec->backref_tree, node) {
7326                 /* Don't care about full backrefs (poor unloved backrefs) */
7327                 if (back->full_backref || !back->is_data)
7328                         continue;
7329
7330                 dback = to_data_backref(back);
7331
7332                 /* We found this one, we don't need to do a lookup */
7333                 if (dback->found_ref)
7334                         continue;
7335
7336                 key.objectid = dback->root;
7337                 key.type = BTRFS_ROOT_ITEM_KEY;
7338                 key.offset = (u64)-1;
7339
7340                 root = btrfs_read_fs_root(info, &key);
7341
7342                 /* No root, definitely a bad ref, skip */
7343                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7344                         continue;
7345                 /* Other err, exit */
7346                 if (IS_ERR(root))
7347                         return PTR_ERR(root);
7348
7349                 key.objectid = dback->owner;
7350                 key.type = BTRFS_EXTENT_DATA_KEY;
7351                 key.offset = dback->offset;
7352                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7353                 if (ret) {
7354                         btrfs_release_path(path);
7355                         if (ret < 0)
7356                                 return ret;
7357                         /* Didn't find it, we can carry on */
7358                         ret = 0;
7359                         continue;
7360                 }
7361
7362                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7363                                     struct btrfs_file_extent_item);
7364                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7365                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7366                 btrfs_release_path(path);
7367                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7368                 if (cache) {
7369                         struct extent_record *tmp;
7370                         tmp = container_of(cache, struct extent_record, cache);
7371
7372                         /*
7373                          * If we found an extent record for the bytenr for this
7374                          * particular backref then we can't add it to our
7375                          * current extent record.  We only want to add backrefs
7376                          * that don't have a corresponding extent item in the
7377                          * extent tree since they likely belong to this record
7378                          * and we need to fix it if it doesn't match bytenrs.
7379                          */
7380                         if  (tmp->found_rec)
7381                                 continue;
7382                 }
7383
7384                 dback->found_ref += 1;
7385                 dback->disk_bytenr = bytenr;
7386                 dback->bytes = bytes;
7387
7388                 /*
7389                  * Set this so the verify backref code knows not to trust the
7390                  * values in this backref.
7391                  */
7392                 back->broken = 1;
7393         }
7394
7395         return 0;
7396 }
7397
7398 /*
7399  * Record orphan data ref into corresponding root.
7400  *
7401  * Return 0 if the extent item contains data ref and recorded.
7402  * Return 1 if the extent item contains no useful data ref
7403  *   On that case, it may contains only shared_dataref or metadata backref
7404  *   or the file extent exists(this should be handled by the extent bytenr
7405  *   recovery routine)
7406  * Return <0 if something goes wrong.
7407  */
7408 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7409                                       struct extent_record *rec)
7410 {
7411         struct btrfs_key key;
7412         struct btrfs_root *dest_root;
7413         struct extent_backref *back, *tmp;
7414         struct data_backref *dback;
7415         struct orphan_data_extent *orphan;
7416         struct btrfs_path *path;
7417         int recorded_data_ref = 0;
7418         int ret = 0;
7419
7420         if (rec->metadata)
7421                 return 1;
7422         path = btrfs_alloc_path();
7423         if (!path)
7424                 return -ENOMEM;
7425         rbtree_postorder_for_each_entry_safe(back, tmp,
7426                                              &rec->backref_tree, node) {
7427                 if (back->full_backref || !back->is_data ||
7428                     !back->found_extent_tree)
7429                         continue;
7430                 dback = to_data_backref(back);
7431                 if (dback->found_ref)
7432                         continue;
7433                 key.objectid = dback->root;
7434                 key.type = BTRFS_ROOT_ITEM_KEY;
7435                 key.offset = (u64)-1;
7436
7437                 dest_root = btrfs_read_fs_root(fs_info, &key);
7438
7439                 /* For non-exist root we just skip it */
7440                 if (IS_ERR(dest_root) || !dest_root)
7441                         continue;
7442
7443                 key.objectid = dback->owner;
7444                 key.type = BTRFS_EXTENT_DATA_KEY;
7445                 key.offset = dback->offset;
7446
7447                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7448                 /*
7449                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7450                  * we need to record it for inode/file extent rebuild.
7451                  * For ret > 0, we record it only for file extent rebuild.
7452                  * For ret == 0, the file extent exists but only bytenr
7453                  * mismatch, let the original bytenr fix routine to handle,
7454                  * don't record it.
7455                  */
7456                 if (ret == 0)
7457                         continue;
7458                 ret = 0;
7459                 orphan = malloc(sizeof(*orphan));
7460                 if (!orphan) {
7461                         ret = -ENOMEM;
7462                         goto out;
7463                 }
7464                 INIT_LIST_HEAD(&orphan->list);
7465                 orphan->root = dback->root;
7466                 orphan->objectid = dback->owner;
7467                 orphan->offset = dback->offset;
7468                 orphan->disk_bytenr = rec->cache.start;
7469                 orphan->disk_len = rec->cache.size;
7470                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7471                 recorded_data_ref = 1;
7472         }
7473 out:
7474         btrfs_free_path(path);
7475         if (!ret)
7476                 return !recorded_data_ref;
7477         else
7478                 return ret;
7479 }
7480
7481 /*
7482  * when an incorrect extent item is found, this will delete
7483  * all of the existing entries for it and recreate them
7484  * based on what the tree scan found.
7485  */
7486 static int fixup_extent_refs(struct btrfs_fs_info *info,
7487                              struct cache_tree *extent_cache,
7488                              struct extent_record *rec)
7489 {
7490         struct btrfs_trans_handle *trans = NULL;
7491         int ret;
7492         struct btrfs_path *path;
7493         struct cache_extent *cache;
7494         struct extent_backref *back, *tmp;
7495         int allocated = 0;
7496         u64 flags = 0;
7497
7498         if (rec->flag_block_full_backref)
7499                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7500
7501         path = btrfs_alloc_path();
7502         if (!path)
7503                 return -ENOMEM;
7504
7505         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7506                 /*
7507                  * Sometimes the backrefs themselves are so broken they don't
7508                  * get attached to any meaningful rec, so first go back and
7509                  * check any of our backrefs that we couldn't find and throw
7510                  * them into the list if we find the backref so that
7511                  * verify_backrefs can figure out what to do.
7512                  */
7513                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7514                 if (ret < 0)
7515                         goto out;
7516         }
7517
7518         /* step one, make sure all of the backrefs agree */
7519         ret = verify_backrefs(info, path, rec);
7520         if (ret < 0)
7521                 goto out;
7522
7523         trans = btrfs_start_transaction(info->extent_root, 1);
7524         if (IS_ERR(trans)) {
7525                 ret = PTR_ERR(trans);
7526                 goto out;
7527         }
7528
7529         /* step two, delete all the existing records */
7530         ret = delete_extent_records(trans, info->extent_root, path,
7531                                     rec->start, rec->max_size);
7532
7533         if (ret < 0)
7534                 goto out;
7535
7536         /* was this block corrupt?  If so, don't add references to it */
7537         cache = lookup_cache_extent(info->corrupt_blocks,
7538                                     rec->start, rec->max_size);
7539         if (cache) {
7540                 ret = 0;
7541                 goto out;
7542         }
7543
7544         /* step three, recreate all the refs we did find */
7545         rbtree_postorder_for_each_entry_safe(back, tmp,
7546                                              &rec->backref_tree, node) {
7547                 /*
7548                  * if we didn't find any references, don't create a
7549                  * new extent record
7550                  */
7551                 if (!back->found_ref)
7552                         continue;
7553
7554                 rec->bad_full_backref = 0;
7555                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7556                 allocated = 1;
7557
7558                 if (ret)
7559                         goto out;
7560         }
7561 out:
7562         if (trans) {
7563                 int err = btrfs_commit_transaction(trans, info->extent_root);
7564                 if (!ret)
7565                         ret = err;
7566         }
7567
7568         btrfs_free_path(path);
7569         return ret;
7570 }
7571
7572 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7573                               struct extent_record *rec)
7574 {
7575         struct btrfs_trans_handle *trans;
7576         struct btrfs_root *root = fs_info->extent_root;
7577         struct btrfs_path *path;
7578         struct btrfs_extent_item *ei;
7579         struct btrfs_key key;
7580         u64 flags;
7581         int ret = 0;
7582
7583         key.objectid = rec->start;
7584         if (rec->metadata) {
7585                 key.type = BTRFS_METADATA_ITEM_KEY;
7586                 key.offset = rec->info_level;
7587         } else {
7588                 key.type = BTRFS_EXTENT_ITEM_KEY;
7589                 key.offset = rec->max_size;
7590         }
7591
7592         path = btrfs_alloc_path();
7593         if (!path)
7594                 return -ENOMEM;
7595
7596         trans = btrfs_start_transaction(root, 0);
7597         if (IS_ERR(trans)) {
7598                 btrfs_free_path(path);
7599                 return PTR_ERR(trans);
7600         }
7601
7602         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7603         if (ret < 0) {
7604                 btrfs_free_path(path);
7605                 btrfs_commit_transaction(trans, root);
7606                 return ret;
7607         } else if (ret) {
7608                 fprintf(stderr, "Didn't find extent for %llu\n",
7609                         (unsigned long long)rec->start);
7610                 btrfs_free_path(path);
7611                 btrfs_commit_transaction(trans, root);
7612                 return -ENOENT;
7613         }
7614
7615         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7616                             struct btrfs_extent_item);
7617         flags = btrfs_extent_flags(path->nodes[0], ei);
7618         if (rec->flag_block_full_backref) {
7619                 fprintf(stderr, "setting full backref on %llu\n",
7620                         (unsigned long long)key.objectid);
7621                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7622         } else {
7623                 fprintf(stderr, "clearing full backref on %llu\n",
7624                         (unsigned long long)key.objectid);
7625                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7626         }
7627         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7628         btrfs_mark_buffer_dirty(path->nodes[0]);
7629         btrfs_free_path(path);
7630         return btrfs_commit_transaction(trans, root);
7631 }
7632
7633 /* right now we only prune from the extent allocation tree */
7634 static int prune_one_block(struct btrfs_trans_handle *trans,
7635                            struct btrfs_fs_info *info,
7636                            struct btrfs_corrupt_block *corrupt)
7637 {
7638         int ret;
7639         struct btrfs_path path;
7640         struct extent_buffer *eb;
7641         u64 found;
7642         int slot;
7643         int nritems;
7644         int level = corrupt->level + 1;
7645
7646         btrfs_init_path(&path);
7647 again:
7648         /* we want to stop at the parent to our busted block */
7649         path.lowest_level = level;
7650
7651         ret = btrfs_search_slot(trans, info->extent_root,
7652                                 &corrupt->key, &path, -1, 1);
7653
7654         if (ret < 0)
7655                 goto out;
7656
7657         eb = path.nodes[level];
7658         if (!eb) {
7659                 ret = -ENOENT;
7660                 goto out;
7661         }
7662
7663         /*
7664          * hopefully the search gave us the block we want to prune,
7665          * lets try that first
7666          */
7667         slot = path.slots[level];
7668         found =  btrfs_node_blockptr(eb, slot);
7669         if (found == corrupt->cache.start)
7670                 goto del_ptr;
7671
7672         nritems = btrfs_header_nritems(eb);
7673
7674         /* the search failed, lets scan this node and hope we find it */
7675         for (slot = 0; slot < nritems; slot++) {
7676                 found =  btrfs_node_blockptr(eb, slot);
7677                 if (found == corrupt->cache.start)
7678                         goto del_ptr;
7679         }
7680         /*
7681          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7682          * to this block
7683          */
7684         if (eb == info->extent_root->node) {
7685                 ret = -ENOENT;
7686                 goto out;
7687         } else {
7688                 level++;
7689                 btrfs_release_path(&path);
7690                 goto again;
7691         }
7692
7693 del_ptr:
7694         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7695         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7696
7697 out:
7698         btrfs_release_path(&path);
7699         return ret;
7700 }
7701
7702 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7703 {
7704         struct btrfs_trans_handle *trans = NULL;
7705         struct cache_extent *cache;
7706         struct btrfs_corrupt_block *corrupt;
7707
7708         while (1) {
7709                 cache = search_cache_extent(info->corrupt_blocks, 0);
7710                 if (!cache)
7711                         break;
7712                 if (!trans) {
7713                         trans = btrfs_start_transaction(info->extent_root, 1);
7714                         if (IS_ERR(trans))
7715                                 return PTR_ERR(trans);
7716                 }
7717                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7718                 prune_one_block(trans, info, corrupt);
7719                 remove_cache_extent(info->corrupt_blocks, cache);
7720         }
7721         if (trans)
7722                 return btrfs_commit_transaction(trans, info->extent_root);
7723         return 0;
7724 }
7725
7726 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7727 {
7728         struct btrfs_block_group_cache *cache;
7729         u64 start, end;
7730         int ret;
7731
7732         while (1) {
7733                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7734                                             &start, &end, EXTENT_DIRTY);
7735                 if (ret)
7736                         break;
7737                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7738                                    GFP_NOFS);
7739         }
7740
7741         start = 0;
7742         while (1) {
7743                 cache = btrfs_lookup_first_block_group(fs_info, start);
7744                 if (!cache)
7745                         break;
7746                 if (cache->cached)
7747                         cache->cached = 0;
7748                 start = cache->key.objectid + cache->key.offset;
7749         }
7750 }
7751
7752 static int check_extent_refs(struct btrfs_root *root,
7753                              struct cache_tree *extent_cache)
7754 {
7755         struct extent_record *rec;
7756         struct cache_extent *cache;
7757         int err = 0;
7758         int ret = 0;
7759         int fixed = 0;
7760         int had_dups = 0;
7761         int recorded = 0;
7762
7763         if (repair) {
7764                 /*
7765                  * if we're doing a repair, we have to make sure
7766                  * we don't allocate from the problem extents.
7767                  * In the worst case, this will be all the
7768                  * extents in the FS
7769                  */
7770                 cache = search_cache_extent(extent_cache, 0);
7771                 while(cache) {
7772                         rec = container_of(cache, struct extent_record, cache);
7773                         set_extent_dirty(root->fs_info->excluded_extents,
7774                                          rec->start,
7775                                          rec->start + rec->max_size - 1,
7776                                          GFP_NOFS);
7777                         cache = next_cache_extent(cache);
7778                 }
7779
7780                 /* pin down all the corrupted blocks too */
7781                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7782                 while(cache) {
7783                         set_extent_dirty(root->fs_info->excluded_extents,
7784                                          cache->start,
7785                                          cache->start + cache->size - 1,
7786                                          GFP_NOFS);
7787                         cache = next_cache_extent(cache);
7788                 }
7789                 prune_corrupt_blocks(root->fs_info);
7790                 reset_cached_block_groups(root->fs_info);
7791         }
7792
7793         reset_cached_block_groups(root->fs_info);
7794
7795         /*
7796          * We need to delete any duplicate entries we find first otherwise we
7797          * could mess up the extent tree when we have backrefs that actually
7798          * belong to a different extent item and not the weird duplicate one.
7799          */
7800         while (repair && !list_empty(&duplicate_extents)) {
7801                 rec = to_extent_record(duplicate_extents.next);
7802                 list_del_init(&rec->list);
7803
7804                 /* Sometimes we can find a backref before we find an actual
7805                  * extent, so we need to process it a little bit to see if there
7806                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7807                  * if this is a backref screwup.  If we need to delete stuff
7808                  * process_duplicates() will return 0, otherwise it will return
7809                  * 1 and we
7810                  */
7811                 if (process_duplicates(root, extent_cache, rec))
7812                         continue;
7813                 ret = delete_duplicate_records(root, rec);
7814                 if (ret < 0)
7815                         return ret;
7816                 /*
7817                  * delete_duplicate_records will return the number of entries
7818                  * deleted, so if it's greater than 0 then we know we actually
7819                  * did something and we need to remove.
7820                  */
7821                 if (ret)
7822                         had_dups = 1;
7823         }
7824
7825         if (had_dups)
7826                 return -EAGAIN;
7827
7828         while(1) {
7829                 int cur_err = 0;
7830
7831                 fixed = 0;
7832                 recorded = 0;
7833                 cache = search_cache_extent(extent_cache, 0);
7834                 if (!cache)
7835                         break;
7836                 rec = container_of(cache, struct extent_record, cache);
7837                 if (rec->num_duplicates) {
7838                         fprintf(stderr, "extent item %llu has multiple extent "
7839                                 "items\n", (unsigned long long)rec->start);
7840                         err = 1;
7841                         cur_err = 1;
7842                 }
7843
7844                 if (rec->refs != rec->extent_item_refs) {
7845                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7846                                 (unsigned long long)rec->start,
7847                                 (unsigned long long)rec->nr);
7848                         fprintf(stderr, "extent item %llu, found %llu\n",
7849                                 (unsigned long long)rec->extent_item_refs,
7850                                 (unsigned long long)rec->refs);
7851                         ret = record_orphan_data_extents(root->fs_info, rec);
7852                         if (ret < 0)
7853                                 goto repair_abort;
7854                         if (ret == 0) {
7855                                 recorded = 1;
7856                         } else {
7857                                 /*
7858                                  * we can't use the extent to repair file
7859                                  * extent, let the fallback method handle it.
7860                                  */
7861                                 if (!fixed && repair) {
7862                                         ret = fixup_extent_refs(
7863                                                         root->fs_info,
7864                                                         extent_cache, rec);
7865                                         if (ret)
7866                                                 goto repair_abort;
7867                                         fixed = 1;
7868                                 }
7869                         }
7870                         err = 1;
7871                         cur_err = 1;
7872                 }
7873                 if (all_backpointers_checked(rec, 1)) {
7874                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7875                                 (unsigned long long)rec->start,
7876                                 (unsigned long long)rec->nr);
7877
7878                         if (!fixed && !recorded && repair) {
7879                                 ret = fixup_extent_refs(root->fs_info,
7880                                                         extent_cache, rec);
7881                                 if (ret)
7882                                         goto repair_abort;
7883                                 fixed = 1;
7884                         }
7885                         cur_err = 1;
7886                         err = 1;
7887                 }
7888                 if (!rec->owner_ref_checked) {
7889                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7890                                 (unsigned long long)rec->start,
7891                                 (unsigned long long)rec->nr);
7892                         if (!fixed && !recorded && repair) {
7893                                 ret = fixup_extent_refs(root->fs_info,
7894                                                         extent_cache, rec);
7895                                 if (ret)
7896                                         goto repair_abort;
7897                                 fixed = 1;
7898                         }
7899                         err = 1;
7900                         cur_err = 1;
7901                 }
7902                 if (rec->bad_full_backref) {
7903                         fprintf(stderr, "bad full backref, on [%llu]\n",
7904                                 (unsigned long long)rec->start);
7905                         if (repair) {
7906                                 ret = fixup_extent_flags(root->fs_info, rec);
7907                                 if (ret)
7908                                         goto repair_abort;
7909                                 fixed = 1;
7910                         }
7911                         err = 1;
7912                         cur_err = 1;
7913                 }
7914                 /*
7915                  * Although it's not a extent ref's problem, we reuse this
7916                  * routine for error reporting.
7917                  * No repair function yet.
7918                  */
7919                 if (rec->crossing_stripes) {
7920                         fprintf(stderr,
7921                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7922                                 rec->start, rec->start + rec->max_size);
7923                         err = 1;
7924                         cur_err = 1;
7925                 }
7926
7927                 if (rec->wrong_chunk_type) {
7928                         fprintf(stderr,
7929                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7930                                 rec->start, rec->start + rec->max_size);
7931                         err = 1;
7932                         cur_err = 1;
7933                 }
7934
7935                 remove_cache_extent(extent_cache, cache);
7936                 free_all_extent_backrefs(rec);
7937                 if (!init_extent_tree && repair && (!cur_err || fixed))
7938                         clear_extent_dirty(root->fs_info->excluded_extents,
7939                                            rec->start,
7940                                            rec->start + rec->max_size - 1,
7941                                            GFP_NOFS);
7942                 free(rec);
7943         }
7944 repair_abort:
7945         if (repair) {
7946                 if (ret && ret != -EAGAIN) {
7947                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7948                         exit(1);
7949                 } else if (!ret) {
7950                         struct btrfs_trans_handle *trans;
7951
7952                         root = root->fs_info->extent_root;
7953                         trans = btrfs_start_transaction(root, 1);
7954                         if (IS_ERR(trans)) {
7955                                 ret = PTR_ERR(trans);
7956                                 goto repair_abort;
7957                         }
7958
7959                         btrfs_fix_block_accounting(trans, root);
7960                         ret = btrfs_commit_transaction(trans, root);
7961                         if (ret)
7962                                 goto repair_abort;
7963                 }
7964                 if (err)
7965                         fprintf(stderr, "repaired damaged extent references\n");
7966                 return ret;
7967         }
7968         return err;
7969 }
7970
7971 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7972 {
7973         u64 stripe_size;
7974
7975         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7976                 stripe_size = length;
7977                 stripe_size /= num_stripes;
7978         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7979                 stripe_size = length * 2;
7980                 stripe_size /= num_stripes;
7981         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7982                 stripe_size = length;
7983                 stripe_size /= (num_stripes - 1);
7984         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7985                 stripe_size = length;
7986                 stripe_size /= (num_stripes - 2);
7987         } else {
7988                 stripe_size = length;
7989         }
7990         return stripe_size;
7991 }
7992
7993 /*
7994  * Check the chunk with its block group/dev list ref:
7995  * Return 0 if all refs seems valid.
7996  * Return 1 if part of refs seems valid, need later check for rebuild ref
7997  * like missing block group and needs to search extent tree to rebuild them.
7998  * Return -1 if essential refs are missing and unable to rebuild.
7999  */
8000 static int check_chunk_refs(struct chunk_record *chunk_rec,
8001                             struct block_group_tree *block_group_cache,
8002                             struct device_extent_tree *dev_extent_cache,
8003                             int silent)
8004 {
8005         struct cache_extent *block_group_item;
8006         struct block_group_record *block_group_rec;
8007         struct cache_extent *dev_extent_item;
8008         struct device_extent_record *dev_extent_rec;
8009         u64 devid;
8010         u64 offset;
8011         u64 length;
8012         int metadump_v2 = 0;
8013         int i;
8014         int ret = 0;
8015
8016         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8017                                                chunk_rec->offset,
8018                                                chunk_rec->length);
8019         if (block_group_item) {
8020                 block_group_rec = container_of(block_group_item,
8021                                                struct block_group_record,
8022                                                cache);
8023                 if (chunk_rec->length != block_group_rec->offset ||
8024                     chunk_rec->offset != block_group_rec->objectid ||
8025                     (!metadump_v2 &&
8026                      chunk_rec->type_flags != block_group_rec->flags)) {
8027                         if (!silent)
8028                                 fprintf(stderr,
8029                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8030                                         chunk_rec->objectid,
8031                                         chunk_rec->type,
8032                                         chunk_rec->offset,
8033                                         chunk_rec->length,
8034                                         chunk_rec->offset,
8035                                         chunk_rec->type_flags,
8036                                         block_group_rec->objectid,
8037                                         block_group_rec->type,
8038                                         block_group_rec->offset,
8039                                         block_group_rec->offset,
8040                                         block_group_rec->objectid,
8041                                         block_group_rec->flags);
8042                         ret = -1;
8043                 } else {
8044                         list_del_init(&block_group_rec->list);
8045                         chunk_rec->bg_rec = block_group_rec;
8046                 }
8047         } else {
8048                 if (!silent)
8049                         fprintf(stderr,
8050                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8051                                 chunk_rec->objectid,
8052                                 chunk_rec->type,
8053                                 chunk_rec->offset,
8054                                 chunk_rec->length,
8055                                 chunk_rec->offset,
8056                                 chunk_rec->type_flags);
8057                 ret = 1;
8058         }
8059
8060         if (metadump_v2)
8061                 return ret;
8062
8063         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8064                                     chunk_rec->num_stripes);
8065         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8066                 devid = chunk_rec->stripes[i].devid;
8067                 offset = chunk_rec->stripes[i].offset;
8068                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8069                                                        devid, offset, length);
8070                 if (dev_extent_item) {
8071                         dev_extent_rec = container_of(dev_extent_item,
8072                                                 struct device_extent_record,
8073                                                 cache);
8074                         if (dev_extent_rec->objectid != devid ||
8075                             dev_extent_rec->offset != offset ||
8076                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8077                             dev_extent_rec->length != length) {
8078                                 if (!silent)
8079                                         fprintf(stderr,
8080                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8081                                                 chunk_rec->objectid,
8082                                                 chunk_rec->type,
8083                                                 chunk_rec->offset,
8084                                                 chunk_rec->stripes[i].devid,
8085                                                 chunk_rec->stripes[i].offset,
8086                                                 dev_extent_rec->objectid,
8087                                                 dev_extent_rec->offset,
8088                                                 dev_extent_rec->length);
8089                                 ret = -1;
8090                         } else {
8091                                 list_move(&dev_extent_rec->chunk_list,
8092                                           &chunk_rec->dextents);
8093                         }
8094                 } else {
8095                         if (!silent)
8096                                 fprintf(stderr,
8097                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8098                                         chunk_rec->objectid,
8099                                         chunk_rec->type,
8100                                         chunk_rec->offset,
8101                                         chunk_rec->stripes[i].devid,
8102                                         chunk_rec->stripes[i].offset);
8103                         ret = -1;
8104                 }
8105         }
8106         return ret;
8107 }
8108
8109 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8110 int check_chunks(struct cache_tree *chunk_cache,
8111                  struct block_group_tree *block_group_cache,
8112                  struct device_extent_tree *dev_extent_cache,
8113                  struct list_head *good, struct list_head *bad,
8114                  struct list_head *rebuild, int silent)
8115 {
8116         struct cache_extent *chunk_item;
8117         struct chunk_record *chunk_rec;
8118         struct block_group_record *bg_rec;
8119         struct device_extent_record *dext_rec;
8120         int err;
8121         int ret = 0;
8122
8123         chunk_item = first_cache_extent(chunk_cache);
8124         while (chunk_item) {
8125                 chunk_rec = container_of(chunk_item, struct chunk_record,
8126                                          cache);
8127                 err = check_chunk_refs(chunk_rec, block_group_cache,
8128                                        dev_extent_cache, silent);
8129                 if (err < 0)
8130                         ret = err;
8131                 if (err == 0 && good)
8132                         list_add_tail(&chunk_rec->list, good);
8133                 if (err > 0 && rebuild)
8134                         list_add_tail(&chunk_rec->list, rebuild);
8135                 if (err < 0 && bad)
8136                         list_add_tail(&chunk_rec->list, bad);
8137                 chunk_item = next_cache_extent(chunk_item);
8138         }
8139
8140         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8141                 if (!silent)
8142                         fprintf(stderr,
8143                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8144                                 bg_rec->objectid,
8145                                 bg_rec->offset,
8146                                 bg_rec->flags);
8147                 if (!ret)
8148                         ret = 1;
8149         }
8150
8151         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8152                             chunk_list) {
8153                 if (!silent)
8154                         fprintf(stderr,
8155                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8156                                 dext_rec->objectid,
8157                                 dext_rec->offset,
8158                                 dext_rec->length);
8159                 if (!ret)
8160                         ret = 1;
8161         }
8162         return ret;
8163 }
8164
8165
8166 static int check_device_used(struct device_record *dev_rec,
8167                              struct device_extent_tree *dext_cache)
8168 {
8169         struct cache_extent *cache;
8170         struct device_extent_record *dev_extent_rec;
8171         u64 total_byte = 0;
8172
8173         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8174         while (cache) {
8175                 dev_extent_rec = container_of(cache,
8176                                               struct device_extent_record,
8177                                               cache);
8178                 if (dev_extent_rec->objectid != dev_rec->devid)
8179                         break;
8180
8181                 list_del_init(&dev_extent_rec->device_list);
8182                 total_byte += dev_extent_rec->length;
8183                 cache = next_cache_extent(cache);
8184         }
8185
8186         if (total_byte != dev_rec->byte_used) {
8187                 fprintf(stderr,
8188                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8189                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8190                         dev_rec->type, dev_rec->offset);
8191                 return -1;
8192         } else {
8193                 return 0;
8194         }
8195 }
8196
8197 /* check btrfs_dev_item -> btrfs_dev_extent */
8198 static int check_devices(struct rb_root *dev_cache,
8199                          struct device_extent_tree *dev_extent_cache)
8200 {
8201         struct rb_node *dev_node;
8202         struct device_record *dev_rec;
8203         struct device_extent_record *dext_rec;
8204         int err;
8205         int ret = 0;
8206
8207         dev_node = rb_first(dev_cache);
8208         while (dev_node) {
8209                 dev_rec = container_of(dev_node, struct device_record, node);
8210                 err = check_device_used(dev_rec, dev_extent_cache);
8211                 if (err)
8212                         ret = err;
8213
8214                 dev_node = rb_next(dev_node);
8215         }
8216         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8217                             device_list) {
8218                 fprintf(stderr,
8219                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8220                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8221                 if (!ret)
8222                         ret = 1;
8223         }
8224         return ret;
8225 }
8226
8227 static int add_root_item_to_list(struct list_head *head,
8228                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8229                                   u8 level, u8 drop_level,
8230                                   int level_size, struct btrfs_key *drop_key)
8231 {
8232
8233         struct root_item_record *ri_rec;
8234         ri_rec = malloc(sizeof(*ri_rec));
8235         if (!ri_rec)
8236                 return -ENOMEM;
8237         ri_rec->bytenr = bytenr;
8238         ri_rec->objectid = objectid;
8239         ri_rec->level = level;
8240         ri_rec->level_size = level_size;
8241         ri_rec->drop_level = drop_level;
8242         ri_rec->last_snapshot = last_snapshot;
8243         if (drop_key)
8244                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8245         list_add_tail(&ri_rec->list, head);
8246
8247         return 0;
8248 }
8249
8250 static void free_root_item_list(struct list_head *list)
8251 {
8252         struct root_item_record *ri_rec;
8253
8254         while (!list_empty(list)) {
8255                 ri_rec = list_first_entry(list, struct root_item_record,
8256                                           list);
8257                 list_del_init(&ri_rec->list);
8258                 free(ri_rec);
8259         }
8260 }
8261
8262 static int deal_root_from_list(struct list_head *list,
8263                                struct btrfs_root *root,
8264                                struct block_info *bits,
8265                                int bits_nr,
8266                                struct cache_tree *pending,
8267                                struct cache_tree *seen,
8268                                struct cache_tree *reada,
8269                                struct cache_tree *nodes,
8270                                struct cache_tree *extent_cache,
8271                                struct cache_tree *chunk_cache,
8272                                struct rb_root *dev_cache,
8273                                struct block_group_tree *block_group_cache,
8274                                struct device_extent_tree *dev_extent_cache)
8275 {
8276         int ret = 0;
8277         u64 last;
8278
8279         while (!list_empty(list)) {
8280                 struct root_item_record *rec;
8281                 struct extent_buffer *buf;
8282                 rec = list_entry(list->next,
8283                                  struct root_item_record, list);
8284                 last = 0;
8285                 buf = read_tree_block(root->fs_info->tree_root,
8286                                       rec->bytenr, rec->level_size, 0);
8287                 if (!extent_buffer_uptodate(buf)) {
8288                         free_extent_buffer(buf);
8289                         ret = -EIO;
8290                         break;
8291                 }
8292                 add_root_to_pending(buf, extent_cache, pending,
8293                                     seen, nodes, rec->objectid);
8294                 /*
8295                  * To rebuild extent tree, we need deal with snapshot
8296                  * one by one, otherwise we deal with node firstly which
8297                  * can maximize readahead.
8298                  */
8299                 while (1) {
8300                         ret = run_next_block(root, bits, bits_nr, &last,
8301                                              pending, seen, reada, nodes,
8302                                              extent_cache, chunk_cache,
8303                                              dev_cache, block_group_cache,
8304                                              dev_extent_cache, rec);
8305                         if (ret != 0)
8306                                 break;
8307                 }
8308                 free_extent_buffer(buf);
8309                 list_del(&rec->list);
8310                 free(rec);
8311                 if (ret < 0)
8312                         break;
8313         }
8314         while (ret >= 0) {
8315                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8316                                      reada, nodes, extent_cache, chunk_cache,
8317                                      dev_cache, block_group_cache,
8318                                      dev_extent_cache, NULL);
8319                 if (ret != 0) {
8320                         if (ret > 0)
8321                                 ret = 0;
8322                         break;
8323                 }
8324         }
8325         return ret;
8326 }
8327
8328 static int check_chunks_and_extents(struct btrfs_root *root)
8329 {
8330         struct rb_root dev_cache;
8331         struct cache_tree chunk_cache;
8332         struct block_group_tree block_group_cache;
8333         struct device_extent_tree dev_extent_cache;
8334         struct cache_tree extent_cache;
8335         struct cache_tree seen;
8336         struct cache_tree pending;
8337         struct cache_tree reada;
8338         struct cache_tree nodes;
8339         struct extent_io_tree excluded_extents;
8340         struct cache_tree corrupt_blocks;
8341         struct btrfs_path path;
8342         struct btrfs_key key;
8343         struct btrfs_key found_key;
8344         int ret, err = 0;
8345         struct block_info *bits;
8346         int bits_nr;
8347         struct extent_buffer *leaf;
8348         int slot;
8349         struct btrfs_root_item ri;
8350         struct list_head dropping_trees;
8351         struct list_head normal_trees;
8352         struct btrfs_root *root1;
8353         u64 objectid;
8354         u32 level_size;
8355         u8 level;
8356
8357         dev_cache = RB_ROOT;
8358         cache_tree_init(&chunk_cache);
8359         block_group_tree_init(&block_group_cache);
8360         device_extent_tree_init(&dev_extent_cache);
8361
8362         cache_tree_init(&extent_cache);
8363         cache_tree_init(&seen);
8364         cache_tree_init(&pending);
8365         cache_tree_init(&nodes);
8366         cache_tree_init(&reada);
8367         cache_tree_init(&corrupt_blocks);
8368         extent_io_tree_init(&excluded_extents);
8369         INIT_LIST_HEAD(&dropping_trees);
8370         INIT_LIST_HEAD(&normal_trees);
8371
8372         if (repair) {
8373                 root->fs_info->excluded_extents = &excluded_extents;
8374                 root->fs_info->fsck_extent_cache = &extent_cache;
8375                 root->fs_info->free_extent_hook = free_extent_hook;
8376                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8377         }
8378
8379         bits_nr = 1024;
8380         bits = malloc(bits_nr * sizeof(struct block_info));
8381         if (!bits) {
8382                 perror("malloc");
8383                 exit(1);
8384         }
8385
8386         if (ctx.progress_enabled) {
8387                 ctx.tp = TASK_EXTENTS;
8388                 task_start(ctx.info);
8389         }
8390
8391 again:
8392         root1 = root->fs_info->tree_root;
8393         level = btrfs_header_level(root1->node);
8394         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8395                                     root1->node->start, 0, level, 0,
8396                                     root1->nodesize, NULL);
8397         if (ret < 0)
8398                 goto out;
8399         root1 = root->fs_info->chunk_root;
8400         level = btrfs_header_level(root1->node);
8401         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8402                                     root1->node->start, 0, level, 0,
8403                                     root1->nodesize, NULL);
8404         if (ret < 0)
8405                 goto out;
8406         btrfs_init_path(&path);
8407         key.offset = 0;
8408         key.objectid = 0;
8409         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8410         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8411                                         &key, &path, 0, 0);
8412         if (ret < 0)
8413                 goto out;
8414         while(1) {
8415                 leaf = path.nodes[0];
8416                 slot = path.slots[0];
8417                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8418                         ret = btrfs_next_leaf(root, &path);
8419                         if (ret != 0)
8420                                 break;
8421                         leaf = path.nodes[0];
8422                         slot = path.slots[0];
8423                 }
8424                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8425                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8426                         unsigned long offset;
8427                         u64 last_snapshot;
8428
8429                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8430                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8431                         last_snapshot = btrfs_root_last_snapshot(&ri);
8432                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8433                                 level = btrfs_root_level(&ri);
8434                                 level_size = root->nodesize;
8435                                 ret = add_root_item_to_list(&normal_trees,
8436                                                 found_key.objectid,
8437                                                 btrfs_root_bytenr(&ri),
8438                                                 last_snapshot, level,
8439                                                 0, level_size, NULL);
8440                                 if (ret < 0)
8441                                         goto out;
8442                         } else {
8443                                 level = btrfs_root_level(&ri);
8444                                 level_size = root->nodesize;
8445                                 objectid = found_key.objectid;
8446                                 btrfs_disk_key_to_cpu(&found_key,
8447                                                       &ri.drop_progress);
8448                                 ret = add_root_item_to_list(&dropping_trees,
8449                                                 objectid,
8450                                                 btrfs_root_bytenr(&ri),
8451                                                 last_snapshot, level,
8452                                                 ri.drop_level,
8453                                                 level_size, &found_key);
8454                                 if (ret < 0)
8455                                         goto out;
8456                         }
8457                 }
8458                 path.slots[0]++;
8459         }
8460         btrfs_release_path(&path);
8461
8462         /*
8463          * check_block can return -EAGAIN if it fixes something, please keep
8464          * this in mind when dealing with return values from these functions, if
8465          * we get -EAGAIN we want to fall through and restart the loop.
8466          */
8467         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8468                                   &seen, &reada, &nodes, &extent_cache,
8469                                   &chunk_cache, &dev_cache, &block_group_cache,
8470                                   &dev_extent_cache);
8471         if (ret < 0) {
8472                 if (ret == -EAGAIN)
8473                         goto loop;
8474                 goto out;
8475         }
8476         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8477                                   &pending, &seen, &reada, &nodes,
8478                                   &extent_cache, &chunk_cache, &dev_cache,
8479                                   &block_group_cache, &dev_extent_cache);
8480         if (ret < 0) {
8481                 if (ret == -EAGAIN)
8482                         goto loop;
8483                 goto out;
8484         }
8485
8486         ret = check_chunks(&chunk_cache, &block_group_cache,
8487                            &dev_extent_cache, NULL, NULL, NULL, 0);
8488         if (ret) {
8489                 if (ret == -EAGAIN)
8490                         goto loop;
8491                 err = ret;
8492         }
8493
8494         ret = check_extent_refs(root, &extent_cache);
8495         if (ret < 0) {
8496                 if (ret == -EAGAIN)
8497                         goto loop;
8498                 goto out;
8499         }
8500
8501         ret = check_devices(&dev_cache, &dev_extent_cache);
8502         if (ret && err)
8503                 ret = err;
8504
8505 out:
8506         task_stop(ctx.info);
8507         if (repair) {
8508                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8509                 extent_io_tree_cleanup(&excluded_extents);
8510                 root->fs_info->fsck_extent_cache = NULL;
8511                 root->fs_info->free_extent_hook = NULL;
8512                 root->fs_info->corrupt_blocks = NULL;
8513                 root->fs_info->excluded_extents = NULL;
8514         }
8515         free(bits);
8516         free_chunk_cache_tree(&chunk_cache);
8517         free_device_cache_tree(&dev_cache);
8518         free_block_group_tree(&block_group_cache);
8519         free_device_extent_tree(&dev_extent_cache);
8520         free_extent_cache_tree(&seen);
8521         free_extent_cache_tree(&pending);
8522         free_extent_cache_tree(&reada);
8523         free_extent_cache_tree(&nodes);
8524         return ret;
8525 loop:
8526         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8527         free_extent_cache_tree(&seen);
8528         free_extent_cache_tree(&pending);
8529         free_extent_cache_tree(&reada);
8530         free_extent_cache_tree(&nodes);
8531         free_chunk_cache_tree(&chunk_cache);
8532         free_block_group_tree(&block_group_cache);
8533         free_device_cache_tree(&dev_cache);
8534         free_device_extent_tree(&dev_extent_cache);
8535         free_extent_record_cache(root->fs_info, &extent_cache);
8536         free_root_item_list(&normal_trees);
8537         free_root_item_list(&dropping_trees);
8538         extent_io_tree_cleanup(&excluded_extents);
8539         goto again;
8540 }
8541
8542 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8543                            struct btrfs_root *root, int overwrite)
8544 {
8545         struct extent_buffer *c;
8546         struct extent_buffer *old = root->node;
8547         int level;
8548         int ret;
8549         struct btrfs_disk_key disk_key = {0,0,0};
8550
8551         level = 0;
8552
8553         if (overwrite) {
8554                 c = old;
8555                 extent_buffer_get(c);
8556                 goto init;
8557         }
8558         c = btrfs_alloc_free_block(trans, root,
8559                                    root->nodesize,
8560                                    root->root_key.objectid,
8561                                    &disk_key, level, 0, 0);
8562         if (IS_ERR(c)) {
8563                 c = old;
8564                 extent_buffer_get(c);
8565                 overwrite = 1;
8566         }
8567 init:
8568         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8569         btrfs_set_header_level(c, level);
8570         btrfs_set_header_bytenr(c, c->start);
8571         btrfs_set_header_generation(c, trans->transid);
8572         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8573         btrfs_set_header_owner(c, root->root_key.objectid);
8574
8575         write_extent_buffer(c, root->fs_info->fsid,
8576                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8577
8578         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8579                             btrfs_header_chunk_tree_uuid(c),
8580                             BTRFS_UUID_SIZE);
8581
8582         btrfs_mark_buffer_dirty(c);
8583         /*
8584          * this case can happen in the following case:
8585          *
8586          * 1.overwrite previous root.
8587          *
8588          * 2.reinit reloc data root, this is because we skip pin
8589          * down reloc data tree before which means we can allocate
8590          * same block bytenr here.
8591          */
8592         if (old->start == c->start) {
8593                 btrfs_set_root_generation(&root->root_item,
8594                                           trans->transid);
8595                 root->root_item.level = btrfs_header_level(root->node);
8596                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8597                                         &root->root_key, &root->root_item);
8598                 if (ret) {
8599                         free_extent_buffer(c);
8600                         return ret;
8601                 }
8602         }
8603         free_extent_buffer(old);
8604         root->node = c;
8605         add_root_to_dirty_list(root);
8606         return 0;
8607 }
8608
8609 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8610                                 struct extent_buffer *eb, int tree_root)
8611 {
8612         struct extent_buffer *tmp;
8613         struct btrfs_root_item *ri;
8614         struct btrfs_key key;
8615         u64 bytenr;
8616         u32 nodesize;
8617         int level = btrfs_header_level(eb);
8618         int nritems;
8619         int ret;
8620         int i;
8621
8622         /*
8623          * If we have pinned this block before, don't pin it again.
8624          * This can not only avoid forever loop with broken filesystem
8625          * but also give us some speedups.
8626          */
8627         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8628                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8629                 return 0;
8630
8631         btrfs_pin_extent(fs_info, eb->start, eb->len);
8632
8633         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8634         nritems = btrfs_header_nritems(eb);
8635         for (i = 0; i < nritems; i++) {
8636                 if (level == 0) {
8637                         btrfs_item_key_to_cpu(eb, &key, i);
8638                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8639                                 continue;
8640                         /* Skip the extent root and reloc roots */
8641                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8642                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8643                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8644                                 continue;
8645                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8646                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8647
8648                         /*
8649                          * If at any point we start needing the real root we
8650                          * will have to build a stump root for the root we are
8651                          * in, but for now this doesn't actually use the root so
8652                          * just pass in extent_root.
8653                          */
8654                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8655                                               nodesize, 0);
8656                         if (!extent_buffer_uptodate(tmp)) {
8657                                 fprintf(stderr, "Error reading root block\n");
8658                                 return -EIO;
8659                         }
8660                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8661                         free_extent_buffer(tmp);
8662                         if (ret)
8663                                 return ret;
8664                 } else {
8665                         bytenr = btrfs_node_blockptr(eb, i);
8666
8667                         /* If we aren't the tree root don't read the block */
8668                         if (level == 1 && !tree_root) {
8669                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8670                                 continue;
8671                         }
8672
8673                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8674                                               nodesize, 0);
8675                         if (!extent_buffer_uptodate(tmp)) {
8676                                 fprintf(stderr, "Error reading tree block\n");
8677                                 return -EIO;
8678                         }
8679                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8680                         free_extent_buffer(tmp);
8681                         if (ret)
8682                                 return ret;
8683                 }
8684         }
8685
8686         return 0;
8687 }
8688
8689 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8690 {
8691         int ret;
8692
8693         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8694         if (ret)
8695                 return ret;
8696
8697         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8698 }
8699
8700 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8701 {
8702         struct btrfs_block_group_cache *cache;
8703         struct btrfs_path *path;
8704         struct extent_buffer *leaf;
8705         struct btrfs_chunk *chunk;
8706         struct btrfs_key key;
8707         int ret;
8708         u64 start;
8709
8710         path = btrfs_alloc_path();
8711         if (!path)
8712                 return -ENOMEM;
8713
8714         key.objectid = 0;
8715         key.type = BTRFS_CHUNK_ITEM_KEY;
8716         key.offset = 0;
8717
8718         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8719         if (ret < 0) {
8720                 btrfs_free_path(path);
8721                 return ret;
8722         }
8723
8724         /*
8725          * We do this in case the block groups were screwed up and had alloc
8726          * bits that aren't actually set on the chunks.  This happens with
8727          * restored images every time and could happen in real life I guess.
8728          */
8729         fs_info->avail_data_alloc_bits = 0;
8730         fs_info->avail_metadata_alloc_bits = 0;
8731         fs_info->avail_system_alloc_bits = 0;
8732
8733         /* First we need to create the in-memory block groups */
8734         while (1) {
8735                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8736                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8737                         if (ret < 0) {
8738                                 btrfs_free_path(path);
8739                                 return ret;
8740                         }
8741                         if (ret) {
8742                                 ret = 0;
8743                                 break;
8744                         }
8745                 }
8746                 leaf = path->nodes[0];
8747                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8748                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8749                         path->slots[0]++;
8750                         continue;
8751                 }
8752
8753                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8754                                        struct btrfs_chunk);
8755                 btrfs_add_block_group(fs_info, 0,
8756                                       btrfs_chunk_type(leaf, chunk),
8757                                       key.objectid, key.offset,
8758                                       btrfs_chunk_length(leaf, chunk));
8759                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8760                                  key.offset + btrfs_chunk_length(leaf, chunk),
8761                                  GFP_NOFS);
8762                 path->slots[0]++;
8763         }
8764         start = 0;
8765         while (1) {
8766                 cache = btrfs_lookup_first_block_group(fs_info, start);
8767                 if (!cache)
8768                         break;
8769                 cache->cached = 1;
8770                 start = cache->key.objectid + cache->key.offset;
8771         }
8772
8773         btrfs_free_path(path);
8774         return 0;
8775 }
8776
8777 static int reset_balance(struct btrfs_trans_handle *trans,
8778                          struct btrfs_fs_info *fs_info)
8779 {
8780         struct btrfs_root *root = fs_info->tree_root;
8781         struct btrfs_path *path;
8782         struct extent_buffer *leaf;
8783         struct btrfs_key key;
8784         int del_slot, del_nr = 0;
8785         int ret;
8786         int found = 0;
8787
8788         path = btrfs_alloc_path();
8789         if (!path)
8790                 return -ENOMEM;
8791
8792         key.objectid = BTRFS_BALANCE_OBJECTID;
8793         key.type = BTRFS_BALANCE_ITEM_KEY;
8794         key.offset = 0;
8795
8796         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8797         if (ret) {
8798                 if (ret > 0)
8799                         ret = 0;
8800                 if (!ret)
8801                         goto reinit_data_reloc;
8802                 else
8803                         goto out;
8804         }
8805
8806         ret = btrfs_del_item(trans, root, path);
8807         if (ret)
8808                 goto out;
8809         btrfs_release_path(path);
8810
8811         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8812         key.type = BTRFS_ROOT_ITEM_KEY;
8813         key.offset = 0;
8814
8815         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8816         if (ret < 0)
8817                 goto out;
8818         while (1) {
8819                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8820                         if (!found)
8821                                 break;
8822
8823                         if (del_nr) {
8824                                 ret = btrfs_del_items(trans, root, path,
8825                                                       del_slot, del_nr);
8826                                 del_nr = 0;
8827                                 if (ret)
8828                                         goto out;
8829                         }
8830                         key.offset++;
8831                         btrfs_release_path(path);
8832
8833                         found = 0;
8834                         ret = btrfs_search_slot(trans, root, &key, path,
8835                                                 -1, 1);
8836                         if (ret < 0)
8837                                 goto out;
8838                         continue;
8839                 }
8840                 found = 1;
8841                 leaf = path->nodes[0];
8842                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8843                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8844                         break;
8845                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8846                         path->slots[0]++;
8847                         continue;
8848                 }
8849                 if (!del_nr) {
8850                         del_slot = path->slots[0];
8851                         del_nr = 1;
8852                 } else {
8853                         del_nr++;
8854                 }
8855                 path->slots[0]++;
8856         }
8857
8858         if (del_nr) {
8859                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8860                 if (ret)
8861                         goto out;
8862         }
8863         btrfs_release_path(path);
8864
8865 reinit_data_reloc:
8866         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8867         key.type = BTRFS_ROOT_ITEM_KEY;
8868         key.offset = (u64)-1;
8869         root = btrfs_read_fs_root(fs_info, &key);
8870         if (IS_ERR(root)) {
8871                 fprintf(stderr, "Error reading data reloc tree\n");
8872                 ret = PTR_ERR(root);
8873                 goto out;
8874         }
8875         record_root_in_trans(trans, root);
8876         ret = btrfs_fsck_reinit_root(trans, root, 0);
8877         if (ret)
8878                 goto out;
8879         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8880 out:
8881         btrfs_free_path(path);
8882         return ret;
8883 }
8884
8885 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8886                               struct btrfs_fs_info *fs_info)
8887 {
8888         u64 start = 0;
8889         int ret;
8890
8891         /*
8892          * The only reason we don't do this is because right now we're just
8893          * walking the trees we find and pinning down their bytes, we don't look
8894          * at any of the leaves.  In order to do mixed groups we'd have to check
8895          * the leaves of any fs roots and pin down the bytes for any file
8896          * extents we find.  Not hard but why do it if we don't have to?
8897          */
8898         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8899                 fprintf(stderr, "We don't support re-initing the extent tree "
8900                         "for mixed block groups yet, please notify a btrfs "
8901                         "developer you want to do this so they can add this "
8902                         "functionality.\n");
8903                 return -EINVAL;
8904         }
8905
8906         /*
8907          * first we need to walk all of the trees except the extent tree and pin
8908          * down the bytes that are in use so we don't overwrite any existing
8909          * metadata.
8910          */
8911         ret = pin_metadata_blocks(fs_info);
8912         if (ret) {
8913                 fprintf(stderr, "error pinning down used bytes\n");
8914                 return ret;
8915         }
8916
8917         /*
8918          * Need to drop all the block groups since we're going to recreate all
8919          * of them again.
8920          */
8921         btrfs_free_block_groups(fs_info);
8922         ret = reset_block_groups(fs_info);
8923         if (ret) {
8924                 fprintf(stderr, "error resetting the block groups\n");
8925                 return ret;
8926         }
8927
8928         /* Ok we can allocate now, reinit the extent root */
8929         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8930         if (ret) {
8931                 fprintf(stderr, "extent root initialization failed\n");
8932                 /*
8933                  * When the transaction code is updated we should end the
8934                  * transaction, but for now progs only knows about commit so
8935                  * just return an error.
8936                  */
8937                 return ret;
8938         }
8939
8940         /*
8941          * Now we have all the in-memory block groups setup so we can make
8942          * allocations properly, and the metadata we care about is safe since we
8943          * pinned all of it above.
8944          */
8945         while (1) {
8946                 struct btrfs_block_group_cache *cache;
8947
8948                 cache = btrfs_lookup_first_block_group(fs_info, start);
8949                 if (!cache)
8950                         break;
8951                 start = cache->key.objectid + cache->key.offset;
8952                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8953                                         &cache->key, &cache->item,
8954                                         sizeof(cache->item));
8955                 if (ret) {
8956                         fprintf(stderr, "Error adding block group\n");
8957                         return ret;
8958                 }
8959                 btrfs_extent_post_op(trans, fs_info->extent_root);
8960         }
8961
8962         ret = reset_balance(trans, fs_info);
8963         if (ret)
8964                 fprintf(stderr, "error resetting the pending balance\n");
8965
8966         return ret;
8967 }
8968
8969 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8970 {
8971         struct btrfs_path *path;
8972         struct btrfs_trans_handle *trans;
8973         struct btrfs_key key;
8974         int ret;
8975
8976         printf("Recowing metadata block %llu\n", eb->start);
8977         key.objectid = btrfs_header_owner(eb);
8978         key.type = BTRFS_ROOT_ITEM_KEY;
8979         key.offset = (u64)-1;
8980
8981         root = btrfs_read_fs_root(root->fs_info, &key);
8982         if (IS_ERR(root)) {
8983                 fprintf(stderr, "Couldn't find owner root %llu\n",
8984                         key.objectid);
8985                 return PTR_ERR(root);
8986         }
8987
8988         path = btrfs_alloc_path();
8989         if (!path)
8990                 return -ENOMEM;
8991
8992         trans = btrfs_start_transaction(root, 1);
8993         if (IS_ERR(trans)) {
8994                 btrfs_free_path(path);
8995                 return PTR_ERR(trans);
8996         }
8997
8998         path->lowest_level = btrfs_header_level(eb);
8999         if (path->lowest_level)
9000                 btrfs_node_key_to_cpu(eb, &key, 0);
9001         else
9002                 btrfs_item_key_to_cpu(eb, &key, 0);
9003
9004         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9005         btrfs_commit_transaction(trans, root);
9006         btrfs_free_path(path);
9007         return ret;
9008 }
9009
9010 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9011 {
9012         struct btrfs_path *path;
9013         struct btrfs_trans_handle *trans;
9014         struct btrfs_key key;
9015         int ret;
9016
9017         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9018                bad->key.type, bad->key.offset);
9019         key.objectid = bad->root_id;
9020         key.type = BTRFS_ROOT_ITEM_KEY;
9021         key.offset = (u64)-1;
9022
9023         root = btrfs_read_fs_root(root->fs_info, &key);
9024         if (IS_ERR(root)) {
9025                 fprintf(stderr, "Couldn't find owner root %llu\n",
9026                         key.objectid);
9027                 return PTR_ERR(root);
9028         }
9029
9030         path = btrfs_alloc_path();
9031         if (!path)
9032                 return -ENOMEM;
9033
9034         trans = btrfs_start_transaction(root, 1);
9035         if (IS_ERR(trans)) {
9036                 btrfs_free_path(path);
9037                 return PTR_ERR(trans);
9038         }
9039
9040         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9041         if (ret) {
9042                 if (ret > 0)
9043                         ret = 0;
9044                 goto out;
9045         }
9046         ret = btrfs_del_item(trans, root, path);
9047 out:
9048         btrfs_commit_transaction(trans, root);
9049         btrfs_free_path(path);
9050         return ret;
9051 }
9052
9053 static int zero_log_tree(struct btrfs_root *root)
9054 {
9055         struct btrfs_trans_handle *trans;
9056         int ret;
9057
9058         trans = btrfs_start_transaction(root, 1);
9059         if (IS_ERR(trans)) {
9060                 ret = PTR_ERR(trans);
9061                 return ret;
9062         }
9063         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9064         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9065         ret = btrfs_commit_transaction(trans, root);
9066         return ret;
9067 }
9068
9069 static int populate_csum(struct btrfs_trans_handle *trans,
9070                          struct btrfs_root *csum_root, char *buf, u64 start,
9071                          u64 len)
9072 {
9073         u64 offset = 0;
9074         u64 sectorsize;
9075         int ret = 0;
9076
9077         while (offset < len) {
9078                 sectorsize = csum_root->sectorsize;
9079                 ret = read_extent_data(csum_root, buf, start + offset,
9080                                        &sectorsize, 0);
9081                 if (ret)
9082                         break;
9083                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9084                                             start + offset, buf, sectorsize);
9085                 if (ret)
9086                         break;
9087                 offset += sectorsize;
9088         }
9089         return ret;
9090 }
9091
9092 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9093                                       struct btrfs_root *csum_root,
9094                                       struct btrfs_root *cur_root)
9095 {
9096         struct btrfs_path *path;
9097         struct btrfs_key key;
9098         struct extent_buffer *node;
9099         struct btrfs_file_extent_item *fi;
9100         char *buf = NULL;
9101         u64 start = 0;
9102         u64 len = 0;
9103         int slot = 0;
9104         int ret = 0;
9105
9106         path = btrfs_alloc_path();
9107         if (!path)
9108                 return -ENOMEM;
9109         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9110         if (!buf) {
9111                 ret = -ENOMEM;
9112                 goto out;
9113         }
9114
9115         key.objectid = 0;
9116         key.offset = 0;
9117         key.type = 0;
9118
9119         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9120         if (ret < 0)
9121                 goto out;
9122         /* Iterate all regular file extents and fill its csum */
9123         while (1) {
9124                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9125
9126                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9127                         goto next;
9128                 node = path->nodes[0];
9129                 slot = path->slots[0];
9130                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9131                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9132                         goto next;
9133                 start = btrfs_file_extent_disk_bytenr(node, fi);
9134                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9135
9136                 ret = populate_csum(trans, csum_root, buf, start, len);
9137                 if (ret == -EEXIST)
9138                         ret = 0;
9139                 if (ret < 0)
9140                         goto out;
9141 next:
9142                 /*
9143                  * TODO: if next leaf is corrupted, jump to nearest next valid
9144                  * leaf.
9145                  */
9146                 ret = btrfs_next_item(cur_root, path);
9147                 if (ret < 0)
9148                         goto out;
9149                 if (ret > 0) {
9150                         ret = 0;
9151                         goto out;
9152                 }
9153         }
9154
9155 out:
9156         btrfs_free_path(path);
9157         free(buf);
9158         return ret;
9159 }
9160
9161 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9162                                   struct btrfs_root *csum_root)
9163 {
9164         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9165         struct btrfs_path *path;
9166         struct btrfs_root *tree_root = fs_info->tree_root;
9167         struct btrfs_root *cur_root;
9168         struct extent_buffer *node;
9169         struct btrfs_key key;
9170         int slot = 0;
9171         int ret = 0;
9172
9173         path = btrfs_alloc_path();
9174         if (!path)
9175                 return -ENOMEM;
9176
9177         key.objectid = BTRFS_FS_TREE_OBJECTID;
9178         key.offset = 0;
9179         key.type = BTRFS_ROOT_ITEM_KEY;
9180
9181         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9182         if (ret < 0)
9183                 goto out;
9184         if (ret > 0) {
9185                 ret = -ENOENT;
9186                 goto out;
9187         }
9188
9189         while (1) {
9190                 node = path->nodes[0];
9191                 slot = path->slots[0];
9192                 btrfs_item_key_to_cpu(node, &key, slot);
9193                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9194                         goto out;
9195                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9196                         goto next;
9197                 if (!is_fstree(key.objectid))
9198                         goto next;
9199                 key.offset = (u64)-1;
9200
9201                 cur_root = btrfs_read_fs_root(fs_info, &key);
9202                 if (IS_ERR(cur_root) || !cur_root) {
9203                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9204                                 key.objectid);
9205                         goto out;
9206                 }
9207                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9208                                 cur_root);
9209                 if (ret < 0)
9210                         goto out;
9211 next:
9212                 ret = btrfs_next_item(tree_root, path);
9213                 if (ret > 0) {
9214                         ret = 0;
9215                         goto out;
9216                 }
9217                 if (ret < 0)
9218                         goto out;
9219         }
9220
9221 out:
9222         btrfs_free_path(path);
9223         return ret;
9224 }
9225
9226 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9227                                       struct btrfs_root *csum_root)
9228 {
9229         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9230         struct btrfs_path *path;
9231         struct btrfs_extent_item *ei;
9232         struct extent_buffer *leaf;
9233         char *buf;
9234         struct btrfs_key key;
9235         int ret;
9236
9237         path = btrfs_alloc_path();
9238         if (!path)
9239                 return -ENOMEM;
9240
9241         key.objectid = 0;
9242         key.type = BTRFS_EXTENT_ITEM_KEY;
9243         key.offset = 0;
9244
9245         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9246         if (ret < 0) {
9247                 btrfs_free_path(path);
9248                 return ret;
9249         }
9250
9251         buf = malloc(csum_root->sectorsize);
9252         if (!buf) {
9253                 btrfs_free_path(path);
9254                 return -ENOMEM;
9255         }
9256
9257         while (1) {
9258                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9259                         ret = btrfs_next_leaf(extent_root, path);
9260                         if (ret < 0)
9261                                 break;
9262                         if (ret) {
9263                                 ret = 0;
9264                                 break;
9265                         }
9266                 }
9267                 leaf = path->nodes[0];
9268
9269                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9270                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9271                         path->slots[0]++;
9272                         continue;
9273                 }
9274
9275                 ei = btrfs_item_ptr(leaf, path->slots[0],
9276                                     struct btrfs_extent_item);
9277                 if (!(btrfs_extent_flags(leaf, ei) &
9278                       BTRFS_EXTENT_FLAG_DATA)) {
9279                         path->slots[0]++;
9280                         continue;
9281                 }
9282
9283                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9284                                     key.offset);
9285                 if (ret)
9286                         break;
9287                 path->slots[0]++;
9288         }
9289
9290         btrfs_free_path(path);
9291         free(buf);
9292         return ret;
9293 }
9294
9295 /*
9296  * Recalculate the csum and put it into the csum tree.
9297  *
9298  * Extent tree init will wipe out all the extent info, so in that case, we
9299  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9300  * will use fs/subvol trees to init the csum tree.
9301  */
9302 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9303                           struct btrfs_root *csum_root,
9304                           int search_fs_tree)
9305 {
9306         if (search_fs_tree)
9307                 return fill_csum_tree_from_fs(trans, csum_root);
9308         else
9309                 return fill_csum_tree_from_extent(trans, csum_root);
9310 }
9311
9312 static void free_roots_info_cache(void)
9313 {
9314         if (!roots_info_cache)
9315                 return;
9316
9317         while (!cache_tree_empty(roots_info_cache)) {
9318                 struct cache_extent *entry;
9319                 struct root_item_info *rii;
9320
9321                 entry = first_cache_extent(roots_info_cache);
9322                 if (!entry)
9323                         break;
9324                 remove_cache_extent(roots_info_cache, entry);
9325                 rii = container_of(entry, struct root_item_info, cache_extent);
9326                 free(rii);
9327         }
9328
9329         free(roots_info_cache);
9330         roots_info_cache = NULL;
9331 }
9332
9333 static int build_roots_info_cache(struct btrfs_fs_info *info)
9334 {
9335         int ret = 0;
9336         struct btrfs_key key;
9337         struct extent_buffer *leaf;
9338         struct btrfs_path *path;
9339
9340         if (!roots_info_cache) {
9341                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9342                 if (!roots_info_cache)
9343                         return -ENOMEM;
9344                 cache_tree_init(roots_info_cache);
9345         }
9346
9347         path = btrfs_alloc_path();
9348         if (!path)
9349                 return -ENOMEM;
9350
9351         key.objectid = 0;
9352         key.type = BTRFS_EXTENT_ITEM_KEY;
9353         key.offset = 0;
9354
9355         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9356         if (ret < 0)
9357                 goto out;
9358         leaf = path->nodes[0];
9359
9360         while (1) {
9361                 struct btrfs_key found_key;
9362                 struct btrfs_extent_item *ei;
9363                 struct btrfs_extent_inline_ref *iref;
9364                 int slot = path->slots[0];
9365                 int type;
9366                 u64 flags;
9367                 u64 root_id;
9368                 u8 level;
9369                 struct cache_extent *entry;
9370                 struct root_item_info *rii;
9371
9372                 if (slot >= btrfs_header_nritems(leaf)) {
9373                         ret = btrfs_next_leaf(info->extent_root, path);
9374                         if (ret < 0) {
9375                                 break;
9376                         } else if (ret) {
9377                                 ret = 0;
9378                                 break;
9379                         }
9380                         leaf = path->nodes[0];
9381                         slot = path->slots[0];
9382                 }
9383
9384                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9385
9386                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9387                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9388                         goto next;
9389
9390                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9391                 flags = btrfs_extent_flags(leaf, ei);
9392
9393                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9394                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9395                         goto next;
9396
9397                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9398                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9399                         level = found_key.offset;
9400                 } else {
9401                         struct btrfs_tree_block_info *binfo;
9402
9403                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9404                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9405                         level = btrfs_tree_block_level(leaf, binfo);
9406                 }
9407
9408                 /*
9409                  * For a root extent, it must be of the following type and the
9410                  * first (and only one) iref in the item.
9411                  */
9412                 type = btrfs_extent_inline_ref_type(leaf, iref);
9413                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9414                         goto next;
9415
9416                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9417                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9418                 if (!entry) {
9419                         rii = malloc(sizeof(struct root_item_info));
9420                         if (!rii) {
9421                                 ret = -ENOMEM;
9422                                 goto out;
9423                         }
9424                         rii->cache_extent.start = root_id;
9425                         rii->cache_extent.size = 1;
9426                         rii->level = (u8)-1;
9427                         entry = &rii->cache_extent;
9428                         ret = insert_cache_extent(roots_info_cache, entry);
9429                         ASSERT(ret == 0);
9430                 } else {
9431                         rii = container_of(entry, struct root_item_info,
9432                                            cache_extent);
9433                 }
9434
9435                 ASSERT(rii->cache_extent.start == root_id);
9436                 ASSERT(rii->cache_extent.size == 1);
9437
9438                 if (level > rii->level || rii->level == (u8)-1) {
9439                         rii->level = level;
9440                         rii->bytenr = found_key.objectid;
9441                         rii->gen = btrfs_extent_generation(leaf, ei);
9442                         rii->node_count = 1;
9443                 } else if (level == rii->level) {
9444                         rii->node_count++;
9445                 }
9446 next:
9447                 path->slots[0]++;
9448         }
9449
9450 out:
9451         btrfs_free_path(path);
9452
9453         return ret;
9454 }
9455
9456 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9457                                   struct btrfs_path *path,
9458                                   const struct btrfs_key *root_key,
9459                                   const int read_only_mode)
9460 {
9461         const u64 root_id = root_key->objectid;
9462         struct cache_extent *entry;
9463         struct root_item_info *rii;
9464         struct btrfs_root_item ri;
9465         unsigned long offset;
9466
9467         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9468         if (!entry) {
9469                 fprintf(stderr,
9470                         "Error: could not find extent items for root %llu\n",
9471                         root_key->objectid);
9472                 return -ENOENT;
9473         }
9474
9475         rii = container_of(entry, struct root_item_info, cache_extent);
9476         ASSERT(rii->cache_extent.start == root_id);
9477         ASSERT(rii->cache_extent.size == 1);
9478
9479         if (rii->node_count != 1) {
9480                 fprintf(stderr,
9481                         "Error: could not find btree root extent for root %llu\n",
9482                         root_id);
9483                 return -ENOENT;
9484         }
9485
9486         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9487         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9488
9489         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9490             btrfs_root_level(&ri) != rii->level ||
9491             btrfs_root_generation(&ri) != rii->gen) {
9492
9493                 /*
9494                  * If we're in repair mode but our caller told us to not update
9495                  * the root item, i.e. just check if it needs to be updated, don't
9496                  * print this message, since the caller will call us again shortly
9497                  * for the same root item without read only mode (the caller will
9498                  * open a transaction first).
9499                  */
9500                 if (!(read_only_mode && repair))
9501                         fprintf(stderr,
9502                                 "%sroot item for root %llu,"
9503                                 " current bytenr %llu, current gen %llu, current level %u,"
9504                                 " new bytenr %llu, new gen %llu, new level %u\n",
9505                                 (read_only_mode ? "" : "fixing "),
9506                                 root_id,
9507                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9508                                 btrfs_root_level(&ri),
9509                                 rii->bytenr, rii->gen, rii->level);
9510
9511                 if (btrfs_root_generation(&ri) > rii->gen) {
9512                         fprintf(stderr,
9513                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9514                                 root_id, btrfs_root_generation(&ri), rii->gen);
9515                         return -EINVAL;
9516                 }
9517
9518                 if (!read_only_mode) {
9519                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9520                         btrfs_set_root_level(&ri, rii->level);
9521                         btrfs_set_root_generation(&ri, rii->gen);
9522                         write_extent_buffer(path->nodes[0], &ri,
9523                                             offset, sizeof(ri));
9524                 }
9525
9526                 return 1;
9527         }
9528
9529         return 0;
9530 }
9531
9532 /*
9533  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9534  * caused read-only snapshots to be corrupted if they were created at a moment
9535  * when the source subvolume/snapshot had orphan items. The issue was that the
9536  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9537  * node instead of the post orphan cleanup root node.
9538  * So this function, and its callees, just detects and fixes those cases. Even
9539  * though the regression was for read-only snapshots, this function applies to
9540  * any snapshot/subvolume root.
9541  * This must be run before any other repair code - not doing it so, makes other
9542  * repair code delete or modify backrefs in the extent tree for example, which
9543  * will result in an inconsistent fs after repairing the root items.
9544  */
9545 static int repair_root_items(struct btrfs_fs_info *info)
9546 {
9547         struct btrfs_path *path = NULL;
9548         struct btrfs_key key;
9549         struct extent_buffer *leaf;
9550         struct btrfs_trans_handle *trans = NULL;
9551         int ret = 0;
9552         int bad_roots = 0;
9553         int need_trans = 0;
9554
9555         ret = build_roots_info_cache(info);
9556         if (ret)
9557                 goto out;
9558
9559         path = btrfs_alloc_path();
9560         if (!path) {
9561                 ret = -ENOMEM;
9562                 goto out;
9563         }
9564
9565         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9566         key.type = BTRFS_ROOT_ITEM_KEY;
9567         key.offset = 0;
9568
9569 again:
9570         /*
9571          * Avoid opening and committing transactions if a leaf doesn't have
9572          * any root items that need to be fixed, so that we avoid rotating
9573          * backup roots unnecessarily.
9574          */
9575         if (need_trans) {
9576                 trans = btrfs_start_transaction(info->tree_root, 1);
9577                 if (IS_ERR(trans)) {
9578                         ret = PTR_ERR(trans);
9579                         goto out;
9580                 }
9581         }
9582
9583         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9584                                 0, trans ? 1 : 0);
9585         if (ret < 0)
9586                 goto out;
9587         leaf = path->nodes[0];
9588
9589         while (1) {
9590                 struct btrfs_key found_key;
9591
9592                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9593                         int no_more_keys = find_next_key(path, &key);
9594
9595                         btrfs_release_path(path);
9596                         if (trans) {
9597                                 ret = btrfs_commit_transaction(trans,
9598                                                                info->tree_root);
9599                                 trans = NULL;
9600                                 if (ret < 0)
9601                                         goto out;
9602                         }
9603                         need_trans = 0;
9604                         if (no_more_keys)
9605                                 break;
9606                         goto again;
9607                 }
9608
9609                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9610
9611                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9612                         goto next;
9613                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9614                         goto next;
9615
9616                 ret = maybe_repair_root_item(info, path, &found_key,
9617                                              trans ? 0 : 1);
9618                 if (ret < 0)
9619                         goto out;
9620                 if (ret) {
9621                         if (!trans && repair) {
9622                                 need_trans = 1;
9623                                 key = found_key;
9624                                 btrfs_release_path(path);
9625                                 goto again;
9626                         }
9627                         bad_roots++;
9628                 }
9629 next:
9630                 path->slots[0]++;
9631         }
9632         ret = 0;
9633 out:
9634         free_roots_info_cache();
9635         btrfs_free_path(path);
9636         if (trans)
9637                 btrfs_commit_transaction(trans, info->tree_root);
9638         if (ret < 0)
9639                 return ret;
9640
9641         return bad_roots;
9642 }
9643
9644 const char * const cmd_check_usage[] = {
9645         "btrfs check [options] <device>",
9646         "Check structural integrity of a filesystem (unmounted).",
9647         "Check structural integrity of an unmounted filesystem. Verify internal",
9648         "trees' consistency and item connectivity. In the repair mode try to",
9649         "fix the problems found.",
9650         "WARNING: the repair mode is considered dangerous",
9651         "",
9652         "-s|--super <superblock>     use this superblock copy",
9653         "-b|--backup                 use the first valid backup root copy",
9654         "--repair                    try to repair the filesystem",
9655         "--readonly                  run in read-only mode (default)",
9656         "--init-csum-tree            create a new CRC tree",
9657         "--init-extent-tree          create a new extent tree",
9658         "--check-data-csum           verify checksums of data blocks",
9659         "-Q|--qgroup-report           print a report on qgroup consistency",
9660         "-E|--subvol-extents <subvolid>",
9661         "                            print subvolume extents and sharing state",
9662         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9663         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9664         "-p|--progress               indicate progress",
9665         NULL
9666 };
9667
9668 int cmd_check(int argc, char **argv)
9669 {
9670         struct cache_tree root_cache;
9671         struct btrfs_root *root;
9672         struct btrfs_fs_info *info;
9673         u64 bytenr = 0;
9674         u64 subvolid = 0;
9675         u64 tree_root_bytenr = 0;
9676         u64 chunk_root_bytenr = 0;
9677         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9678         int ret;
9679         u64 num;
9680         int init_csum_tree = 0;
9681         int readonly = 0;
9682         int qgroup_report = 0;
9683         int qgroups_repaired = 0;
9684         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9685
9686         while(1) {
9687                 int c;
9688                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9689                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9690                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9691                 static const struct option long_options[] = {
9692                         { "super", required_argument, NULL, 's' },
9693                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9694                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9695                         { "init-csum-tree", no_argument, NULL,
9696                                 GETOPT_VAL_INIT_CSUM },
9697                         { "init-extent-tree", no_argument, NULL,
9698                                 GETOPT_VAL_INIT_EXTENT },
9699                         { "check-data-csum", no_argument, NULL,
9700                                 GETOPT_VAL_CHECK_CSUM },
9701                         { "backup", no_argument, NULL, 'b' },
9702                         { "subvol-extents", required_argument, NULL, 'E' },
9703                         { "qgroup-report", no_argument, NULL, 'Q' },
9704                         { "tree-root", required_argument, NULL, 'r' },
9705                         { "chunk-root", required_argument, NULL,
9706                                 GETOPT_VAL_CHUNK_TREE },
9707                         { "progress", no_argument, NULL, 'p' },
9708                         { NULL, 0, NULL, 0}
9709                 };
9710
9711                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9712                 if (c < 0)
9713                         break;
9714                 switch(c) {
9715                         case 'a': /* ignored */ break;
9716                         case 'b':
9717                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9718                                 break;
9719                         case 's':
9720                                 num = arg_strtou64(optarg);
9721                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9722                                         fprintf(stderr,
9723                                                 "ERROR: super mirror should be less than: %d\n",
9724                                                 BTRFS_SUPER_MIRROR_MAX);
9725                                         exit(1);
9726                                 }
9727                                 bytenr = btrfs_sb_offset(((int)num));
9728                                 printf("using SB copy %llu, bytenr %llu\n", num,
9729                                        (unsigned long long)bytenr);
9730                                 break;
9731                         case 'Q':
9732                                 qgroup_report = 1;
9733                                 break;
9734                         case 'E':
9735                                 subvolid = arg_strtou64(optarg);
9736                                 break;
9737                         case 'r':
9738                                 tree_root_bytenr = arg_strtou64(optarg);
9739                                 break;
9740                         case GETOPT_VAL_CHUNK_TREE:
9741                                 chunk_root_bytenr = arg_strtou64(optarg);
9742                                 break;
9743                         case 'p':
9744                                 ctx.progress_enabled = true;
9745                                 break;
9746                         case '?':
9747                         case 'h':
9748                                 usage(cmd_check_usage);
9749                         case GETOPT_VAL_REPAIR:
9750                                 printf("enabling repair mode\n");
9751                                 repair = 1;
9752                                 ctree_flags |= OPEN_CTREE_WRITES;
9753                                 break;
9754                         case GETOPT_VAL_READONLY:
9755                                 readonly = 1;
9756                                 break;
9757                         case GETOPT_VAL_INIT_CSUM:
9758                                 printf("Creating a new CRC tree\n");
9759                                 init_csum_tree = 1;
9760                                 repair = 1;
9761                                 ctree_flags |= OPEN_CTREE_WRITES;
9762                                 break;
9763                         case GETOPT_VAL_INIT_EXTENT:
9764                                 init_extent_tree = 1;
9765                                 ctree_flags |= (OPEN_CTREE_WRITES |
9766                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9767                                 repair = 1;
9768                                 break;
9769                         case GETOPT_VAL_CHECK_CSUM:
9770                                 check_data_csum = 1;
9771                                 break;
9772                 }
9773         }
9774
9775         if (check_argc_exact(argc - optind, 1))
9776                 usage(cmd_check_usage);
9777
9778         if (ctx.progress_enabled) {
9779                 ctx.tp = TASK_NOTHING;
9780                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9781         }
9782
9783         /* This check is the only reason for --readonly to exist */
9784         if (readonly && repair) {
9785                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9786                 exit(1);
9787         }
9788
9789         radix_tree_init();
9790         cache_tree_init(&root_cache);
9791
9792         if((ret = check_mounted(argv[optind])) < 0) {
9793                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9794                 goto err_out;
9795         } else if(ret) {
9796                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9797                 ret = -EBUSY;
9798                 goto err_out;
9799         }
9800
9801         /* only allow partial opening under repair mode */
9802         if (repair)
9803                 ctree_flags |= OPEN_CTREE_PARTIAL;
9804
9805         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9806                                   chunk_root_bytenr, ctree_flags);
9807         if (!info) {
9808                 fprintf(stderr, "Couldn't open file system\n");
9809                 ret = -EIO;
9810                 goto err_out;
9811         }
9812
9813         global_info = info;
9814         root = info->fs_root;
9815
9816         /*
9817          * repair mode will force us to commit transaction which
9818          * will make us fail to load log tree when mounting.
9819          */
9820         if (repair && btrfs_super_log_root(info->super_copy)) {
9821                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9822                 if (!ret) {
9823                         ret = 1;
9824                         goto close_out;
9825                 }
9826                 ret = zero_log_tree(root);
9827                 if (ret) {
9828                         fprintf(stderr, "fail to zero log tree\n");
9829                         goto close_out;
9830                 }
9831         }
9832
9833         uuid_unparse(info->super_copy->fsid, uuidbuf);
9834         if (qgroup_report) {
9835                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9836                        uuidbuf);
9837                 ret = qgroup_verify_all(info);
9838                 if (ret == 0)
9839                         report_qgroups(1);
9840                 goto close_out;
9841         }
9842         if (subvolid) {
9843                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9844                        subvolid, argv[optind], uuidbuf);
9845                 ret = print_extent_state(info, subvolid);
9846                 goto close_out;
9847         }
9848         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9849
9850         if (!extent_buffer_uptodate(info->tree_root->node) ||
9851             !extent_buffer_uptodate(info->dev_root->node) ||
9852             !extent_buffer_uptodate(info->chunk_root->node)) {
9853                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9854                 ret = -EIO;
9855                 goto close_out;
9856         }
9857
9858         if (init_extent_tree || init_csum_tree) {
9859                 struct btrfs_trans_handle *trans;
9860
9861                 trans = btrfs_start_transaction(info->extent_root, 0);
9862                 if (IS_ERR(trans)) {
9863                         fprintf(stderr, "Error starting transaction\n");
9864                         ret = PTR_ERR(trans);
9865                         goto close_out;
9866                 }
9867
9868                 if (init_extent_tree) {
9869                         printf("Creating a new extent tree\n");
9870                         ret = reinit_extent_tree(trans, info);
9871                         if (ret)
9872                                 goto close_out;
9873                 }
9874
9875                 if (init_csum_tree) {
9876                         fprintf(stderr, "Reinit crc root\n");
9877                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9878                         if (ret) {
9879                                 fprintf(stderr, "crc root initialization failed\n");
9880                                 ret = -EIO;
9881                                 goto close_out;
9882                         }
9883
9884                         ret = fill_csum_tree(trans, info->csum_root,
9885                                              init_extent_tree);
9886                         if (ret) {
9887                                 fprintf(stderr, "crc refilling failed\n");
9888                                 return -EIO;
9889                         }
9890                 }
9891                 /*
9892                  * Ok now we commit and run the normal fsck, which will add
9893                  * extent entries for all of the items it finds.
9894                  */
9895                 ret = btrfs_commit_transaction(trans, info->extent_root);
9896                 if (ret)
9897                         goto close_out;
9898         }
9899         if (!extent_buffer_uptodate(info->extent_root->node)) {
9900                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9901                 ret = -EIO;
9902                 goto close_out;
9903         }
9904         if (!extent_buffer_uptodate(info->csum_root->node)) {
9905                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9906                 ret = -EIO;
9907                 goto close_out;
9908         }
9909
9910         if (!ctx.progress_enabled)
9911                 fprintf(stderr, "checking extents\n");
9912         ret = check_chunks_and_extents(root);
9913         if (ret)
9914                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9915
9916         ret = repair_root_items(info);
9917         if (ret < 0)
9918                 goto close_out;
9919         if (repair) {
9920                 fprintf(stderr, "Fixed %d roots.\n", ret);
9921                 ret = 0;
9922         } else if (ret > 0) {
9923                 fprintf(stderr,
9924                        "Found %d roots with an outdated root item.\n",
9925                        ret);
9926                 fprintf(stderr,
9927                         "Please run a filesystem check with the option --repair to fix them.\n");
9928                 ret = 1;
9929                 goto close_out;
9930         }
9931
9932         if (!ctx.progress_enabled) {
9933                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9934                         fprintf(stderr, "checking free space tree\n");
9935                 else
9936                         fprintf(stderr, "checking free space cache\n");
9937         }
9938         ret = check_space_cache(root);
9939         if (ret)
9940                 goto out;
9941
9942         /*
9943          * We used to have to have these hole extents in between our real
9944          * extents so if we don't have this flag set we need to make sure there
9945          * are no gaps in the file extents for inodes, otherwise we can just
9946          * ignore it when this happens.
9947          */
9948         no_holes = btrfs_fs_incompat(root->fs_info,
9949                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9950         if (!ctx.progress_enabled)
9951                 fprintf(stderr, "checking fs roots\n");
9952         ret = check_fs_roots(root, &root_cache);
9953         if (ret)
9954                 goto out;
9955
9956         fprintf(stderr, "checking csums\n");
9957         ret = check_csums(root);
9958         if (ret)
9959                 goto out;
9960
9961         fprintf(stderr, "checking root refs\n");
9962         ret = check_root_refs(root, &root_cache);
9963         if (ret)
9964                 goto out;
9965
9966         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9967                 struct extent_buffer *eb;
9968
9969                 eb = list_first_entry(&root->fs_info->recow_ebs,
9970                                       struct extent_buffer, recow);
9971                 list_del_init(&eb->recow);
9972                 ret = recow_extent_buffer(root, eb);
9973                 if (ret)
9974                         break;
9975         }
9976
9977         while (!list_empty(&delete_items)) {
9978                 struct bad_item *bad;
9979
9980                 bad = list_first_entry(&delete_items, struct bad_item, list);
9981                 list_del_init(&bad->list);
9982                 if (repair)
9983                         ret = delete_bad_item(root, bad);
9984                 free(bad);
9985         }
9986
9987         if (info->quota_enabled) {
9988                 int err;
9989                 fprintf(stderr, "checking quota groups\n");
9990                 err = qgroup_verify_all(info);
9991                 if (err)
9992                         goto out;
9993                 report_qgroups(0);
9994                 err = repair_qgroups(info, &qgroups_repaired);
9995                 if (err)
9996                         goto out;
9997         }
9998
9999         if (!list_empty(&root->fs_info->recow_ebs)) {
10000                 fprintf(stderr, "Transid errors in file system\n");
10001                 ret = 1;
10002         }
10003 out:
10004         /* Don't override original ret */
10005         if (!ret && qgroups_repaired)
10006                 ret = qgroups_repaired;
10007
10008         if (found_old_backref) { /*
10009                  * there was a disk format change when mixed
10010                  * backref was in testing tree. The old format
10011                  * existed about one week.
10012                  */
10013                 printf("\n * Found old mixed backref format. "
10014                        "The old format is not supported! *"
10015                        "\n * Please mount the FS in readonly mode, "
10016                        "backup data and re-format the FS. *\n\n");
10017                 ret = 1;
10018         }
10019         printf("found %llu bytes used err is %d\n",
10020                (unsigned long long)bytes_used, ret);
10021         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10022         printf("total tree bytes: %llu\n",
10023                (unsigned long long)total_btree_bytes);
10024         printf("total fs tree bytes: %llu\n",
10025                (unsigned long long)total_fs_tree_bytes);
10026         printf("total extent tree bytes: %llu\n",
10027                (unsigned long long)total_extent_tree_bytes);
10028         printf("btree space waste bytes: %llu\n",
10029                (unsigned long long)btree_space_waste);
10030         printf("file data blocks allocated: %llu\n referenced %llu\n",
10031                 (unsigned long long)data_bytes_allocated,
10032                 (unsigned long long)data_bytes_referenced);
10033
10034         free_qgroup_counts();
10035         free_root_recs_tree(&root_cache);
10036 close_out:
10037         close_ctree(root);
10038 err_out:
10039         if (ctx.progress_enabled)
10040                 task_deinit(ctx.info);
10041
10042         return ret;
10043 }