btrfs-progs: check: switch to iterating over the backref_tree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase shown by the progress indicator.  print_status_check() uses the
 * value as an index into its label table, so the order of the first three
 * enumerators must match that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* No phase active; has to remain the last enumerator. */
        TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct rb_node node;
80         unsigned int is_data:1;
81         unsigned int found_extent_tree:1;
82         unsigned int full_backref:1;
83         unsigned int found_ref:1;
84         unsigned int broken:1;
85 };
86
87 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 {
89         return rb_entry(node, struct extent_backref, node);
90 }
91
92 struct data_backref {
93         struct extent_backref node;
94         union {
95                 u64 parent;
96                 u64 root;
97         };
98         u64 owner;
99         u64 offset;
100         u64 disk_bytenr;
101         u64 bytes;
102         u64 ram_bytes;
103         u32 num_refs;
104         u32 found_ref;
105 };
106
107 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 {
109         return container_of(back, struct data_backref, node);
110 }
111
112 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 {
114         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
115         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
116         struct data_backref *back1 = to_data_backref(ext1);
117         struct data_backref *back2 = to_data_backref(ext2);
118
119         WARN_ON(!ext1->is_data);
120         WARN_ON(!ext2->is_data);
121
122         /* parent and root are a union, so this covers both */
123         if (back1->parent > back2->parent)
124                 return 1;
125         if (back1->parent < back2->parent)
126                 return -1;
127
128         /* This is a full backref and the parents match. */
129         if (back1->node.full_backref)
130                 return 0;
131
132         if (back1->owner > back2->owner)
133                 return 1;
134         if (back1->owner < back2->owner)
135                 return -1;
136
137         if (back1->offset > back2->offset)
138                 return 1;
139         if (back1->offset < back2->offset)
140                 return -1;
141
142         if (back1->bytes > back2->bytes)
143                 return 1;
144         if (back1->bytes < back2->bytes)
145                 return -1;
146
147         if (back1->found_ref && back2->found_ref) {
148                 if (back1->disk_bytenr > back2->disk_bytenr)
149                         return 1;
150                 if (back1->disk_bytenr < back2->disk_bytenr)
151                         return -1;
152
153                 if (back1->found_ref > back2->found_ref)
154                         return 1;
155                 if (back1->found_ref < back2->found_ref)
156                         return -1;
157         }
158
159         return 0;
160 }
161
162 /*
163  * Much like data_backref, just removed the undetermined members
164  * and change it to use list_head.
165  * During extent scan, it is stored in root->orphan_data_extent.
166  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167  */
168 struct orphan_data_extent {
169         struct list_head list;
170         u64 root;
171         u64 objectid;
172         u64 offset;
173         u64 disk_bytenr;
174         u64 disk_len;
175 };
176
177 struct tree_backref {
178         struct extent_backref node;
179         union {
180                 u64 parent;
181                 u64 root;
182         };
183 };
184
185 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 {
187         return container_of(back, struct tree_backref, node);
188 }
189
190 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 {
192         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
193         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
194         struct tree_backref *back1 = to_tree_backref(ext1);
195         struct tree_backref *back2 = to_tree_backref(ext2);
196
197         WARN_ON(ext1->is_data);
198         WARN_ON(ext2->is_data);
199
200         /* parent and root are a union, so this covers both */
201         if (back1->parent > back2->parent)
202                 return 1;
203         if (back1->parent < back2->parent)
204                 return -1;
205
206         return 0;
207 }
208
209 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 {
211         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
212         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213
214         if (ext1->is_data > ext2->is_data)
215                 return 1;
216
217         if (ext1->is_data < ext2->is_data)
218                 return -1;
219
220         if (ext1->full_backref > ext2->full_backref)
221                 return 1;
222         if (ext1->full_backref < ext2->full_backref)
223                 return -1;
224
225         if (ext1->is_data)
226                 return compare_data_backref(node1, node2);
227         else
228                 return compare_tree_backref(node1, node2);
229 }
230
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref.
 * That field is two bits wide, so it can hold 0, 1 and this value.
 */
enum {
        FLAG_UNSET = 2
};
233
234 struct extent_record {
235         struct list_head backrefs;
236         struct list_head dups;
237         struct rb_root backref_tree;
238         struct list_head list;
239         struct cache_extent cache;
240         struct btrfs_disk_key parent_key;
241         u64 start;
242         u64 max_size;
243         u64 nr;
244         u64 refs;
245         u64 extent_item_refs;
246         u64 generation;
247         u64 parent_generation;
248         u64 info_objectid;
249         u32 num_duplicates;
250         u8 info_level;
251         unsigned int flag_block_full_backref:2;
252         unsigned int found_rec:1;
253         unsigned int content_checked:1;
254         unsigned int owner_ref_checked:1;
255         unsigned int is_root:1;
256         unsigned int metadata:1;
257         unsigned int bad_full_backref:1;
258         unsigned int crossing_stripes:1;
259         unsigned int wrong_chunk_type:1;
260 };
261
262 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 {
264         return container_of(entry, struct extent_record, list);
265 }
266
267 struct inode_backref {
268         struct list_head list;
269         unsigned int found_dir_item:1;
270         unsigned int found_dir_index:1;
271         unsigned int found_inode_ref:1;
272         unsigned int filetype:8;
273         int errors;
274         unsigned int ref_type;
275         u64 dir;
276         u64 index;
277         u16 namelen;
278         char name[0];
279 };
280
281 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 {
283         return list_entry(entry, struct inode_backref, list);
284 }
285
286 struct root_item_record {
287         struct list_head list;
288         u64 objectid;
289         u64 bytenr;
290         u64 last_snapshot;
291         u8 level;
292         u8 drop_level;
293         int level_size;
294         struct btrfs_key drop_key;
295 };
296
/*
 * Error bits for the "errors" member of inode_backref and root_backref;
 * decoded by print_ref_error().  One bit each, bits 0 through 12.
 */
#define REF_ERR_NO_DIR_ITEM             0x0001
#define REF_ERR_NO_DIR_INDEX            0x0002
#define REF_ERR_NO_INODE_REF            0x0004
#define REF_ERR_DUP_DIR_ITEM            0x0008
#define REF_ERR_DUP_DIR_INDEX           0x0010
#define REF_ERR_DUP_INODE_REF           0x0020
#define REF_ERR_INDEX_UNMATCH           0x0040
#define REF_ERR_FILETYPE_UNMATCH        0x0080
#define REF_ERR_NAME_TOO_LONG           0x0100
#define REF_ERR_NO_ROOT_REF             0x0200
#define REF_ERR_NO_ROOT_BACKREF         0x0400
#define REF_ERR_DUP_ROOT_REF            0x0800
#define REF_ERR_DUP_ROOT_BACKREF        0x1000
310
311 struct file_extent_hole {
312         struct rb_node node;
313         u64 start;
314         u64 len;
315 };
316
317 struct inode_record {
318         struct list_head backrefs;
319         unsigned int checked:1;
320         unsigned int merging:1;
321         unsigned int found_inode_item:1;
322         unsigned int found_dir_item:1;
323         unsigned int found_file_extent:1;
324         unsigned int found_csum_item:1;
325         unsigned int some_csum_missing:1;
326         unsigned int nodatasum:1;
327         int errors;
328
329         u64 ino;
330         u32 nlink;
331         u32 imode;
332         u64 isize;
333         u64 nbytes;
334
335         u32 found_link;
336         u64 found_size;
337         u64 extent_start;
338         u64 extent_end;
339         struct rb_root holes;
340         struct list_head orphan_extents;
341
342         u32 refs;
343 };
344
/*
 * Error bits for inode_record::errors; decoded by print_inode_error().
 * One bit each, bits 0 through 14.
 */
#define I_ERR_NO_INODE_ITEM             0x0001
#define I_ERR_NO_ORPHAN_ITEM            0x0002
#define I_ERR_DUP_INODE_ITEM            0x0004
#define I_ERR_DUP_DIR_INDEX             0x0008
#define I_ERR_ODD_DIR_ITEM              0x0010
#define I_ERR_ODD_FILE_EXTENT           0x0020
#define I_ERR_BAD_FILE_EXTENT           0x0040
#define I_ERR_FILE_EXTENT_OVERLAP       0x0080
#define I_ERR_FILE_EXTENT_DISCOUNT      0x0100
#define I_ERR_DIR_ISIZE_WRONG           0x0200
#define I_ERR_FILE_NBYTES_WRONG         0x0400
#define I_ERR_ODD_CSUM_ITEM             0x0800
#define I_ERR_SOME_CSUM_MISSING         0x1000
#define I_ERR_LINK_COUNT_WRONG          0x2000
#define I_ERR_FILE_EXTENT_ORPHAN        0x4000
360
361 struct root_backref {
362         struct list_head list;
363         unsigned int found_dir_item:1;
364         unsigned int found_dir_index:1;
365         unsigned int found_back_ref:1;
366         unsigned int found_forward_ref:1;
367         unsigned int reachable:1;
368         int errors;
369         u64 ref_root;
370         u64 dir;
371         u64 index;
372         u16 namelen;
373         char name[0];
374 };
375
376 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 {
378         return list_entry(entry, struct root_backref, list);
379 }
380
381 struct root_record {
382         struct list_head backrefs;
383         struct cache_extent cache;
384         unsigned int found_root_item:1;
385         u64 objectid;
386         u32 found_ref;
387 };
388
389 struct ptr_node {
390         struct cache_extent cache;
391         void *data;
392 };
393
394 struct shared_node {
395         struct cache_extent cache;
396         struct cache_tree root_cache;
397         struct cache_tree inode_cache;
398         struct inode_record *current;
399         u32 refs;
400 };
401
402 struct block_info {
403         u64 start;
404         u32 size;
405 };
406
407 struct walk_control {
408         struct cache_tree shared;
409         struct shared_node *nodes[BTRFS_MAX_LEVEL];
410         int active_node;
411         int root_level;
412 };
413
414 struct bad_item {
415         struct btrfs_key key;
416         u64 root_id;
417         struct list_head list;
418 };
419
420 struct extent_entry {
421         u64 bytenr;
422         u64 bytes;
423         int count;
424         int broken;
425         struct list_head list;
426 };
427
428 struct root_item_info {
429         /* level of the root */
430         u8 level;
431         /* number of nodes at this level, must be 1 for a root */
432         int node_count;
433         u64 bytenr;
434         u64 gen;
435         struct cache_extent cache_extent;
436 };
437
438 static void *print_status_check(void *p)
439 {
440         struct task_ctx *priv = p;
441         const char work_indicator[] = { '.', 'o', 'O', 'o' };
442         uint32_t count = 0;
443         static char *task_position_string[] = {
444                 "checking extents",
445                 "checking free space cache",
446                 "checking fs roots",
447         };
448
449         task_period_start(priv->info, 1000 /* 1s */);
450
451         if (priv->tp == TASK_NOTHING)
452                 return NULL;
453
454         while (1) {
455                 printf("%s [%c]\r", task_position_string[priv->tp],
456                                 work_indicator[count % 4]);
457                 count++;
458                 fflush(stdout);
459                 task_period_wait(priv->info);
460         }
461         return NULL;
462 }
463
/*
 * Counterpart of print_status_check(): emit a newline on stdout and flush
 * it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
471
472 /* Compatible function to allow reuse of old codes */
473 static u64 first_extent_gap(struct rb_root *holes)
474 {
475         struct file_extent_hole *hole;
476
477         if (RB_EMPTY_ROOT(holes))
478                 return (u64)-1;
479
480         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
481         return hole->start;
482 }
483
484 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
485 {
486         struct file_extent_hole *hole1;
487         struct file_extent_hole *hole2;
488
489         hole1 = rb_entry(node1, struct file_extent_hole, node);
490         hole2 = rb_entry(node2, struct file_extent_hole, node);
491
492         if (hole1->start > hole2->start)
493                 return -1;
494         if (hole1->start < hole2->start)
495                 return 1;
496         /* Now hole1->start == hole2->start */
497         if (hole1->len >= hole2->len)
498                 /*
499                  * Hole 1 will be merge center
500                  * Same hole will be merged later
501                  */
502                 return -1;
503         /* Hole 2 will be merge center */
504         return 1;
505 }
506
507 /*
508  * Add a hole to the record
509  *
510  * This will do hole merge for copy_file_extent_holes(),
511  * which will ensure there won't be continuous holes.
512  */
513 static int add_file_extent_hole(struct rb_root *holes,
514                                 u64 start, u64 len)
515 {
516         struct file_extent_hole *hole;
517         struct file_extent_hole *prev = NULL;
518         struct file_extent_hole *next = NULL;
519
520         hole = malloc(sizeof(*hole));
521         if (!hole)
522                 return -ENOMEM;
523         hole->start = start;
524         hole->len = len;
525         /* Since compare will not return 0, no -EEXIST will happen */
526         rb_insert(holes, &hole->node, compare_hole);
527
528         /* simple merge with previous hole */
529         if (rb_prev(&hole->node))
530                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
531                                 node);
532         if (prev && prev->start + prev->len >= hole->start) {
533                 hole->len = hole->start + hole->len - prev->start;
534                 hole->start = prev->start;
535                 rb_erase(&prev->node, holes);
536                 free(prev);
537                 prev = NULL;
538         }
539
540         /* iterate merge with next holes */
541         while (1) {
542                 if (!rb_next(&hole->node))
543                         break;
544                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
545                                         node);
546                 if (hole->start + hole->len >= next->start) {
547                         if (hole->start + hole->len <= next->start + next->len)
548                                 hole->len = next->start + next->len -
549                                             hole->start;
550                         rb_erase(&next->node, holes);
551                         free(next);
552                         next = NULL;
553                 } else
554                         break;
555         }
556         return 0;
557 }
558
559 static int compare_hole_range(struct rb_node *node, void *data)
560 {
561         struct file_extent_hole *hole;
562         u64 start;
563
564         hole = (struct file_extent_hole *)data;
565         start = hole->start;
566
567         hole = rb_entry(node, struct file_extent_hole, node);
568         if (start < hole->start)
569                 return -1;
570         if (start >= hole->start && start < hole->start + hole->len)
571                 return 0;
572         return 1;
573 }
574
575 /*
576  * Delete a hole in the record
577  *
578  * This will do the hole split and is much restrict than add.
579  */
580 static int del_file_extent_hole(struct rb_root *holes,
581                                 u64 start, u64 len)
582 {
583         struct file_extent_hole *hole;
584         struct file_extent_hole tmp;
585         u64 prev_start = 0;
586         u64 prev_len = 0;
587         u64 next_start = 0;
588         u64 next_len = 0;
589         struct rb_node *node;
590         int have_prev = 0;
591         int have_next = 0;
592         int ret = 0;
593
594         tmp.start = start;
595         tmp.len = len;
596         node = rb_search(holes, &tmp, compare_hole_range, NULL);
597         if (!node)
598                 return -EEXIST;
599         hole = rb_entry(node, struct file_extent_hole, node);
600         if (start + len > hole->start + hole->len)
601                 return -EEXIST;
602
603         /*
604          * Now there will be no overlap, delete the hole and re-add the
605          * split(s) if they exists.
606          */
607         if (start > hole->start) {
608                 prev_start = hole->start;
609                 prev_len = start - hole->start;
610                 have_prev = 1;
611         }
612         if (hole->start + hole->len > start + len) {
613                 next_start = start + len;
614                 next_len = hole->start + hole->len - start - len;
615                 have_next = 1;
616         }
617         rb_erase(node, holes);
618         free(hole);
619         if (have_prev) {
620                 ret = add_file_extent_hole(holes, prev_start, prev_len);
621                 if (ret < 0)
622                         return ret;
623         }
624         if (have_next) {
625                 ret = add_file_extent_hole(holes, next_start, next_len);
626                 if (ret < 0)
627                         return ret;
628         }
629         return 0;
630 }
631
632 static int copy_file_extent_holes(struct rb_root *dst,
633                                   struct rb_root *src)
634 {
635         struct file_extent_hole *hole;
636         struct rb_node *node;
637         int ret = 0;
638
639         node = rb_first(src);
640         while (node) {
641                 hole = rb_entry(node, struct file_extent_hole, node);
642                 ret = add_file_extent_hole(dst, hole->start, hole->len);
643                 if (ret)
644                         break;
645                 node = rb_next(node);
646         }
647         return ret;
648 }
649
650 static void free_file_extent_holes(struct rb_root *holes)
651 {
652         struct rb_node *node;
653         struct file_extent_hole *hole;
654
655         node = rb_first(holes);
656         while (node) {
657                 hole = rb_entry(node, struct file_extent_hole, node);
658                 rb_erase(node, holes);
659                 free(hole);
660                 node = rb_first(holes);
661         }
662 }
663
/* Forward declaration; the definition is outside this section of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
665
666 static void record_root_in_trans(struct btrfs_trans_handle *trans,
667                                  struct btrfs_root *root)
668 {
669         if (root->last_trans != trans->transid) {
670                 root->track_dirty = 1;
671                 root->last_trans = trans->transid;
672                 root->commit_root = root->node;
673                 extent_buffer_get(root->node);
674         }
675 }
676
677 static u8 imode_to_type(u32 imode)
678 {
679 #define S_SHIFT 12
680         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
681                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
682                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
683                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
684                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
685                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
686                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
687                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
688         };
689
690         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
691 #undef S_SHIFT
692 }
693
694 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
695 {
696         struct device_record *rec1;
697         struct device_record *rec2;
698
699         rec1 = rb_entry(node1, struct device_record, node);
700         rec2 = rb_entry(node2, struct device_record, node);
701         if (rec1->devid > rec2->devid)
702                 return -1;
703         else if (rec1->devid < rec2->devid)
704                 return 1;
705         else
706                 return 0;
707 }
708
709 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
710 {
711         struct inode_record *rec;
712         struct inode_backref *backref;
713         struct inode_backref *orig;
714         struct inode_backref *tmp;
715         struct orphan_data_extent *src_orphan;
716         struct orphan_data_extent *dst_orphan;
717         size_t size;
718         int ret;
719
720         rec = malloc(sizeof(*rec));
721         if (!rec)
722                 return ERR_PTR(-ENOMEM);
723         memcpy(rec, orig_rec, sizeof(*rec));
724         rec->refs = 1;
725         INIT_LIST_HEAD(&rec->backrefs);
726         INIT_LIST_HEAD(&rec->orphan_extents);
727         rec->holes = RB_ROOT;
728
729         list_for_each_entry(orig, &orig_rec->backrefs, list) {
730                 size = sizeof(*orig) + orig->namelen + 1;
731                 backref = malloc(size);
732                 if (!backref) {
733                         ret = -ENOMEM;
734                         goto cleanup;
735                 }
736                 memcpy(backref, orig, size);
737                 list_add_tail(&backref->list, &rec->backrefs);
738         }
739         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
740                 dst_orphan = malloc(sizeof(*dst_orphan));
741                 if (!dst_orphan) {
742                         ret = -ENOMEM;
743                         goto cleanup;
744                 }
745                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
746                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
747         }
748         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
749         BUG_ON(ret < 0);
750
751         return rec;
752
753 cleanup:
754         if (!list_empty(&rec->backrefs))
755                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
756                         list_del(&orig->list);
757                         free(orig);
758                 }
759
760         if (!list_empty(&rec->orphan_extents))
761                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
762                         list_del(&orig->list);
763                         free(orig);
764                 }
765
766         free(rec);
767
768         return ERR_PTR(ret);
769 }
770
771 static void print_orphan_data_extents(struct list_head *orphan_extents,
772                                       u64 objectid)
773 {
774         struct orphan_data_extent *orphan;
775
776         if (list_empty(orphan_extents))
777                 return;
778         printf("The following data extent is lost in tree %llu:\n",
779                objectid);
780         list_for_each_entry(orphan, orphan_extents, list) {
781                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
782                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
783                        orphan->disk_len);
784         }
785 }
786
787 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
788 {
789         u64 root_objectid = root->root_key.objectid;
790         int errors = rec->errors;
791
792         if (!errors)
793                 return;
794         /* reloc root errors, we print its corresponding fs root objectid*/
795         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
796                 root_objectid = root->root_key.offset;
797                 fprintf(stderr, "reloc");
798         }
799         fprintf(stderr, "root %llu inode %llu errors %x",
800                 (unsigned long long) root_objectid,
801                 (unsigned long long) rec->ino, rec->errors);
802
803         if (errors & I_ERR_NO_INODE_ITEM)
804                 fprintf(stderr, ", no inode item");
805         if (errors & I_ERR_NO_ORPHAN_ITEM)
806                 fprintf(stderr, ", no orphan item");
807         if (errors & I_ERR_DUP_INODE_ITEM)
808                 fprintf(stderr, ", dup inode item");
809         if (errors & I_ERR_DUP_DIR_INDEX)
810                 fprintf(stderr, ", dup dir index");
811         if (errors & I_ERR_ODD_DIR_ITEM)
812                 fprintf(stderr, ", odd dir item");
813         if (errors & I_ERR_ODD_FILE_EXTENT)
814                 fprintf(stderr, ", odd file extent");
815         if (errors & I_ERR_BAD_FILE_EXTENT)
816                 fprintf(stderr, ", bad file extent");
817         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
818                 fprintf(stderr, ", file extent overlap");
819         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
820                 fprintf(stderr, ", file extent discount");
821         if (errors & I_ERR_DIR_ISIZE_WRONG)
822                 fprintf(stderr, ", dir isize wrong");
823         if (errors & I_ERR_FILE_NBYTES_WRONG)
824                 fprintf(stderr, ", nbytes wrong");
825         if (errors & I_ERR_ODD_CSUM_ITEM)
826                 fprintf(stderr, ", odd csum item");
827         if (errors & I_ERR_SOME_CSUM_MISSING)
828                 fprintf(stderr, ", some csum missing");
829         if (errors & I_ERR_LINK_COUNT_WRONG)
830                 fprintf(stderr, ", link count wrong");
831         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
832                 fprintf(stderr, ", orphan file extent");
833         fprintf(stderr, "\n");
834         /* Print the orphan extents if needed */
835         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
836                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
837
838         /* Print the holes if needed */
839         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
840                 struct file_extent_hole *hole;
841                 struct rb_node *node;
842                 int found = 0;
843
844                 node = rb_first(&rec->holes);
845                 fprintf(stderr, "Found file extent holes:\n");
846                 while (node) {
847                         found = 1;
848                         hole = rb_entry(node, struct file_extent_hole, node);
849                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
850                                 hole->start, hole->len);
851                         node = rb_next(node);
852                 }
853                 if (!found)
854                         fprintf(stderr, "\tstart: 0, len: %llu\n",
855                                 round_up(rec->isize, root->sectorsize));
856         }
857 }
858
859 static void print_ref_error(int errors)
860 {
861         if (errors & REF_ERR_NO_DIR_ITEM)
862                 fprintf(stderr, ", no dir item");
863         if (errors & REF_ERR_NO_DIR_INDEX)
864                 fprintf(stderr, ", no dir index");
865         if (errors & REF_ERR_NO_INODE_REF)
866                 fprintf(stderr, ", no inode ref");
867         if (errors & REF_ERR_DUP_DIR_ITEM)
868                 fprintf(stderr, ", dup dir item");
869         if (errors & REF_ERR_DUP_DIR_INDEX)
870                 fprintf(stderr, ", dup dir index");
871         if (errors & REF_ERR_DUP_INODE_REF)
872                 fprintf(stderr, ", dup inode ref");
873         if (errors & REF_ERR_INDEX_UNMATCH)
874                 fprintf(stderr, ", index mismatch");
875         if (errors & REF_ERR_FILETYPE_UNMATCH)
876                 fprintf(stderr, ", filetype mismatch");
877         if (errors & REF_ERR_NAME_TOO_LONG)
878                 fprintf(stderr, ", name too long");
879         if (errors & REF_ERR_NO_ROOT_REF)
880                 fprintf(stderr, ", no root ref");
881         if (errors & REF_ERR_NO_ROOT_BACKREF)
882                 fprintf(stderr, ", no root backref");
883         if (errors & REF_ERR_DUP_ROOT_REF)
884                 fprintf(stderr, ", dup root ref");
885         if (errors & REF_ERR_DUP_ROOT_BACKREF)
886                 fprintf(stderr, ", dup root backref");
887         fprintf(stderr, "\n");
888 }
889
890 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
891                                           u64 ino, int mod)
892 {
893         struct ptr_node *node;
894         struct cache_extent *cache;
895         struct inode_record *rec = NULL;
896         int ret;
897
898         cache = lookup_cache_extent(inode_cache, ino, 1);
899         if (cache) {
900                 node = container_of(cache, struct ptr_node, cache);
901                 rec = node->data;
902                 if (mod && rec->refs > 1) {
903                         node->data = clone_inode_rec(rec);
904                         if (IS_ERR(node->data))
905                                 return node->data;
906                         rec->refs--;
907                         rec = node->data;
908                 }
909         } else if (mod) {
910                 rec = calloc(1, sizeof(*rec));
911                 if (!rec)
912                         return ERR_PTR(-ENOMEM);
913                 rec->ino = ino;
914                 rec->extent_start = (u64)-1;
915                 rec->refs = 1;
916                 INIT_LIST_HEAD(&rec->backrefs);
917                 INIT_LIST_HEAD(&rec->orphan_extents);
918                 rec->holes = RB_ROOT;
919
920                 node = malloc(sizeof(*node));
921                 if (!node) {
922                         free(rec);
923                         return ERR_PTR(-ENOMEM);
924                 }
925                 node->cache.start = ino;
926                 node->cache.size = 1;
927                 node->data = rec;
928
929                 if (ino == BTRFS_FREE_INO_OBJECTID)
930                         rec->found_link = 1;
931
932                 ret = insert_cache_extent(inode_cache, &node->cache);
933                 if (ret)
934                         return ERR_PTR(-EEXIST);
935         }
936         return rec;
937 }
938
939 static void free_orphan_data_extents(struct list_head *orphan_extents)
940 {
941         struct orphan_data_extent *orphan;
942
943         while (!list_empty(orphan_extents)) {
944                 orphan = list_entry(orphan_extents->next,
945                                     struct orphan_data_extent, list);
946                 list_del(&orphan->list);
947                 free(orphan);
948         }
949 }
950
951 static void free_inode_rec(struct inode_record *rec)
952 {
953         struct inode_backref *backref;
954
955         if (--rec->refs > 0)
956                 return;
957
958         while (!list_empty(&rec->backrefs)) {
959                 backref = to_inode_backref(rec->backrefs.next);
960                 list_del(&backref->list);
961                 free(backref);
962         }
963         free_orphan_data_extents(&rec->orphan_extents);
964         free_file_extent_holes(&rec->holes);
965         free(rec);
966 }
967
968 static int can_free_inode_rec(struct inode_record *rec)
969 {
970         if (!rec->errors && rec->checked && rec->found_inode_item &&
971             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
972                 return 1;
973         return 0;
974 }
975
976 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
977                                  struct inode_record *rec)
978 {
979         struct cache_extent *cache;
980         struct inode_backref *tmp, *backref;
981         struct ptr_node *node;
982         unsigned char filetype;
983
984         if (!rec->found_inode_item)
985                 return;
986
987         filetype = imode_to_type(rec->imode);
988         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
989                 if (backref->found_dir_item && backref->found_dir_index) {
990                         if (backref->filetype != filetype)
991                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
992                         if (!backref->errors && backref->found_inode_ref &&
993                             rec->nlink == rec->found_link) {
994                                 list_del(&backref->list);
995                                 free(backref);
996                         }
997                 }
998         }
999
1000         if (!rec->checked || rec->merging)
1001                 return;
1002
1003         if (S_ISDIR(rec->imode)) {
1004                 if (rec->found_size != rec->isize)
1005                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1006                 if (rec->found_file_extent)
1007                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1008         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1009                 if (rec->found_dir_item)
1010                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1011                 if (rec->found_size != rec->nbytes)
1012                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1013                 if (rec->nlink > 0 && !no_holes &&
1014                     (rec->extent_end < rec->isize ||
1015                      first_extent_gap(&rec->holes) < rec->isize))
1016                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1017         }
1018
1019         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1020                 if (rec->found_csum_item && rec->nodatasum)
1021                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1022                 if (rec->some_csum_missing && !rec->nodatasum)
1023                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1024         }
1025
1026         BUG_ON(rec->refs != 1);
1027         if (can_free_inode_rec(rec)) {
1028                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1029                 node = container_of(cache, struct ptr_node, cache);
1030                 BUG_ON(node->data != rec);
1031                 remove_cache_extent(inode_cache, &node->cache);
1032                 free(node);
1033                 free_inode_rec(rec);
1034         }
1035 }
1036
1037 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1038 {
1039         struct btrfs_path path;
1040         struct btrfs_key key;
1041         int ret;
1042
1043         key.objectid = BTRFS_ORPHAN_OBJECTID;
1044         key.type = BTRFS_ORPHAN_ITEM_KEY;
1045         key.offset = ino;
1046
1047         btrfs_init_path(&path);
1048         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1049         btrfs_release_path(&path);
1050         if (ret > 0)
1051                 ret = -ENOENT;
1052         return ret;
1053 }
1054
1055 static int process_inode_item(struct extent_buffer *eb,
1056                               int slot, struct btrfs_key *key,
1057                               struct shared_node *active_node)
1058 {
1059         struct inode_record *rec;
1060         struct btrfs_inode_item *item;
1061
1062         rec = active_node->current;
1063         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1064         if (rec->found_inode_item) {
1065                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1066                 return 1;
1067         }
1068         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1069         rec->nlink = btrfs_inode_nlink(eb, item);
1070         rec->isize = btrfs_inode_size(eb, item);
1071         rec->nbytes = btrfs_inode_nbytes(eb, item);
1072         rec->imode = btrfs_inode_mode(eb, item);
1073         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1074                 rec->nodatasum = 1;
1075         rec->found_inode_item = 1;
1076         if (rec->nlink == 0)
1077                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1078         maybe_free_inode_rec(&active_node->inode_cache, rec);
1079         return 0;
1080 }
1081
1082 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1083                                                 const char *name,
1084                                                 int namelen, u64 dir)
1085 {
1086         struct inode_backref *backref;
1087
1088         list_for_each_entry(backref, &rec->backrefs, list) {
1089                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1090                         break;
1091                 if (backref->dir != dir || backref->namelen != namelen)
1092                         continue;
1093                 if (memcmp(name, backref->name, namelen))
1094                         continue;
1095                 return backref;
1096         }
1097
1098         backref = malloc(sizeof(*backref) + namelen + 1);
1099         if (!backref)
1100                 return NULL;
1101         memset(backref, 0, sizeof(*backref));
1102         backref->dir = dir;
1103         backref->namelen = namelen;
1104         memcpy(backref->name, name, namelen);
1105         backref->name[namelen] = '\0';
1106         list_add_tail(&backref->list, &rec->backrefs);
1107         return backref;
1108 }
1109
1110 static int add_inode_backref(struct cache_tree *inode_cache,
1111                              u64 ino, u64 dir, u64 index,
1112                              const char *name, int namelen,
1113                              int filetype, int itemtype, int errors)
1114 {
1115         struct inode_record *rec;
1116         struct inode_backref *backref;
1117
1118         rec = get_inode_rec(inode_cache, ino, 1);
1119         BUG_ON(IS_ERR(rec));
1120         backref = get_inode_backref(rec, name, namelen, dir);
1121         BUG_ON(!backref);
1122         if (errors)
1123                 backref->errors |= errors;
1124         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1125                 if (backref->found_dir_index)
1126                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1127                 if (backref->found_inode_ref && backref->index != index)
1128                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1129                 if (backref->found_dir_item && backref->filetype != filetype)
1130                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1131
1132                 backref->index = index;
1133                 backref->filetype = filetype;
1134                 backref->found_dir_index = 1;
1135         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1136                 rec->found_link++;
1137                 if (backref->found_dir_item)
1138                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1139                 if (backref->found_dir_index && backref->filetype != filetype)
1140                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1141
1142                 backref->filetype = filetype;
1143                 backref->found_dir_item = 1;
1144         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1145                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1146                 if (backref->found_inode_ref)
1147                         backref->errors |= REF_ERR_DUP_INODE_REF;
1148                 if (backref->found_dir_index && backref->index != index)
1149                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1150                 else
1151                         backref->index = index;
1152
1153                 backref->ref_type = itemtype;
1154                 backref->found_inode_ref = 1;
1155         } else {
1156                 BUG_ON(1);
1157         }
1158
1159         maybe_free_inode_rec(inode_cache, rec);
1160         return 0;
1161 }
1162
1163 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1164                             struct cache_tree *dst_cache)
1165 {
1166         struct inode_backref *backref;
1167         u32 dir_count = 0;
1168         int ret = 0;
1169
1170         dst->merging = 1;
1171         list_for_each_entry(backref, &src->backrefs, list) {
1172                 if (backref->found_dir_index) {
1173                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1174                                         backref->index, backref->name,
1175                                         backref->namelen, backref->filetype,
1176                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1177                 }
1178                 if (backref->found_dir_item) {
1179                         dir_count++;
1180                         add_inode_backref(dst_cache, dst->ino,
1181                                         backref->dir, 0, backref->name,
1182                                         backref->namelen, backref->filetype,
1183                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1184                 }
1185                 if (backref->found_inode_ref) {
1186                         add_inode_backref(dst_cache, dst->ino,
1187                                         backref->dir, backref->index,
1188                                         backref->name, backref->namelen, 0,
1189                                         backref->ref_type, backref->errors);
1190                 }
1191         }
1192
1193         if (src->found_dir_item)
1194                 dst->found_dir_item = 1;
1195         if (src->found_file_extent)
1196                 dst->found_file_extent = 1;
1197         if (src->found_csum_item)
1198                 dst->found_csum_item = 1;
1199         if (src->some_csum_missing)
1200                 dst->some_csum_missing = 1;
1201         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1202                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1203                 if (ret < 0)
1204                         return ret;
1205         }
1206
1207         BUG_ON(src->found_link < dir_count);
1208         dst->found_link += src->found_link - dir_count;
1209         dst->found_size += src->found_size;
1210         if (src->extent_start != (u64)-1) {
1211                 if (dst->extent_start == (u64)-1) {
1212                         dst->extent_start = src->extent_start;
1213                         dst->extent_end = src->extent_end;
1214                 } else {
1215                         if (dst->extent_end > src->extent_start)
1216                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1217                         else if (dst->extent_end < src->extent_start) {
1218                                 ret = add_file_extent_hole(&dst->holes,
1219                                         dst->extent_end,
1220                                         src->extent_start - dst->extent_end);
1221                         }
1222                         if (dst->extent_end < src->extent_end)
1223                                 dst->extent_end = src->extent_end;
1224                 }
1225         }
1226
1227         dst->errors |= src->errors;
1228         if (src->found_inode_item) {
1229                 if (!dst->found_inode_item) {
1230                         dst->nlink = src->nlink;
1231                         dst->isize = src->isize;
1232                         dst->nbytes = src->nbytes;
1233                         dst->imode = src->imode;
1234                         dst->nodatasum = src->nodatasum;
1235                         dst->found_inode_item = 1;
1236                 } else {
1237                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1238                 }
1239         }
1240         dst->merging = 0;
1241
1242         return 0;
1243 }
1244
1245 static int splice_shared_node(struct shared_node *src_node,
1246                               struct shared_node *dst_node)
1247 {
1248         struct cache_extent *cache;
1249         struct ptr_node *node, *ins;
1250         struct cache_tree *src, *dst;
1251         struct inode_record *rec, *conflict;
1252         u64 current_ino = 0;
1253         int splice = 0;
1254         int ret;
1255
1256         if (--src_node->refs == 0)
1257                 splice = 1;
1258         if (src_node->current)
1259                 current_ino = src_node->current->ino;
1260
1261         src = &src_node->root_cache;
1262         dst = &dst_node->root_cache;
1263 again:
1264         cache = search_cache_extent(src, 0);
1265         while (cache) {
1266                 node = container_of(cache, struct ptr_node, cache);
1267                 rec = node->data;
1268                 cache = next_cache_extent(cache);
1269
1270                 if (splice) {
1271                         remove_cache_extent(src, &node->cache);
1272                         ins = node;
1273                 } else {
1274                         ins = malloc(sizeof(*ins));
1275                         BUG_ON(!ins);
1276                         ins->cache.start = node->cache.start;
1277                         ins->cache.size = node->cache.size;
1278                         ins->data = rec;
1279                         rec->refs++;
1280                 }
1281                 ret = insert_cache_extent(dst, &ins->cache);
1282                 if (ret == -EEXIST) {
1283                         conflict = get_inode_rec(dst, rec->ino, 1);
1284                         BUG_ON(IS_ERR(conflict));
1285                         merge_inode_recs(rec, conflict, dst);
1286                         if (rec->checked) {
1287                                 conflict->checked = 1;
1288                                 if (dst_node->current == conflict)
1289                                         dst_node->current = NULL;
1290                         }
1291                         maybe_free_inode_rec(dst, conflict);
1292                         free_inode_rec(rec);
1293                         free(ins);
1294                 } else {
1295                         BUG_ON(ret);
1296                 }
1297         }
1298
1299         if (src == &src_node->root_cache) {
1300                 src = &src_node->inode_cache;
1301                 dst = &dst_node->inode_cache;
1302                 goto again;
1303         }
1304
1305         if (current_ino > 0 && (!dst_node->current ||
1306             current_ino > dst_node->current->ino)) {
1307                 if (dst_node->current) {
1308                         dst_node->current->checked = 1;
1309                         maybe_free_inode_rec(dst, dst_node->current);
1310                 }
1311                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1312                 BUG_ON(IS_ERR(dst_node->current));
1313         }
1314         return 0;
1315 }
1316
1317 static void free_inode_ptr(struct cache_extent *cache)
1318 {
1319         struct ptr_node *node;
1320         struct inode_record *rec;
1321
1322         node = container_of(cache, struct ptr_node, cache);
1323         rec = node->data;
1324         free_inode_rec(rec);
1325         free(node);
1326 }
1327
1328 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1329
1330 static struct shared_node *find_shared_node(struct cache_tree *shared,
1331                                             u64 bytenr)
1332 {
1333         struct cache_extent *cache;
1334         struct shared_node *node;
1335
1336         cache = lookup_cache_extent(shared, bytenr, 1);
1337         if (cache) {
1338                 node = container_of(cache, struct shared_node, cache);
1339                 return node;
1340         }
1341         return NULL;
1342 }
1343
1344 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1345 {
1346         int ret;
1347         struct shared_node *node;
1348
1349         node = calloc(1, sizeof(*node));
1350         if (!node)
1351                 return -ENOMEM;
1352         node->cache.start = bytenr;
1353         node->cache.size = 1;
1354         cache_tree_init(&node->root_cache);
1355         cache_tree_init(&node->inode_cache);
1356         node->refs = refs;
1357
1358         ret = insert_cache_extent(shared, &node->cache);
1359
1360         return ret;
1361 }
1362
1363 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1364                              struct walk_control *wc, int level)
1365 {
1366         struct shared_node *node;
1367         struct shared_node *dest;
1368         int ret;
1369
1370         if (level == wc->active_node)
1371                 return 0;
1372
1373         BUG_ON(wc->active_node <= level);
1374         node = find_shared_node(&wc->shared, bytenr);
1375         if (!node) {
1376                 ret = add_shared_node(&wc->shared, bytenr, refs);
1377                 BUG_ON(ret);
1378                 node = find_shared_node(&wc->shared, bytenr);
1379                 wc->nodes[level] = node;
1380                 wc->active_node = level;
1381                 return 0;
1382         }
1383
1384         if (wc->root_level == wc->active_node &&
1385             btrfs_root_refs(&root->root_item) == 0) {
1386                 if (--node->refs == 0) {
1387                         free_inode_recs_tree(&node->root_cache);
1388                         free_inode_recs_tree(&node->inode_cache);
1389                         remove_cache_extent(&wc->shared, &node->cache);
1390                         free(node);
1391                 }
1392                 return 1;
1393         }
1394
1395         dest = wc->nodes[wc->active_node];
1396         splice_shared_node(node, dest);
1397         if (node->refs == 0) {
1398                 remove_cache_extent(&wc->shared, &node->cache);
1399                 free(node);
1400         }
1401         return 1;
1402 }
1403
1404 static int leave_shared_node(struct btrfs_root *root,
1405                              struct walk_control *wc, int level)
1406 {
1407         struct shared_node *node;
1408         struct shared_node *dest;
1409         int i;
1410
1411         if (level == wc->root_level)
1412                 return 0;
1413
1414         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1415                 if (wc->nodes[i])
1416                         break;
1417         }
1418         BUG_ON(i >= BTRFS_MAX_LEVEL);
1419
1420         node = wc->nodes[wc->active_node];
1421         wc->nodes[wc->active_node] = NULL;
1422         wc->active_node = i;
1423
1424         dest = wc->nodes[wc->active_node];
1425         if (wc->active_node < wc->root_level ||
1426             btrfs_root_refs(&root->root_item) > 0) {
1427                 BUG_ON(node->refs <= 1);
1428                 splice_shared_node(node, dest);
1429         } else {
1430                 BUG_ON(node->refs < 2);
1431                 node->refs--;
1432         }
1433         return 0;
1434 }
1435
1436 /*
1437  * Returns:
1438  * < 0 - on error
1439  * 1   - if the root with id child_root_id is a child of root parent_root_id
1440  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1441  *       has other root(s) as parent(s)
1442  * 2   - if the root child_root_id doesn't have any parent roots
1443  */
1444 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1445                          u64 child_root_id)
1446 {
1447         struct btrfs_path path;
1448         struct btrfs_key key;
1449         struct extent_buffer *leaf;
1450         int has_parent = 0;
1451         int ret;
1452
1453         btrfs_init_path(&path);
1454
1455         key.objectid = parent_root_id;
1456         key.type = BTRFS_ROOT_REF_KEY;
1457         key.offset = child_root_id;
1458         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1459                                 0, 0);
1460         if (ret < 0)
1461                 return ret;
1462         btrfs_release_path(&path);
1463         if (!ret)
1464                 return 1;
1465
1466         key.objectid = child_root_id;
1467         key.type = BTRFS_ROOT_BACKREF_KEY;
1468         key.offset = 0;
1469         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1470                                 0, 0);
1471         if (ret < 0)
1472                 goto out;
1473
1474         while (1) {
1475                 leaf = path.nodes[0];
1476                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1477                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1478                         if (ret)
1479                                 break;
1480                         leaf = path.nodes[0];
1481                 }
1482
1483                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1484                 if (key.objectid != child_root_id ||
1485                     key.type != BTRFS_ROOT_BACKREF_KEY)
1486                         break;
1487
1488                 has_parent = 1;
1489
1490                 if (key.offset == parent_root_id) {
1491                         btrfs_release_path(&path);
1492                         return 1;
1493                 }
1494
1495                 path.slots[0]++;
1496         }
1497 out:
1498         btrfs_release_path(&path);
1499         if (ret < 0)
1500                 return ret;
1501         return has_parent ? 0 : 2;
1502 }
1503
1504 static int process_dir_item(struct btrfs_root *root,
1505                             struct extent_buffer *eb,
1506                             int slot, struct btrfs_key *key,
1507                             struct shared_node *active_node)
1508 {
1509         u32 total;
1510         u32 cur = 0;
1511         u32 len;
1512         u32 name_len;
1513         u32 data_len;
1514         int error;
1515         int nritems = 0;
1516         int filetype;
1517         struct btrfs_dir_item *di;
1518         struct inode_record *rec;
1519         struct cache_tree *root_cache;
1520         struct cache_tree *inode_cache;
1521         struct btrfs_key location;
1522         char namebuf[BTRFS_NAME_LEN];
1523
1524         root_cache = &active_node->root_cache;
1525         inode_cache = &active_node->inode_cache;
1526         rec = active_node->current;
1527         rec->found_dir_item = 1;
1528
1529         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1530         total = btrfs_item_size_nr(eb, slot);
1531         while (cur < total) {
1532                 nritems++;
1533                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1534                 name_len = btrfs_dir_name_len(eb, di);
1535                 data_len = btrfs_dir_data_len(eb, di);
1536                 filetype = btrfs_dir_type(eb, di);
1537
1538                 rec->found_size += name_len;
1539                 if (name_len <= BTRFS_NAME_LEN) {
1540                         len = name_len;
1541                         error = 0;
1542                 } else {
1543                         len = BTRFS_NAME_LEN;
1544                         error = REF_ERR_NAME_TOO_LONG;
1545                 }
1546                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1547
1548                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1549                         add_inode_backref(inode_cache, location.objectid,
1550                                           key->objectid, key->offset, namebuf,
1551                                           len, filetype, key->type, error);
1552                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1553                         add_inode_backref(root_cache, location.objectid,
1554                                           key->objectid, key->offset,
1555                                           namebuf, len, filetype,
1556                                           key->type, error);
1557                 } else {
1558                         fprintf(stderr, "invalid location in dir item %u\n",
1559                                 location.type);
1560                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1561                                           key->objectid, key->offset, namebuf,
1562                                           len, filetype, key->type, error);
1563                 }
1564
1565                 len = sizeof(*di) + name_len + data_len;
1566                 di = (struct btrfs_dir_item *)((char *)di + len);
1567                 cur += len;
1568         }
1569         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1570                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1571
1572         return 0;
1573 }
1574
1575 static int process_inode_ref(struct extent_buffer *eb,
1576                              int slot, struct btrfs_key *key,
1577                              struct shared_node *active_node)
1578 {
1579         u32 total;
1580         u32 cur = 0;
1581         u32 len;
1582         u32 name_len;
1583         u64 index;
1584         int error;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_inode_ref *ref;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         inode_cache = &active_node->inode_cache;
1590
1591         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 name_len = btrfs_inode_ref_name_len(eb, ref);
1595                 index = btrfs_inode_ref_index(eb, ref);
1596                 if (name_len <= BTRFS_NAME_LEN) {
1597                         len = name_len;
1598                         error = 0;
1599                 } else {
1600                         len = BTRFS_NAME_LEN;
1601                         error = REF_ERR_NAME_TOO_LONG;
1602                 }
1603                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, key->offset,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*ref) + name_len;
1608                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612 }
1613
1614 static int process_inode_extref(struct extent_buffer *eb,
1615                                 int slot, struct btrfs_key *key,
1616                                 struct shared_node *active_node)
1617 {
1618         u32 total;
1619         u32 cur = 0;
1620         u32 len;
1621         u32 name_len;
1622         u64 index;
1623         u64 parent;
1624         int error;
1625         struct cache_tree *inode_cache;
1626         struct btrfs_inode_extref *extref;
1627         char namebuf[BTRFS_NAME_LEN];
1628
1629         inode_cache = &active_node->inode_cache;
1630
1631         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632         total = btrfs_item_size_nr(eb, slot);
1633         while (cur < total) {
1634                 name_len = btrfs_inode_extref_name_len(eb, extref);
1635                 index = btrfs_inode_extref_index(eb, extref);
1636                 parent = btrfs_inode_extref_parent(eb, extref);
1637                 if (name_len <= BTRFS_NAME_LEN) {
1638                         len = name_len;
1639                         error = 0;
1640                 } else {
1641                         len = BTRFS_NAME_LEN;
1642                         error = REF_ERR_NAME_TOO_LONG;
1643                 }
1644                 read_extent_buffer(eb, namebuf,
1645                                    (unsigned long)(extref + 1), len);
1646                 add_inode_backref(inode_cache, key->objectid, parent,
1647                                   index, namebuf, len, 0, key->type, error);
1648
1649                 len = sizeof(*extref) + name_len;
1650                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1651                 cur += len;
1652         }
1653         return 0;
1654
1655 }
1656
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658                             u64 len, u64 *found)
1659 {
1660         struct btrfs_key key;
1661         struct btrfs_path path;
1662         struct extent_buffer *leaf;
1663         int ret;
1664         size_t size;
1665         *found = 0;
1666         u64 csum_end;
1667         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668
1669         btrfs_init_path(&path);
1670
1671         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672         key.offset = start;
1673         key.type = BTRFS_EXTENT_CSUM_KEY;
1674
1675         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1676                                 &key, &path, 0, 0);
1677         if (ret < 0)
1678                 goto out;
1679         if (ret > 0 && path.slots[0] > 0) {
1680                 leaf = path.nodes[0];
1681                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683                     key.type == BTRFS_EXTENT_CSUM_KEY)
1684                         path.slots[0]--;
1685         }
1686
1687         while (len > 0) {
1688                 leaf = path.nodes[0];
1689                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1691                         if (ret > 0)
1692                                 break;
1693                         else if (ret < 0)
1694                                 goto out;
1695                         leaf = path.nodes[0];
1696                 }
1697
1698                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1699                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700                     key.type != BTRFS_EXTENT_CSUM_KEY)
1701                         break;
1702
1703                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704                 if (key.offset >= start + len)
1705                         break;
1706
1707                 if (key.offset > start)
1708                         start = key.offset;
1709
1710                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1712                 if (csum_end > start) {
1713                         size = min(csum_end - start, len);
1714                         len -= size;
1715                         start += size;
1716                         *found += size;
1717                 }
1718
1719                 path.slots[0]++;
1720         }
1721 out:
1722         btrfs_release_path(&path);
1723         if (ret < 0)
1724                 return ret;
1725         return 0;
1726 }
1727
1728 static int process_file_extent(struct btrfs_root *root,
1729                                 struct extent_buffer *eb,
1730                                 int slot, struct btrfs_key *key,
1731                                 struct shared_node *active_node)
1732 {
1733         struct inode_record *rec;
1734         struct btrfs_file_extent_item *fi;
1735         u64 num_bytes = 0;
1736         u64 disk_bytenr = 0;
1737         u64 extent_offset = 0;
1738         u64 mask = root->sectorsize - 1;
1739         int extent_type;
1740         int ret;
1741
1742         rec = active_node->current;
1743         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1744         rec->found_file_extent = 1;
1745
1746         if (rec->extent_start == (u64)-1) {
1747                 rec->extent_start = key->offset;
1748                 rec->extent_end = key->offset;
1749         }
1750
1751         if (rec->extent_end > key->offset)
1752                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1753         else if (rec->extent_end < key->offset) {
1754                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1755                                            key->offset - rec->extent_end);
1756                 if (ret < 0)
1757                         return ret;
1758         }
1759
1760         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1761         extent_type = btrfs_file_extent_type(eb, fi);
1762
1763         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1764                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765                 if (num_bytes == 0)
1766                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767                 rec->found_size += num_bytes;
1768                 num_bytes = (num_bytes + mask) & ~mask;
1769         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1770                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1771                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1772                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1773                 extent_offset = btrfs_file_extent_offset(eb, fi);
1774                 if (num_bytes == 0 || (num_bytes & mask))
1775                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1776                 if (num_bytes + extent_offset >
1777                     btrfs_file_extent_ram_bytes(eb, fi))
1778                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1780                     (btrfs_file_extent_compression(eb, fi) ||
1781                      btrfs_file_extent_encryption(eb, fi) ||
1782                      btrfs_file_extent_other_encoding(eb, fi)))
1783                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784                 if (disk_bytenr > 0)
1785                         rec->found_size += num_bytes;
1786         } else {
1787                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788         }
1789         rec->extent_end = key->offset + num_bytes;
1790
1791         /*
1792          * The data reloc tree will copy full extents into its inode and then
1793          * copy the corresponding csums.  Because the extent it copied could be
1794          * a preallocated extent that hasn't been written to yet there may be no
1795          * csums to copy, ergo we won't have csums for our file extent.  This is
1796          * ok so just don't bother checking csums if the inode belongs to the
1797          * data reloc tree.
1798          */
1799         if (disk_bytenr > 0 &&
1800             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1801                 u64 found;
1802                 if (btrfs_file_extent_compression(eb, fi))
1803                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804                 else
1805                         disk_bytenr += extent_offset;
1806
1807                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1808                 if (ret < 0)
1809                         return ret;
1810                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811                         if (found > 0)
1812                                 rec->found_csum_item = 1;
1813                         if (found < num_bytes)
1814                                 rec->some_csum_missing = 1;
1815                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816                         if (found > 0)
1817                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1818                 }
1819         }
1820         return 0;
1821 }
1822
1823 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1824                             struct walk_control *wc)
1825 {
1826         struct btrfs_key key;
1827         u32 nritems;
1828         int i;
1829         int ret = 0;
1830         struct cache_tree *inode_cache;
1831         struct shared_node *active_node;
1832
1833         if (wc->root_level == wc->active_node &&
1834             btrfs_root_refs(&root->root_item) == 0)
1835                 return 0;
1836
1837         active_node = wc->nodes[wc->active_node];
1838         inode_cache = &active_node->inode_cache;
1839         nritems = btrfs_header_nritems(eb);
1840         for (i = 0; i < nritems; i++) {
1841                 btrfs_item_key_to_cpu(eb, &key, i);
1842
1843                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844                         continue;
1845                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1846                         continue;
1847
1848                 if (active_node->current == NULL ||
1849                     active_node->current->ino < key.objectid) {
1850                         if (active_node->current) {
1851                                 active_node->current->checked = 1;
1852                                 maybe_free_inode_rec(inode_cache,
1853                                                      active_node->current);
1854                         }
1855                         active_node->current = get_inode_rec(inode_cache,
1856                                                              key.objectid, 1);
1857                         BUG_ON(IS_ERR(active_node->current));
1858                 }
1859                 switch (key.type) {
1860                 case BTRFS_DIR_ITEM_KEY:
1861                 case BTRFS_DIR_INDEX_KEY:
1862                         ret = process_dir_item(root, eb, i, &key, active_node);
1863                         break;
1864                 case BTRFS_INODE_REF_KEY:
1865                         ret = process_inode_ref(eb, i, &key, active_node);
1866                         break;
1867                 case BTRFS_INODE_EXTREF_KEY:
1868                         ret = process_inode_extref(eb, i, &key, active_node);
1869                         break;
1870                 case BTRFS_INODE_ITEM_KEY:
1871                         ret = process_inode_item(eb, i, &key, active_node);
1872                         break;
1873                 case BTRFS_EXTENT_DATA_KEY:
1874                         ret = process_file_extent(root, eb, i, &key,
1875                                                   active_node);
1876                         break;
1877                 default:
1878                         break;
1879                 };
1880         }
1881         return ret;
1882 }
1883
1884 static void reada_walk_down(struct btrfs_root *root,
1885                             struct extent_buffer *node, int slot)
1886 {
1887         u64 bytenr;
1888         u64 ptr_gen;
1889         u32 nritems;
1890         u32 blocksize;
1891         int i;
1892         int level;
1893
1894         level = btrfs_header_level(node);
1895         if (level != 1)
1896                 return;
1897
1898         nritems = btrfs_header_nritems(node);
1899         blocksize = root->nodesize;
1900         for (i = slot; i < nritems; i++) {
1901                 bytenr = btrfs_node_blockptr(node, i);
1902                 ptr_gen = btrfs_node_ptr_generation(node, i);
1903                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1904         }
1905 }
1906
1907 /*
1908  * Check the child node/leaf by the following condition:
1909  * 1. the first item key of the node/leaf should be the same with the one
1910  *    in parent.
1911  * 2. block in parent node should match the child node/leaf.
1912  * 3. generation of parent node and child's header should be consistent.
1913  *
1914  * Or the child node/leaf pointed by the key in parent is not valid.
1915  *
1916  * We hope to check leaf owner too, but since subvol may share leaves,
1917  * which makes leaf owner check not so strong, key check should be
1918  * sufficient enough for that case.
1919  */
1920 static int check_child_node(struct btrfs_root *root,
1921                             struct extent_buffer *parent, int slot,
1922                             struct extent_buffer *child)
1923 {
1924         struct btrfs_key parent_key;
1925         struct btrfs_key child_key;
1926         int ret = 0;
1927
1928         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1929         if (btrfs_header_level(child) == 0)
1930                 btrfs_item_key_to_cpu(child, &child_key, 0);
1931         else
1932                 btrfs_node_key_to_cpu(child, &child_key, 0);
1933
1934         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1935                 ret = -EINVAL;
1936                 fprintf(stderr,
1937                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1938                         parent_key.objectid, parent_key.type, parent_key.offset,
1939                         child_key.objectid, child_key.type, child_key.offset);
1940         }
1941         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1942                 ret = -EINVAL;
1943                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1944                         btrfs_node_blockptr(parent, slot),
1945                         btrfs_header_bytenr(child));
1946         }
1947         if (btrfs_node_ptr_generation(parent, slot) !=
1948             btrfs_header_generation(child)) {
1949                 ret = -EINVAL;
1950                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1951                         btrfs_header_generation(child),
1952                         btrfs_node_ptr_generation(parent, slot));
1953         }
1954         return ret;
1955 }
1956
1957 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1958                           struct walk_control *wc, int *level)
1959 {
1960         enum btrfs_tree_block_status status;
1961         u64 bytenr;
1962         u64 ptr_gen;
1963         struct extent_buffer *next;
1964         struct extent_buffer *cur;
1965         u32 blocksize;
1966         int ret, err = 0;
1967         u64 refs;
1968
1969         WARN_ON(*level < 0);
1970         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1971         ret = btrfs_lookup_extent_info(NULL, root,
1972                                        path->nodes[*level]->start,
1973                                        *level, 1, &refs, NULL);
1974         if (ret < 0) {
1975                 err = ret;
1976                 goto out;
1977         }
1978
1979         if (refs > 1) {
1980                 ret = enter_shared_node(root, path->nodes[*level]->start,
1981                                         refs, wc, *level);
1982                 if (ret > 0) {
1983                         err = ret;
1984                         goto out;
1985                 }
1986         }
1987
1988         while (*level >= 0) {
1989                 WARN_ON(*level < 0);
1990                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1991                 cur = path->nodes[*level];
1992
1993                 if (btrfs_header_level(cur) != *level)
1994                         WARN_ON(1);
1995
1996                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1997                         break;
1998                 if (*level == 0) {
1999                         ret = process_one_leaf(root, cur, wc);
2000                         if (ret < 0)
2001                                 err = ret;
2002                         break;
2003                 }
2004                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2005                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2006                 blocksize = root->nodesize;
2007                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
2008                                                1, &refs, NULL);
2009                 if (ret < 0)
2010                         refs = 0;
2011
2012                 if (refs > 1) {
2013                         ret = enter_shared_node(root, bytenr, refs,
2014                                                 wc, *level - 1);
2015                         if (ret > 0) {
2016                                 path->slots[*level]++;
2017                                 continue;
2018                         }
2019                 }
2020
2021                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2022                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2023                         free_extent_buffer(next);
2024                         reada_walk_down(root, cur, path->slots[*level]);
2025                         next = read_tree_block(root, bytenr, blocksize,
2026                                                ptr_gen);
2027                         if (!extent_buffer_uptodate(next)) {
2028                                 struct btrfs_key node_key;
2029
2030                                 btrfs_node_key_to_cpu(path->nodes[*level],
2031                                                       &node_key,
2032                                                       path->slots[*level]);
2033                                 btrfs_add_corrupt_extent_record(root->fs_info,
2034                                                 &node_key,
2035                                                 path->nodes[*level]->start,
2036                                                 root->nodesize, *level);
2037                                 err = -EIO;
2038                                 goto out;
2039                         }
2040                 }
2041
2042                 ret = check_child_node(root, cur, path->slots[*level], next);
2043                 if (ret) {
2044                         err = ret;
2045                         goto out;
2046                 }
2047
2048                 if (btrfs_is_leaf(next))
2049                         status = btrfs_check_leaf(root, NULL, next);
2050                 else
2051                         status = btrfs_check_node(root, NULL, next);
2052                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2053                         free_extent_buffer(next);
2054                         err = -EIO;
2055                         goto out;
2056                 }
2057
2058                 *level = *level - 1;
2059                 free_extent_buffer(path->nodes[*level]);
2060                 path->nodes[*level] = next;
2061                 path->slots[*level] = 0;
2062         }
2063 out:
2064         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2065         return err;
2066 }
2067
2068 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2069                         struct walk_control *wc, int *level)
2070 {
2071         int i;
2072         struct extent_buffer *leaf;
2073
2074         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2075                 leaf = path->nodes[i];
2076                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2077                         path->slots[i]++;
2078                         *level = i;
2079                         return 0;
2080                 } else {
2081                         free_extent_buffer(path->nodes[*level]);
2082                         path->nodes[*level] = NULL;
2083                         BUG_ON(*level > wc->active_node);
2084                         if (*level == wc->active_node)
2085                                 leave_shared_node(root, wc, *level);
2086                         *level = i + 1;
2087                 }
2088         }
2089         return 1;
2090 }
2091
2092 static int check_root_dir(struct inode_record *rec)
2093 {
2094         struct inode_backref *backref;
2095         int ret = -1;
2096
2097         if (!rec->found_inode_item || rec->errors)
2098                 goto out;
2099         if (rec->nlink != 1 || rec->found_link != 0)
2100                 goto out;
2101         if (list_empty(&rec->backrefs))
2102                 goto out;
2103         backref = to_inode_backref(rec->backrefs.next);
2104         if (!backref->found_inode_ref)
2105                 goto out;
2106         if (backref->index != 0 || backref->namelen != 2 ||
2107             memcmp(backref->name, "..", 2))
2108                 goto out;
2109         if (backref->found_dir_index || backref->found_dir_item)
2110                 goto out;
2111         ret = 0;
2112 out:
2113         return ret;
2114 }
2115
2116 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2117                               struct btrfs_root *root, struct btrfs_path *path,
2118                               struct inode_record *rec)
2119 {
2120         struct btrfs_inode_item *ei;
2121         struct btrfs_key key;
2122         int ret;
2123
2124         key.objectid = rec->ino;
2125         key.type = BTRFS_INODE_ITEM_KEY;
2126         key.offset = (u64)-1;
2127
2128         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2129         if (ret < 0)
2130                 goto out;
2131         if (ret) {
2132                 if (!path->slots[0]) {
2133                         ret = -ENOENT;
2134                         goto out;
2135                 }
2136                 path->slots[0]--;
2137                 ret = 0;
2138         }
2139         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2140         if (key.objectid != rec->ino) {
2141                 ret = -ENOENT;
2142                 goto out;
2143         }
2144
2145         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2146                             struct btrfs_inode_item);
2147         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2148         btrfs_mark_buffer_dirty(path->nodes[0]);
2149         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2150         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2151                root->root_key.objectid);
2152 out:
2153         btrfs_release_path(path);
2154         return ret;
2155 }
2156
2157 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2158                                     struct btrfs_root *root,
2159                                     struct btrfs_path *path,
2160                                     struct inode_record *rec)
2161 {
2162         int ret;
2163
2164         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2165         btrfs_release_path(path);
2166         if (!ret)
2167                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2168         return ret;
2169 }
2170
2171 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2172                                struct btrfs_root *root,
2173                                struct btrfs_path *path,
2174                                struct inode_record *rec)
2175 {
2176         struct btrfs_inode_item *ei;
2177         struct btrfs_key key;
2178         int ret = 0;
2179
2180         key.objectid = rec->ino;
2181         key.type = BTRFS_INODE_ITEM_KEY;
2182         key.offset = 0;
2183
2184         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2185         if (ret) {
2186                 if (ret > 0)
2187                         ret = -ENOENT;
2188                 goto out;
2189         }
2190
2191         /* Since ret == 0, no need to check anything */
2192         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2193                             struct btrfs_inode_item);
2194         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2195         btrfs_mark_buffer_dirty(path->nodes[0]);
2196         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2197         printf("reset nbytes for ino %llu root %llu\n",
2198                rec->ino, root->root_key.objectid);
2199 out:
2200         btrfs_release_path(path);
2201         return ret;
2202 }
2203
2204 static int add_missing_dir_index(struct btrfs_root *root,
2205                                  struct cache_tree *inode_cache,
2206                                  struct inode_record *rec,
2207                                  struct inode_backref *backref)
2208 {
2209         struct btrfs_path *path;
2210         struct btrfs_trans_handle *trans;
2211         struct btrfs_dir_item *dir_item;
2212         struct extent_buffer *leaf;
2213         struct btrfs_key key;
2214         struct btrfs_disk_key disk_key;
2215         struct inode_record *dir_rec;
2216         unsigned long name_ptr;
2217         u32 data_size = sizeof(*dir_item) + backref->namelen;
2218         int ret;
2219
2220         path = btrfs_alloc_path();
2221         if (!path)
2222                 return -ENOMEM;
2223
2224         trans = btrfs_start_transaction(root, 1);
2225         if (IS_ERR(trans)) {
2226                 btrfs_free_path(path);
2227                 return PTR_ERR(trans);
2228         }
2229
2230         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2231                 (unsigned long long)rec->ino);
2232         key.objectid = backref->dir;
2233         key.type = BTRFS_DIR_INDEX_KEY;
2234         key.offset = backref->index;
2235
2236         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2237         BUG_ON(ret);
2238
2239         leaf = path->nodes[0];
2240         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2241
2242         disk_key.objectid = cpu_to_le64(rec->ino);
2243         disk_key.type = BTRFS_INODE_ITEM_KEY;
2244         disk_key.offset = 0;
2245
2246         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2247         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2248         btrfs_set_dir_data_len(leaf, dir_item, 0);
2249         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2250         name_ptr = (unsigned long)(dir_item + 1);
2251         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2252         btrfs_mark_buffer_dirty(leaf);
2253         btrfs_free_path(path);
2254         btrfs_commit_transaction(trans, root);
2255
2256         backref->found_dir_index = 1;
2257         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2258         BUG_ON(IS_ERR(dir_rec));
2259         if (!dir_rec)
2260                 return 0;
2261         dir_rec->found_size += backref->namelen;
2262         if (dir_rec->found_size == dir_rec->isize &&
2263             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2264                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2265         if (dir_rec->found_size != dir_rec->isize)
2266                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2267
2268         return 0;
2269 }
2270
2271 static int delete_dir_index(struct btrfs_root *root,
2272                             struct cache_tree *inode_cache,
2273                             struct inode_record *rec,
2274                             struct inode_backref *backref)
2275 {
2276         struct btrfs_trans_handle *trans;
2277         struct btrfs_dir_item *di;
2278         struct btrfs_path *path;
2279         int ret = 0;
2280
2281         path = btrfs_alloc_path();
2282         if (!path)
2283                 return -ENOMEM;
2284
2285         trans = btrfs_start_transaction(root, 1);
2286         if (IS_ERR(trans)) {
2287                 btrfs_free_path(path);
2288                 return PTR_ERR(trans);
2289         }
2290
2291
2292         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2293                 (unsigned long long)backref->dir,
2294                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2295                 (unsigned long long)root->objectid);
2296
2297         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2298                                     backref->name, backref->namelen,
2299                                     backref->index, -1);
2300         if (IS_ERR(di)) {
2301                 ret = PTR_ERR(di);
2302                 btrfs_free_path(path);
2303                 btrfs_commit_transaction(trans, root);
2304                 if (ret == -ENOENT)
2305                         return 0;
2306                 return ret;
2307         }
2308
2309         if (!di)
2310                 ret = btrfs_del_item(trans, root, path);
2311         else
2312                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2313         BUG_ON(ret);
2314         btrfs_free_path(path);
2315         btrfs_commit_transaction(trans, root);
2316         return ret;
2317 }
2318
2319 static int create_inode_item(struct btrfs_root *root,
2320                              struct inode_record *rec,
2321                              struct inode_backref *backref, int root_dir)
2322 {
2323         struct btrfs_trans_handle *trans;
2324         struct btrfs_inode_item inode_item;
2325         time_t now = time(NULL);
2326         int ret;
2327
2328         trans = btrfs_start_transaction(root, 1);
2329         if (IS_ERR(trans)) {
2330                 ret = PTR_ERR(trans);
2331                 return ret;
2332         }
2333
2334         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2335                 "be incomplete, please check permissions and content after "
2336                 "the fsck completes.\n", (unsigned long long)root->objectid,
2337                 (unsigned long long)rec->ino);
2338
2339         memset(&inode_item, 0, sizeof(inode_item));
2340         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2341         if (root_dir)
2342                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2343         else
2344                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2345         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2346         if (rec->found_dir_item) {
2347                 if (rec->found_file_extent)
2348                         fprintf(stderr, "root %llu inode %llu has both a dir "
2349                                 "item and extents, unsure if it is a dir or a "
2350                                 "regular file so setting it as a directory\n",
2351                                 (unsigned long long)root->objectid,
2352                                 (unsigned long long)rec->ino);
2353                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2354                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2355         } else if (!rec->found_dir_item) {
2356                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2357                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2358         }
2359         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2360         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2361         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2362         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2363         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2364         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2365         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2366         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2367
2368         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2369         BUG_ON(ret);
2370         btrfs_commit_transaction(trans, root);
2371         return 0;
2372 }
2373
2374 static int repair_inode_backrefs(struct btrfs_root *root,
2375                                  struct inode_record *rec,
2376                                  struct cache_tree *inode_cache,
2377                                  int delete)
2378 {
2379         struct inode_backref *tmp, *backref;
2380         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2381         int ret = 0;
2382         int repaired = 0;
2383
2384         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2385                 if (!delete && rec->ino == root_dirid) {
2386                         if (!rec->found_inode_item) {
2387                                 ret = create_inode_item(root, rec, backref, 1);
2388                                 if (ret)
2389                                         break;
2390                                 repaired++;
2391                         }
2392                 }
2393
2394                 /* Index 0 for root dir's are special, don't mess with it */
2395                 if (rec->ino == root_dirid && backref->index == 0)
2396                         continue;
2397
2398                 if (delete &&
2399                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2400                      (backref->found_dir_index && backref->found_inode_ref &&
2401                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2402                         ret = delete_dir_index(root, inode_cache, rec, backref);
2403                         if (ret)
2404                                 break;
2405                         repaired++;
2406                         list_del(&backref->list);
2407                         free(backref);
2408                 }
2409
2410                 if (!delete && !backref->found_dir_index &&
2411                     backref->found_dir_item && backref->found_inode_ref) {
2412                         ret = add_missing_dir_index(root, inode_cache, rec,
2413                                                     backref);
2414                         if (ret)
2415                                 break;
2416                         repaired++;
2417                         if (backref->found_dir_item &&
2418                             backref->found_dir_index &&
2419                             backref->found_dir_index) {
2420                                 if (!backref->errors &&
2421                                     backref->found_inode_ref) {
2422                                         list_del(&backref->list);
2423                                         free(backref);
2424                                 }
2425                         }
2426                 }
2427
2428                 if (!delete && (!backref->found_dir_index &&
2429                                 !backref->found_dir_item &&
2430                                 backref->found_inode_ref)) {
2431                         struct btrfs_trans_handle *trans;
2432                         struct btrfs_key location;
2433
2434                         ret = check_dir_conflict(root, backref->name,
2435                                                  backref->namelen,
2436                                                  backref->dir,
2437                                                  backref->index);
2438                         if (ret) {
2439                                 /*
2440                                  * let nlink fixing routine to handle it,
2441                                  * which can do it better.
2442                                  */
2443                                 ret = 0;
2444                                 break;
2445                         }
2446                         location.objectid = rec->ino;
2447                         location.type = BTRFS_INODE_ITEM_KEY;
2448                         location.offset = 0;
2449
2450                         trans = btrfs_start_transaction(root, 1);
2451                         if (IS_ERR(trans)) {
2452                                 ret = PTR_ERR(trans);
2453                                 break;
2454                         }
2455                         fprintf(stderr, "adding missing dir index/item pair "
2456                                 "for inode %llu\n",
2457                                 (unsigned long long)rec->ino);
2458                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2459                                                     backref->namelen,
2460                                                     backref->dir, &location,
2461                                                     imode_to_type(rec->imode),
2462                                                     backref->index);
2463                         BUG_ON(ret);
2464                         btrfs_commit_transaction(trans, root);
2465                         repaired++;
2466                 }
2467
2468                 if (!delete && (backref->found_inode_ref &&
2469                                 backref->found_dir_index &&
2470                                 backref->found_dir_item &&
2471                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2472                                 !rec->found_inode_item)) {
2473                         ret = create_inode_item(root, rec, backref, 0);
2474                         if (ret)
2475                                 break;
2476                         repaired++;
2477                 }
2478
2479         }
2480         return ret ? ret : repaired;
2481 }
2482
2483 /*
2484  * To determine the file type for nlink/inode_item repair
2485  *
2486  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2487  * Return -ENOENT if file type is not found.
2488  */
2489 static int find_file_type(struct inode_record *rec, u8 *type)
2490 {
2491         struct inode_backref *backref;
2492
2493         /* For inode item recovered case */
2494         if (rec->found_inode_item) {
2495                 *type = imode_to_type(rec->imode);
2496                 return 0;
2497         }
2498
2499         list_for_each_entry(backref, &rec->backrefs, list) {
2500                 if (backref->found_dir_index || backref->found_dir_item) {
2501                         *type = backref->filetype;
2502                         return 0;
2503                 }
2504         }
2505         return -ENOENT;
2506 }
2507
2508 /*
2509  * To determine the file name for nlink repair
2510  *
2511  * Return 0 if file name is found, set name and namelen.
2512  * Return -ENOENT if file name is not found.
2513  */
2514 static int find_file_name(struct inode_record *rec,
2515                           char *name, int *namelen)
2516 {
2517         struct inode_backref *backref;
2518
2519         list_for_each_entry(backref, &rec->backrefs, list) {
2520                 if (backref->found_dir_index || backref->found_dir_item ||
2521                     backref->found_inode_ref) {
2522                         memcpy(name, backref->name, backref->namelen);
2523                         *namelen = backref->namelen;
2524                         return 0;
2525                 }
2526         }
2527         return -ENOENT;
2528 }
2529
2530 /* Reset the nlink of the inode to the correct one */
2531 static int reset_nlink(struct btrfs_trans_handle *trans,
2532                        struct btrfs_root *root,
2533                        struct btrfs_path *path,
2534                        struct inode_record *rec)
2535 {
2536         struct inode_backref *backref;
2537         struct inode_backref *tmp;
2538         struct btrfs_key key;
2539         struct btrfs_inode_item *inode_item;
2540         int ret = 0;
2541
2542         /* We don't believe this either, reset it and iterate backref */
2543         rec->found_link = 0;
2544
2545         /* Remove all backref including the valid ones */
2546         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2547                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2548                                    backref->index, backref->name,
2549                                    backref->namelen, 0);
2550                 if (ret < 0)
2551                         goto out;
2552
2553                 /* remove invalid backref, so it won't be added back */
2554                 if (!(backref->found_dir_index &&
2555                       backref->found_dir_item &&
2556                       backref->found_inode_ref)) {
2557                         list_del(&backref->list);
2558                         free(backref);
2559                 } else {
2560                         rec->found_link++;
2561                 }
2562         }
2563
2564         /* Set nlink to 0 */
2565         key.objectid = rec->ino;
2566         key.type = BTRFS_INODE_ITEM_KEY;
2567         key.offset = 0;
2568         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2569         if (ret < 0)
2570                 goto out;
2571         if (ret > 0) {
2572                 ret = -ENOENT;
2573                 goto out;
2574         }
2575         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2576                                     struct btrfs_inode_item);
2577         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2578         btrfs_mark_buffer_dirty(path->nodes[0]);
2579         btrfs_release_path(path);
2580
2581         /*
2582          * Add back valid inode_ref/dir_item/dir_index,
2583          * add_link() will handle the nlink inc, so new nlink must be correct
2584          */
2585         list_for_each_entry(backref, &rec->backrefs, list) {
2586                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2587                                      backref->name, backref->namelen,
2588                                      backref->filetype, &backref->index, 1);
2589                 if (ret < 0)
2590                         goto out;
2591         }
2592 out:
2593         btrfs_release_path(path);
2594         return ret;
2595 }
2596
2597 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2598                                struct btrfs_root *root,
2599                                struct btrfs_path *path,
2600                                struct inode_record *rec)
2601 {
2602         char *dir_name = "lost+found";
2603         char namebuf[BTRFS_NAME_LEN] = {0};
2604         u64 lost_found_ino;
2605         u32 mode = 0700;
2606         u8 type = 0;
2607         int namelen = 0;
2608         int name_recovered = 0;
2609         int type_recovered = 0;
2610         int ret = 0;
2611
2612         /*
2613          * Get file name and type first before these invalid inode ref
2614          * are deleted by remove_all_invalid_backref()
2615          */
2616         name_recovered = !find_file_name(rec, namebuf, &namelen);
2617         type_recovered = !find_file_type(rec, &type);
2618
2619         if (!name_recovered) {
2620                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2621                        rec->ino, rec->ino);
2622                 namelen = count_digits(rec->ino);
2623                 sprintf(namebuf, "%llu", rec->ino);
2624                 name_recovered = 1;
2625         }
2626         if (!type_recovered) {
2627                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2628                        rec->ino);
2629                 type = BTRFS_FT_REG_FILE;
2630                 type_recovered = 1;
2631         }
2632
2633         ret = reset_nlink(trans, root, path, rec);
2634         if (ret < 0) {
2635                 fprintf(stderr,
2636                         "Failed to reset nlink for inode %llu: %s\n",
2637                         rec->ino, strerror(-ret));
2638                 goto out;
2639         }
2640
2641         if (rec->found_link == 0) {
2642                 lost_found_ino = root->highest_inode;
2643                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2644                         ret = -EOVERFLOW;
2645                         goto out;
2646                 }
2647                 lost_found_ino++;
2648                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2649                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2650                                   mode);
2651                 if (ret < 0) {
2652                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2653                                 dir_name, strerror(-ret));
2654                         goto out;
2655                 }
2656                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2657                                      namebuf, namelen, type, NULL, 1);
2658                 /*
2659                  * Add ".INO" suffix several times to handle case where
2660                  * "FILENAME.INO" is already taken by another file.
2661                  */
2662                 while (ret == -EEXIST) {
2663                         /*
2664                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2665                          */
2666                         if (namelen + count_digits(rec->ino) + 1 >
2667                             BTRFS_NAME_LEN) {
2668                                 ret = -EFBIG;
2669                                 goto out;
2670                         }
2671                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2672                                  ".%llu", rec->ino);
2673                         namelen += count_digits(rec->ino) + 1;
2674                         ret = btrfs_add_link(trans, root, rec->ino,
2675                                              lost_found_ino, namebuf,
2676                                              namelen, type, NULL, 1);
2677                 }
2678                 if (ret < 0) {
2679                         fprintf(stderr,
2680                                 "Failed to link the inode %llu to %s dir: %s\n",
2681                                 rec->ino, dir_name, strerror(-ret));
2682                         goto out;
2683                 }
2684                 /*
2685                  * Just increase the found_link, don't actually add the
2686                  * backref. This will make things easier and this inode
2687                  * record will be freed after the repair is done.
2688                  * So fsck will not report problem about this inode.
2689                  */
2690                 rec->found_link++;
2691                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2692                        namelen, namebuf, dir_name);
2693         }
2694         printf("Fixed the nlink of inode %llu\n", rec->ino);
2695 out:
2696         /*
2697          * Clear the flag anyway, or we will loop forever for the same inode
2698          * as it will not be removed from the bad inode list and the dead loop
2699          * happens.
2700          */
2701         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2702         btrfs_release_path(path);
2703         return ret;
2704 }
2705
2706 /*
2707  * Check if there is any normal(reg or prealloc) file extent for given
2708  * ino.
2709  * This is used to determine the file type when neither its dir_index/item or
2710  * inode_item exists.
2711  *
2712  * This will *NOT* report error, if any error happens, just consider it does
2713  * not have any normal file extent.
2714  */
2715 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2716 {
2717         struct btrfs_path *path;
2718         struct btrfs_key key;
2719         struct btrfs_key found_key;
2720         struct btrfs_file_extent_item *fi;
2721         u8 type;
2722         int ret = 0;
2723
2724         path = btrfs_alloc_path();
2725         if (!path)
2726                 goto out;
2727         key.objectid = ino;
2728         key.type = BTRFS_EXTENT_DATA_KEY;
2729         key.offset = 0;
2730
2731         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2732         if (ret < 0) {
2733                 ret = 0;
2734                 goto out;
2735         }
2736         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2737                 ret = btrfs_next_leaf(root, path);
2738                 if (ret) {
2739                         ret = 0;
2740                         goto out;
2741                 }
2742         }
2743         while (1) {
2744                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2745                                       path->slots[0]);
2746                 if (found_key.objectid != ino ||
2747                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2748                         break;
2749                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2750                                     struct btrfs_file_extent_item);
2751                 type = btrfs_file_extent_type(path->nodes[0], fi);
2752                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2753                         ret = 1;
2754                         goto out;
2755                 }
2756         }
2757 out:
2758         btrfs_free_path(path);
2759         return ret;
2760 }
2761
2762 static u32 btrfs_type_to_imode(u8 type)
2763 {
2764         static u32 imode_by_btrfs_type[] = {
2765                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2766                 [BTRFS_FT_DIR]          = S_IFDIR,
2767                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2768                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2769                 [BTRFS_FT_FIFO]         = S_IFIFO,
2770                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2771                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2772         };
2773
2774         return imode_by_btrfs_type[(type)];
2775 }
2776
2777 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2778                                 struct btrfs_root *root,
2779                                 struct btrfs_path *path,
2780                                 struct inode_record *rec)
2781 {
2782         u8 filetype;
2783         u32 mode = 0700;
2784         int type_recovered = 0;
2785         int ret = 0;
2786
2787         printf("Trying to rebuild inode:%llu\n", rec->ino);
2788
2789         type_recovered = !find_file_type(rec, &filetype);
2790
2791         /*
2792          * Try to determine inode type if type not found.
2793          *
2794          * For found regular file extent, it must be FILE.
2795          * For found dir_item/index, it must be DIR.
2796          *
2797          * For undetermined one, use FILE as fallback.
2798          *
2799          * TODO:
2800          * 1. If found backref(inode_index/item is already handled) to it,
2801          *    it must be DIR.
2802          *    Need new inode-inode ref structure to allow search for that.
2803          */
2804         if (!type_recovered) {
2805                 if (rec->found_file_extent &&
2806                     find_normal_file_extent(root, rec->ino)) {
2807                         type_recovered = 1;
2808                         filetype = BTRFS_FT_REG_FILE;
2809                 } else if (rec->found_dir_item) {
2810                         type_recovered = 1;
2811                         filetype = BTRFS_FT_DIR;
2812                 } else if (!list_empty(&rec->orphan_extents)) {
2813                         type_recovered = 1;
2814                         filetype = BTRFS_FT_REG_FILE;
2815                 } else{
2816                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2817                                rec->ino);
2818                         type_recovered = 1;
2819                         filetype = BTRFS_FT_REG_FILE;
2820                 }
2821         }
2822
2823         ret = btrfs_new_inode(trans, root, rec->ino,
2824                               mode | btrfs_type_to_imode(filetype));
2825         if (ret < 0)
2826                 goto out;
2827
2828         /*
2829          * Here inode rebuild is done, we only rebuild the inode item,
2830          * don't repair the nlink(like move to lost+found).
2831          * That is the job of nlink repair.
2832          *
2833          * We just fill the record and return
2834          */
2835         rec->found_dir_item = 1;
2836         rec->imode = mode | btrfs_type_to_imode(filetype);
2837         rec->nlink = 0;
2838         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2839         /* Ensure the inode_nlinks repair function will be called */
2840         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2841 out:
2842         return ret;
2843 }
2844
2845 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2846                                       struct btrfs_root *root,
2847                                       struct btrfs_path *path,
2848                                       struct inode_record *rec)
2849 {
2850         struct orphan_data_extent *orphan;
2851         struct orphan_data_extent *tmp;
2852         int ret = 0;
2853
2854         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2855                 /*
2856                  * Check for conflicting file extents
2857                  *
2858                  * Here we don't know whether the extents is compressed or not,
2859                  * so we can only assume it not compressed nor data offset,
2860                  * and use its disk_len as extent length.
2861                  */
2862                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2863                                        orphan->offset, orphan->disk_len, 0);
2864                 btrfs_release_path(path);
2865                 if (ret < 0)
2866                         goto out;
2867                 if (!ret) {
2868                         fprintf(stderr,
2869                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2870                                 orphan->disk_bytenr, orphan->disk_len);
2871                         ret = btrfs_free_extent(trans,
2872                                         root->fs_info->extent_root,
2873                                         orphan->disk_bytenr, orphan->disk_len,
2874                                         0, root->objectid, orphan->objectid,
2875                                         orphan->offset);
2876                         if (ret < 0)
2877                                 goto out;
2878                 }
2879                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2880                                 orphan->offset, orphan->disk_bytenr,
2881                                 orphan->disk_len, orphan->disk_len);
2882                 if (ret < 0)
2883                         goto out;
2884
2885                 /* Update file size info */
2886                 rec->found_size += orphan->disk_len;
2887                 if (rec->found_size == rec->nbytes)
2888                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2889
2890                 /* Update the file extent hole info too */
2891                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2892                                            orphan->disk_len);
2893                 if (ret < 0)
2894                         goto out;
2895                 if (RB_EMPTY_ROOT(&rec->holes))
2896                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2897
2898                 list_del(&orphan->list);
2899                 free(orphan);
2900         }
2901         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2902 out:
2903         return ret;
2904 }
2905
2906 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2907                                         struct btrfs_root *root,
2908                                         struct btrfs_path *path,
2909                                         struct inode_record *rec)
2910 {
2911         struct rb_node *node;
2912         struct file_extent_hole *hole;
2913         int found = 0;
2914         int ret = 0;
2915
2916         node = rb_first(&rec->holes);
2917
2918         while (node) {
2919                 found = 1;
2920                 hole = rb_entry(node, struct file_extent_hole, node);
2921                 ret = btrfs_punch_hole(trans, root, rec->ino,
2922                                        hole->start, hole->len);
2923                 if (ret < 0)
2924                         goto out;
2925                 ret = del_file_extent_hole(&rec->holes, hole->start,
2926                                            hole->len);
2927                 if (ret < 0)
2928                         goto out;
2929                 if (RB_EMPTY_ROOT(&rec->holes))
2930                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2931                 node = rb_first(&rec->holes);
2932         }
2933         /* special case for a file losing all its file extent */
2934         if (!found) {
2935                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2936                                        round_up(rec->isize, root->sectorsize));
2937                 if (ret < 0)
2938                         goto out;
2939         }
2940         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2941                rec->ino, root->objectid);
2942 out:
2943         return ret;
2944 }
2945
2946 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2947 {
2948         struct btrfs_trans_handle *trans;
2949         struct btrfs_path *path;
2950         int ret = 0;
2951
2952         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2953                              I_ERR_NO_ORPHAN_ITEM |
2954                              I_ERR_LINK_COUNT_WRONG |
2955                              I_ERR_NO_INODE_ITEM |
2956                              I_ERR_FILE_EXTENT_ORPHAN |
2957                              I_ERR_FILE_EXTENT_DISCOUNT|
2958                              I_ERR_FILE_NBYTES_WRONG)))
2959                 return rec->errors;
2960
2961         path = btrfs_alloc_path();
2962         if (!path)
2963                 return -ENOMEM;
2964
2965         /*
2966          * For nlink repair, it may create a dir and add link, so
2967          * 2 for parent(256)'s dir_index and dir_item
2968          * 2 for lost+found dir's inode_item and inode_ref
2969          * 1 for the new inode_ref of the file
2970          * 2 for lost+found dir's dir_index and dir_item for the file
2971          */
2972         trans = btrfs_start_transaction(root, 7);
2973         if (IS_ERR(trans)) {
2974                 btrfs_free_path(path);
2975                 return PTR_ERR(trans);
2976         }
2977
2978         if (rec->errors & I_ERR_NO_INODE_ITEM)
2979                 ret = repair_inode_no_item(trans, root, path, rec);
2980         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2981                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2982         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2983                 ret = repair_inode_discount_extent(trans, root, path, rec);
2984         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2985                 ret = repair_inode_isize(trans, root, path, rec);
2986         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2987                 ret = repair_inode_orphan_item(trans, root, path, rec);
2988         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2989                 ret = repair_inode_nlinks(trans, root, path, rec);
2990         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2991                 ret = repair_inode_nbytes(trans, root, path, rec);
2992         btrfs_commit_transaction(trans, root);
2993         btrfs_free_path(path);
2994         return ret;
2995 }
2996
2997 static int check_inode_recs(struct btrfs_root *root,
2998                             struct cache_tree *inode_cache)
2999 {
3000         struct cache_extent *cache;
3001         struct ptr_node *node;
3002         struct inode_record *rec;
3003         struct inode_backref *backref;
3004         int stage = 0;
3005         int ret = 0;
3006         int err = 0;
3007         u64 error = 0;
3008         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3009
3010         if (btrfs_root_refs(&root->root_item) == 0) {
3011                 if (!cache_tree_empty(inode_cache))
3012                         fprintf(stderr, "warning line %d\n", __LINE__);
3013                 return 0;
3014         }
3015
3016         /*
3017          * We need to record the highest inode number for later 'lost+found'
3018          * dir creation.
3019          * We must select an ino not used/referred by any existing inode, or
3020          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3021          * this may cause 'lost+found' dir has wrong nlinks.
3022          */
3023         cache = last_cache_extent(inode_cache);
3024         if (cache) {
3025                 node = container_of(cache, struct ptr_node, cache);
3026                 rec = node->data;
3027                 if (rec->ino > root->highest_inode)
3028                         root->highest_inode = rec->ino;
3029         }
3030
3031         /*
3032          * We need to repair backrefs first because we could change some of the
3033          * errors in the inode recs.
3034          *
3035          * We also need to go through and delete invalid backrefs first and then
3036          * add the correct ones second.  We do this because we may get EEXIST
3037          * when adding back the correct index because we hadn't yet deleted the
3038          * invalid index.
3039          *
3040          * For example, if we were missing a dir index then the directories
3041          * isize would be wrong, so if we fixed the isize to what we thought it
3042          * would be and then fixed the backref we'd still have a invalid fs, so
3043          * we need to add back the dir index and then check to see if the isize
3044          * is still wrong.
3045          */
3046         while (stage < 3) {
3047                 stage++;
3048                 if (stage == 3 && !err)
3049                         break;
3050
3051                 cache = search_cache_extent(inode_cache, 0);
3052                 while (repair && cache) {
3053                         node = container_of(cache, struct ptr_node, cache);
3054                         rec = node->data;
3055                         cache = next_cache_extent(cache);
3056
3057                         /* Need to free everything up and rescan */
3058                         if (stage == 3) {
3059                                 remove_cache_extent(inode_cache, &node->cache);
3060                                 free(node);
3061                                 free_inode_rec(rec);
3062                                 continue;
3063                         }
3064
3065                         if (list_empty(&rec->backrefs))
3066                                 continue;
3067
3068                         ret = repair_inode_backrefs(root, rec, inode_cache,
3069                                                     stage == 1);
3070                         if (ret < 0) {
3071                                 err = ret;
3072                                 stage = 2;
3073                                 break;
3074                         } if (ret > 0) {
3075                                 err = -EAGAIN;
3076                         }
3077                 }
3078         }
3079         if (err)
3080                 return err;
3081
3082         rec = get_inode_rec(inode_cache, root_dirid, 0);
3083         BUG_ON(IS_ERR(rec));
3084         if (rec) {
3085                 ret = check_root_dir(rec);
3086                 if (ret) {
3087                         fprintf(stderr, "root %llu root dir %llu error\n",
3088                                 (unsigned long long)root->root_key.objectid,
3089                                 (unsigned long long)root_dirid);
3090                         print_inode_error(root, rec);
3091                         error++;
3092                 }
3093         } else {
3094                 if (repair) {
3095                         struct btrfs_trans_handle *trans;
3096
3097                         trans = btrfs_start_transaction(root, 1);
3098                         if (IS_ERR(trans)) {
3099                                 err = PTR_ERR(trans);
3100                                 return err;
3101                         }
3102
3103                         fprintf(stderr,
3104                                 "root %llu missing its root dir, recreating\n",
3105                                 (unsigned long long)root->objectid);
3106
3107                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3108                         BUG_ON(ret);
3109
3110                         btrfs_commit_transaction(trans, root);
3111                         return -EAGAIN;
3112                 }
3113
3114                 fprintf(stderr, "root %llu root dir %llu not found\n",
3115                         (unsigned long long)root->root_key.objectid,
3116                         (unsigned long long)root_dirid);
3117         }
3118
3119         while (1) {
3120                 cache = search_cache_extent(inode_cache, 0);
3121                 if (!cache)
3122                         break;
3123                 node = container_of(cache, struct ptr_node, cache);
3124                 rec = node->data;
3125                 remove_cache_extent(inode_cache, &node->cache);
3126                 free(node);
3127                 if (rec->ino == root_dirid ||
3128                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3129                         free_inode_rec(rec);
3130                         continue;
3131                 }
3132
3133                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3134                         ret = check_orphan_item(root, rec->ino);
3135                         if (ret == 0)
3136                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3137                         if (can_free_inode_rec(rec)) {
3138                                 free_inode_rec(rec);
3139                                 continue;
3140                         }
3141                 }
3142
3143                 if (!rec->found_inode_item)
3144                         rec->errors |= I_ERR_NO_INODE_ITEM;
3145                 if (rec->found_link != rec->nlink)
3146                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3147                 if (repair) {
3148                         ret = try_repair_inode(root, rec);
3149                         if (ret == 0 && can_free_inode_rec(rec)) {
3150                                 free_inode_rec(rec);
3151                                 continue;
3152                         }
3153                         ret = 0;
3154                 }
3155
3156                 if (!(repair && ret == 0))
3157                         error++;
3158                 print_inode_error(root, rec);
3159                 list_for_each_entry(backref, &rec->backrefs, list) {
3160                         if (!backref->found_dir_item)
3161                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3162                         if (!backref->found_dir_index)
3163                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3164                         if (!backref->found_inode_ref)
3165                                 backref->errors |= REF_ERR_NO_INODE_REF;
3166                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3167                                 " namelen %u name %s filetype %d errors %x",
3168                                 (unsigned long long)backref->dir,
3169                                 (unsigned long long)backref->index,
3170                                 backref->namelen, backref->name,
3171                                 backref->filetype, backref->errors);
3172                         print_ref_error(backref->errors);
3173                 }
3174                 free_inode_rec(rec);
3175         }
3176         return (error > 0) ? -1 : 0;
3177 }
3178
3179 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3180                                         u64 objectid)
3181 {
3182         struct cache_extent *cache;
3183         struct root_record *rec = NULL;
3184         int ret;
3185
3186         cache = lookup_cache_extent(root_cache, objectid, 1);
3187         if (cache) {
3188                 rec = container_of(cache, struct root_record, cache);
3189         } else {
3190                 rec = calloc(1, sizeof(*rec));
3191                 if (!rec)
3192                         return ERR_PTR(-ENOMEM);
3193                 rec->objectid = objectid;
3194                 INIT_LIST_HEAD(&rec->backrefs);
3195                 rec->cache.start = objectid;
3196                 rec->cache.size = 1;
3197
3198                 ret = insert_cache_extent(root_cache, &rec->cache);
3199                 if (ret)
3200                         return ERR_PTR(-EEXIST);
3201         }
3202         return rec;
3203 }
3204
3205 static struct root_backref *get_root_backref(struct root_record *rec,
3206                                              u64 ref_root, u64 dir, u64 index,
3207                                              const char *name, int namelen)
3208 {
3209         struct root_backref *backref;
3210
3211         list_for_each_entry(backref, &rec->backrefs, list) {
3212                 if (backref->ref_root != ref_root || backref->dir != dir ||
3213                     backref->namelen != namelen)
3214                         continue;
3215                 if (memcmp(name, backref->name, namelen))
3216                         continue;
3217                 return backref;
3218         }
3219
3220         backref = calloc(1, sizeof(*backref) + namelen + 1);
3221         if (!backref)
3222                 return NULL;
3223         backref->ref_root = ref_root;
3224         backref->dir = dir;
3225         backref->index = index;
3226         backref->namelen = namelen;
3227         memcpy(backref->name, name, namelen);
3228         backref->name[namelen] = '\0';
3229         list_add_tail(&backref->list, &rec->backrefs);
3230         return backref;
3231 }
3232
3233 static void free_root_record(struct cache_extent *cache)
3234 {
3235         struct root_record *rec;
3236         struct root_backref *backref;
3237
3238         rec = container_of(cache, struct root_record, cache);
3239         while (!list_empty(&rec->backrefs)) {
3240                 backref = to_root_backref(rec->backrefs.next);
3241                 list_del(&backref->list);
3242                 free(backref);
3243         }
3244
3245         kfree(rec);
3246 }
3247
3248 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3249
3250 static int add_root_backref(struct cache_tree *root_cache,
3251                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3252                             const char *name, int namelen,
3253                             int item_type, int errors)
3254 {
3255         struct root_record *rec;
3256         struct root_backref *backref;
3257
3258         rec = get_root_rec(root_cache, root_id);
3259         BUG_ON(IS_ERR(rec));
3260         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3261         BUG_ON(!backref);
3262
3263         backref->errors |= errors;
3264
3265         if (item_type != BTRFS_DIR_ITEM_KEY) {
3266                 if (backref->found_dir_index || backref->found_back_ref ||
3267                     backref->found_forward_ref) {
3268                         if (backref->index != index)
3269                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3270                 } else {
3271                         backref->index = index;
3272                 }
3273         }
3274
3275         if (item_type == BTRFS_DIR_ITEM_KEY) {
3276                 if (backref->found_forward_ref)
3277                         rec->found_ref++;
3278                 backref->found_dir_item = 1;
3279         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3280                 backref->found_dir_index = 1;
3281         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3282                 if (backref->found_forward_ref)
3283                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3284                 else if (backref->found_dir_item)
3285                         rec->found_ref++;
3286                 backref->found_forward_ref = 1;
3287         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3288                 if (backref->found_back_ref)
3289                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3290                 backref->found_back_ref = 1;
3291         } else {
3292                 BUG_ON(1);
3293         }
3294
3295         if (backref->found_forward_ref && backref->found_dir_item)
3296                 backref->reachable = 1;
3297         return 0;
3298 }
3299
3300 static int merge_root_recs(struct btrfs_root *root,
3301                            struct cache_tree *src_cache,
3302                            struct cache_tree *dst_cache)
3303 {
3304         struct cache_extent *cache;
3305         struct ptr_node *node;
3306         struct inode_record *rec;
3307         struct inode_backref *backref;
3308         int ret = 0;
3309
3310         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3311                 free_inode_recs_tree(src_cache);
3312                 return 0;
3313         }
3314
3315         while (1) {
3316                 cache = search_cache_extent(src_cache, 0);
3317                 if (!cache)
3318                         break;
3319                 node = container_of(cache, struct ptr_node, cache);
3320                 rec = node->data;
3321                 remove_cache_extent(src_cache, &node->cache);
3322                 free(node);
3323
3324                 ret = is_child_root(root, root->objectid, rec->ino);
3325                 if (ret < 0)
3326                         break;
3327                 else if (ret == 0)
3328                         goto skip;
3329
3330                 list_for_each_entry(backref, &rec->backrefs, list) {
3331                         BUG_ON(backref->found_inode_ref);
3332                         if (backref->found_dir_item)
3333                                 add_root_backref(dst_cache, rec->ino,
3334                                         root->root_key.objectid, backref->dir,
3335                                         backref->index, backref->name,
3336                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3337                                         backref->errors);
3338                         if (backref->found_dir_index)
3339                                 add_root_backref(dst_cache, rec->ino,
3340                                         root->root_key.objectid, backref->dir,
3341                                         backref->index, backref->name,
3342                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3343                                         backref->errors);
3344                 }
3345 skip:
3346                 free_inode_rec(rec);
3347         }
3348         if (ret < 0)
3349                 return ret;
3350         return 0;
3351 }
3352
3353 static int check_root_refs(struct btrfs_root *root,
3354                            struct cache_tree *root_cache)
3355 {
3356         struct root_record *rec;
3357         struct root_record *ref_root;
3358         struct root_backref *backref;
3359         struct cache_extent *cache;
3360         int loop = 1;
3361         int ret;
3362         int error;
3363         int errors = 0;
3364
3365         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3366         BUG_ON(IS_ERR(rec));
3367         rec->found_ref = 1;
3368
3369         /* fixme: this can not detect circular references */
3370         while (loop) {
3371                 loop = 0;
3372                 cache = search_cache_extent(root_cache, 0);
3373                 while (1) {
3374                         if (!cache)
3375                                 break;
3376                         rec = container_of(cache, struct root_record, cache);
3377                         cache = next_cache_extent(cache);
3378
3379                         if (rec->found_ref == 0)
3380                                 continue;
3381
3382                         list_for_each_entry(backref, &rec->backrefs, list) {
3383                                 if (!backref->reachable)
3384                                         continue;
3385
3386                                 ref_root = get_root_rec(root_cache,
3387                                                         backref->ref_root);
3388                                 BUG_ON(IS_ERR(ref_root));
3389                                 if (ref_root->found_ref > 0)
3390                                         continue;
3391
3392                                 backref->reachable = 0;
3393                                 rec->found_ref--;
3394                                 if (rec->found_ref == 0)
3395                                         loop = 1;
3396                         }
3397                 }
3398         }
3399
3400         cache = search_cache_extent(root_cache, 0);
3401         while (1) {
3402                 if (!cache)
3403                         break;
3404                 rec = container_of(cache, struct root_record, cache);
3405                 cache = next_cache_extent(cache);
3406
3407                 if (rec->found_ref == 0 &&
3408                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3409                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3410                         ret = check_orphan_item(root->fs_info->tree_root,
3411                                                 rec->objectid);
3412                         if (ret == 0)
3413                                 continue;
3414
3415                         /*
3416                          * If we don't have a root item then we likely just have
3417                          * a dir item in a snapshot for this root but no actual
3418                          * ref key or anything so it's meaningless.
3419                          */
3420                         if (!rec->found_root_item)
3421                                 continue;
3422                         errors++;
3423                         fprintf(stderr, "fs tree %llu not referenced\n",
3424                                 (unsigned long long)rec->objectid);
3425                 }
3426
3427                 error = 0;
3428                 if (rec->found_ref > 0 && !rec->found_root_item)
3429                         error = 1;
3430                 list_for_each_entry(backref, &rec->backrefs, list) {
3431                         if (!backref->found_dir_item)
3432                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3433                         if (!backref->found_dir_index)
3434                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3435                         if (!backref->found_back_ref)
3436                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3437                         if (!backref->found_forward_ref)
3438                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3439                         if (backref->reachable && backref->errors)
3440                                 error = 1;
3441                 }
3442                 if (!error)
3443                         continue;
3444
3445                 errors++;
3446                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3447                         (unsigned long long)rec->objectid, rec->found_ref,
3448                          rec->found_root_item ? "" : "not found");
3449
3450                 list_for_each_entry(backref, &rec->backrefs, list) {
3451                         if (!backref->reachable)
3452                                 continue;
3453                         if (!backref->errors && rec->found_root_item)
3454                                 continue;
3455                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3456                                 " index %llu namelen %u name %s errors %x\n",
3457                                 (unsigned long long)backref->ref_root,
3458                                 (unsigned long long)backref->dir,
3459                                 (unsigned long long)backref->index,
3460                                 backref->namelen, backref->name,
3461                                 backref->errors);
3462                         print_ref_error(backref->errors);
3463                 }
3464         }
3465         return errors > 0 ? 1 : 0;
3466 }
3467
3468 static int process_root_ref(struct extent_buffer *eb, int slot,
3469                             struct btrfs_key *key,
3470                             struct cache_tree *root_cache)
3471 {
3472         u64 dirid;
3473         u64 index;
3474         u32 len;
3475         u32 name_len;
3476         struct btrfs_root_ref *ref;
3477         char namebuf[BTRFS_NAME_LEN];
3478         int error;
3479
3480         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3481
3482         dirid = btrfs_root_ref_dirid(eb, ref);
3483         index = btrfs_root_ref_sequence(eb, ref);
3484         name_len = btrfs_root_ref_name_len(eb, ref);
3485
3486         if (name_len <= BTRFS_NAME_LEN) {
3487                 len = name_len;
3488                 error = 0;
3489         } else {
3490                 len = BTRFS_NAME_LEN;
3491                 error = REF_ERR_NAME_TOO_LONG;
3492         }
3493         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3494
3495         if (key->type == BTRFS_ROOT_REF_KEY) {
3496                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3497                                  index, namebuf, len, key->type, error);
3498         } else {
3499                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3500                                  index, namebuf, len, key->type, error);
3501         }
3502         return 0;
3503 }
3504
3505 static void free_corrupt_block(struct cache_extent *cache)
3506 {
3507         struct btrfs_corrupt_block *corrupt;
3508
3509         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3510         free(corrupt);
3511 }
3512
3513 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3514
3515 /*
3516  * Repair the btree of the given root.
3517  *
3518  * The fix is to remove the node key in corrupt_blocks cache_tree.
3519  * and rebalance the tree.
3520  * After the fix, the btree should be writeable.
3521  */
3522 static int repair_btree(struct btrfs_root *root,
3523                         struct cache_tree *corrupt_blocks)
3524 {
3525         struct btrfs_trans_handle *trans;
3526         struct btrfs_path *path;
3527         struct btrfs_corrupt_block *corrupt;
3528         struct cache_extent *cache;
3529         struct btrfs_key key;
3530         u64 offset;
3531         int level;
3532         int ret = 0;
3533
3534         if (cache_tree_empty(corrupt_blocks))
3535                 return 0;
3536
3537         path = btrfs_alloc_path();
3538         if (!path)
3539                 return -ENOMEM;
3540
3541         trans = btrfs_start_transaction(root, 1);
3542         if (IS_ERR(trans)) {
3543                 ret = PTR_ERR(trans);
3544                 fprintf(stderr, "Error starting transaction: %s\n",
3545                         strerror(-ret));
3546                 goto out_free_path;
3547         }
3548         cache = first_cache_extent(corrupt_blocks);
3549         while (cache) {
3550                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551                                        cache);
3552                 level = corrupt->level;
3553                 path->lowest_level = level;
3554                 key.objectid = corrupt->key.objectid;
3555                 key.type = corrupt->key.type;
3556                 key.offset = corrupt->key.offset;
3557
3558                 /*
3559                  * Here we don't want to do any tree balance, since it may
3560                  * cause a balance with corrupted brother leaf/node,
3561                  * so ins_len set to 0 here.
3562                  * Balance will be done after all corrupt node/leaf is deleted.
3563                  */
3564                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3565                 if (ret < 0)
3566                         goto out;
3567                 offset = btrfs_node_blockptr(path->nodes[level],
3568                                              path->slots[level]);
3569
3570                 /* Remove the ptr */
3571                 ret = btrfs_del_ptr(trans, root, path, level,
3572                                     path->slots[level]);
3573                 if (ret < 0)
3574                         goto out;
3575                 /*
3576                  * Remove the corresponding extent
3577                  * return value is not concerned.
3578                  */
3579                 btrfs_release_path(path);
3580                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3581                                         0, root->root_key.objectid,
3582                                         level - 1, 0);
3583                 cache = next_cache_extent(cache);
3584         }
3585
3586         /* Balance the btree using btrfs_search_slot() */
3587         cache = first_cache_extent(corrupt_blocks);
3588         while (cache) {
3589                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3590                                        cache);
3591                 memcpy(&key, &corrupt->key, sizeof(key));
3592                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3593                 if (ret < 0)
3594                         goto out;
3595                 /* return will always >0 since it won't find the item */
3596                 ret = 0;
3597                 btrfs_release_path(path);
3598                 cache = next_cache_extent(cache);
3599         }
3600 out:
3601         btrfs_commit_transaction(trans, root);
3602 out_free_path:
3603         btrfs_free_path(path);
3604         return ret;
3605 }
3606
3607 static int check_fs_root(struct btrfs_root *root,
3608                          struct cache_tree *root_cache,
3609                          struct walk_control *wc)
3610 {
3611         int ret = 0;
3612         int err = 0;
3613         int wret;
3614         int level;
3615         struct btrfs_path path;
3616         struct shared_node root_node;
3617         struct root_record *rec;
3618         struct btrfs_root_item *root_item = &root->root_item;
3619         struct cache_tree corrupt_blocks;
3620         struct orphan_data_extent *orphan;
3621         struct orphan_data_extent *tmp;
3622         enum btrfs_tree_block_status status;
3623
3624         /*
3625          * Reuse the corrupt_block cache tree to record corrupted tree block
3626          *
3627          * Unlike the usage in extent tree check, here we do it in a per
3628          * fs/subvol tree base.
3629          */
3630         cache_tree_init(&corrupt_blocks);
3631         root->fs_info->corrupt_blocks = &corrupt_blocks;
3632
3633         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3634                 rec = get_root_rec(root_cache, root->root_key.objectid);
3635                 BUG_ON(IS_ERR(rec));
3636                 if (btrfs_root_refs(root_item) > 0)
3637                         rec->found_root_item = 1;
3638         }
3639
3640         btrfs_init_path(&path);
3641         memset(&root_node, 0, sizeof(root_node));
3642         cache_tree_init(&root_node.root_cache);
3643         cache_tree_init(&root_node.inode_cache);
3644
3645         /* Move the orphan extent record to corresponding inode_record */
3646         list_for_each_entry_safe(orphan, tmp,
3647                                  &root->orphan_data_extents, list) {
3648                 struct inode_record *inode;
3649
3650                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3651                                       1);
3652                 BUG_ON(IS_ERR(inode));
3653                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3654                 list_move(&orphan->list, &inode->orphan_extents);
3655         }
3656
3657         level = btrfs_header_level(root->node);
3658         memset(wc->nodes, 0, sizeof(wc->nodes));
3659         wc->nodes[level] = &root_node;
3660         wc->active_node = level;
3661         wc->root_level = level;
3662
3663         /* We may not have checked the root block, lets do that now */
3664         if (btrfs_is_leaf(root->node))
3665                 status = btrfs_check_leaf(root, NULL, root->node);
3666         else
3667                 status = btrfs_check_node(root, NULL, root->node);
3668         if (status != BTRFS_TREE_BLOCK_CLEAN)
3669                 return -EIO;
3670
3671         if (btrfs_root_refs(root_item) > 0 ||
3672             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3673                 path.nodes[level] = root->node;
3674                 extent_buffer_get(root->node);
3675                 path.slots[level] = 0;
3676         } else {
3677                 struct btrfs_key key;
3678                 struct btrfs_disk_key found_key;
3679
3680                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3681                 level = root_item->drop_level;
3682                 path.lowest_level = level;
3683                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3684                 if (wret < 0)
3685                         goto skip_walking;
3686                 btrfs_node_key(path.nodes[level], &found_key,
3687                                 path.slots[level]);
3688                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3689                                         sizeof(found_key)));
3690         }
3691
3692         while (1) {
3693                 wret = walk_down_tree(root, &path, wc, &level);
3694                 if (wret < 0)
3695                         ret = wret;
3696                 if (wret != 0)
3697                         break;
3698
3699                 wret = walk_up_tree(root, &path, wc, &level);
3700                 if (wret < 0)
3701                         ret = wret;
3702                 if (wret != 0)
3703                         break;
3704         }
3705 skip_walking:
3706         btrfs_release_path(&path);
3707
3708         if (!cache_tree_empty(&corrupt_blocks)) {
3709                 struct cache_extent *cache;
3710                 struct btrfs_corrupt_block *corrupt;
3711
3712                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3713                        root->root_key.objectid);
3714                 cache = first_cache_extent(&corrupt_blocks);
3715                 while (cache) {
3716                         corrupt = container_of(cache,
3717                                                struct btrfs_corrupt_block,
3718                                                cache);
3719                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3720                                cache->start, corrupt->level,
3721                                corrupt->key.objectid, corrupt->key.type,
3722                                corrupt->key.offset);
3723                         cache = next_cache_extent(cache);
3724                 }
3725                 if (repair) {
3726                         printf("Try to repair the btree for root %llu\n",
3727                                root->root_key.objectid);
3728                         ret = repair_btree(root, &corrupt_blocks);
3729                         if (ret < 0)
3730                                 fprintf(stderr, "Failed to repair btree: %s\n",
3731                                         strerror(-ret));
3732                         if (!ret)
3733                                 printf("Btree for root %llu is fixed\n",
3734                                        root->root_key.objectid);
3735                 }
3736         }
3737
3738         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3739         if (err < 0)
3740                 ret = err;
3741
3742         if (root_node.current) {
3743                 root_node.current->checked = 1;
3744                 maybe_free_inode_rec(&root_node.inode_cache,
3745                                 root_node.current);
3746         }
3747
3748         err = check_inode_recs(root, &root_node.inode_cache);
3749         if (!ret)
3750                 ret = err;
3751
3752         free_corrupt_blocks_tree(&corrupt_blocks);
3753         root->fs_info->corrupt_blocks = NULL;
3754         free_orphan_data_extents(&root->orphan_data_extents);
3755         return ret;
3756 }
3757
3758 static int fs_root_objectid(u64 objectid)
3759 {
3760         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3761             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3762                 return 1;
3763         return is_fstree(objectid);
3764 }
3765
3766 static int check_fs_roots(struct btrfs_root *root,
3767                           struct cache_tree *root_cache)
3768 {
3769         struct btrfs_path path;
3770         struct btrfs_key key;
3771         struct walk_control wc;
3772         struct extent_buffer *leaf, *tree_node;
3773         struct btrfs_root *tmp_root;
3774         struct btrfs_root *tree_root = root->fs_info->tree_root;
3775         int ret;
3776         int err = 0;
3777
3778         if (ctx.progress_enabled) {
3779                 ctx.tp = TASK_FS_ROOTS;
3780                 task_start(ctx.info);
3781         }
3782
3783         /*
3784          * Just in case we made any changes to the extent tree that weren't
3785          * reflected into the free space cache yet.
3786          */
3787         if (repair)
3788                 reset_cached_block_groups(root->fs_info);
3789         memset(&wc, 0, sizeof(wc));
3790         cache_tree_init(&wc.shared);
3791         btrfs_init_path(&path);
3792
3793 again:
3794         key.offset = 0;
3795         key.objectid = 0;
3796         key.type = BTRFS_ROOT_ITEM_KEY;
3797         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3798         if (ret < 0) {
3799                 err = 1;
3800                 goto out;
3801         }
3802         tree_node = tree_root->node;
3803         while (1) {
3804                 if (tree_node != tree_root->node) {
3805                         free_root_recs_tree(root_cache);
3806                         btrfs_release_path(&path);
3807                         goto again;
3808                 }
3809                 leaf = path.nodes[0];
3810                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3811                         ret = btrfs_next_leaf(tree_root, &path);
3812                         if (ret) {
3813                                 if (ret < 0)
3814                                         err = 1;
3815                                 break;
3816                         }
3817                         leaf = path.nodes[0];
3818                 }
3819                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3820                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3821                     fs_root_objectid(key.objectid)) {
3822                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3823                                 tmp_root = btrfs_read_fs_root_no_cache(
3824                                                 root->fs_info, &key);
3825                         } else {
3826                                 key.offset = (u64)-1;
3827                                 tmp_root = btrfs_read_fs_root(
3828                                                 root->fs_info, &key);
3829                         }
3830                         if (IS_ERR(tmp_root)) {
3831                                 err = 1;
3832                                 goto next;
3833                         }
3834                         ret = check_fs_root(tmp_root, root_cache, &wc);
3835                         if (ret == -EAGAIN) {
3836                                 free_root_recs_tree(root_cache);
3837                                 btrfs_release_path(&path);
3838                                 goto again;
3839                         }
3840                         if (ret)
3841                                 err = 1;
3842                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3843                                 btrfs_free_fs_root(tmp_root);
3844                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3845                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3846                         process_root_ref(leaf, path.slots[0], &key,
3847                                          root_cache);
3848                 }
3849 next:
3850                 path.slots[0]++;
3851         }
3852 out:
3853         btrfs_release_path(&path);
3854         if (err)
3855                 free_extent_cache_tree(&wc.shared);
3856         if (!cache_tree_empty(&wc.shared))
3857                 fprintf(stderr, "warning line %d\n", __LINE__);
3858
3859         task_stop(ctx.info);
3860
3861         return err;
3862 }
3863
3864 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3865 {
3866         struct rb_node *n;
3867         struct extent_backref *back;
3868         struct tree_backref *tback;
3869         struct data_backref *dback;
3870         u64 found = 0;
3871         int err = 0;
3872
3873         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3874                 back = rb_node_to_extent_backref(n);
3875                 if (!back->found_extent_tree) {
3876                         err = 1;
3877                         if (!print_errs)
3878                                 goto out;
3879                         if (back->is_data) {
3880                                 dback = to_data_backref(back);
3881                                 fprintf(stderr, "Backref %llu %s %llu"
3882                                         " owner %llu offset %llu num_refs %lu"
3883                                         " not found in extent tree\n",
3884                                         (unsigned long long)rec->start,
3885                                         back->full_backref ?
3886                                         "parent" : "root",
3887                                         back->full_backref ?
3888                                         (unsigned long long)dback->parent:
3889                                         (unsigned long long)dback->root,
3890                                         (unsigned long long)dback->owner,
3891                                         (unsigned long long)dback->offset,
3892                                         (unsigned long)dback->num_refs);
3893                         } else {
3894                                 tback = to_tree_backref(back);
3895                                 fprintf(stderr, "Backref %llu parent %llu"
3896                                         " root %llu not found in extent tree\n",
3897                                         (unsigned long long)rec->start,
3898                                         (unsigned long long)tback->parent,
3899                                         (unsigned long long)tback->root);
3900                         }
3901                 }
3902                 if (!back->is_data && !back->found_ref) {
3903                         err = 1;
3904                         if (!print_errs)
3905                                 goto out;
3906                         tback = to_tree_backref(back);
3907                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3908                                 (unsigned long long)rec->start,
3909                                 back->full_backref ? "parent" : "root",
3910                                 back->full_backref ?
3911                                 (unsigned long long)tback->parent :
3912                                 (unsigned long long)tback->root, back);
3913                 }
3914                 if (back->is_data) {
3915                         dback = to_data_backref(back);
3916                         if (dback->found_ref != dback->num_refs) {
3917                                 err = 1;
3918                                 if (!print_errs)
3919                                         goto out;
3920                                 fprintf(stderr, "Incorrect local backref count"
3921                                         " on %llu %s %llu owner %llu"
3922                                         " offset %llu found %u wanted %u back %p\n",
3923                                         (unsigned long long)rec->start,
3924                                         back->full_backref ?
3925                                         "parent" : "root",
3926                                         back->full_backref ?
3927                                         (unsigned long long)dback->parent:
3928                                         (unsigned long long)dback->root,
3929                                         (unsigned long long)dback->owner,
3930                                         (unsigned long long)dback->offset,
3931                                         dback->found_ref, dback->num_refs, back);
3932                         }
3933                         if (dback->disk_bytenr != rec->start) {
3934                                 err = 1;
3935                                 if (!print_errs)
3936                                         goto out;
3937                                 fprintf(stderr, "Backref disk bytenr does not"
3938                                         " match extent record, bytenr=%llu, "
3939                                         "ref bytenr=%llu\n",
3940                                         (unsigned long long)rec->start,
3941                                         (unsigned long long)dback->disk_bytenr);
3942                         }
3943
3944                         if (dback->bytes != rec->nr) {
3945                                 err = 1;
3946                                 if (!print_errs)
3947                                         goto out;
3948                                 fprintf(stderr, "Backref bytes do not match "
3949                                         "extent backref, bytenr=%llu, ref "
3950                                         "bytes=%llu, backref bytes=%llu\n",
3951                                         (unsigned long long)rec->start,
3952                                         (unsigned long long)rec->nr,
3953                                         (unsigned long long)dback->bytes);
3954                         }
3955                 }
3956                 if (!back->is_data) {
3957                         found += 1;
3958                 } else {
3959                         dback = to_data_backref(back);
3960                         found += dback->found_ref;
3961                 }
3962         }
3963         if (found != rec->refs) {
3964                 err = 1;
3965                 if (!print_errs)
3966                         goto out;
3967                 fprintf(stderr, "Incorrect global backref count "
3968                         "on %llu found %llu wanted %llu\n",
3969                         (unsigned long long)rec->start,
3970                         (unsigned long long)found,
3971                         (unsigned long long)rec->refs);
3972         }
3973 out:
3974         return err;
3975 }
3976
/* rb_free_nodes() callback: release the backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3983
3984 static void free_all_extent_backrefs(struct extent_record *rec)
3985 {
3986         rb_free_nodes(&rec->backref_tree, __free_one_backref);
3987 }
3988
3989 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3990                                      struct cache_tree *extent_cache)
3991 {
3992         struct cache_extent *cache;
3993         struct extent_record *rec;
3994
3995         while (1) {
3996                 cache = first_cache_extent(extent_cache);
3997                 if (!cache)
3998                         break;
3999                 rec = container_of(cache, struct extent_record, cache);
4000                 remove_cache_extent(extent_cache, cache);
4001                 free_all_extent_backrefs(rec);
4002                 free(rec);
4003         }
4004 }
4005
4006 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4007                                  struct extent_record *rec)
4008 {
4009         if (rec->content_checked && rec->owner_ref_checked &&
4010             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4011             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4012             !rec->bad_full_backref && !rec->crossing_stripes &&
4013             !rec->wrong_chunk_type) {
4014                 remove_cache_extent(extent_cache, &rec->cache);
4015                 free_all_extent_backrefs(rec);
4016                 list_del_init(&rec->list);
4017                 free(rec);
4018         }
4019         return 0;
4020 }
4021
4022 static int check_owner_ref(struct btrfs_root *root,
4023                             struct extent_record *rec,
4024                             struct extent_buffer *buf)
4025 {
4026         struct extent_backref *node, *tmp;
4027         struct tree_backref *back;
4028         struct btrfs_root *ref_root;
4029         struct btrfs_key key;
4030         struct btrfs_path path;
4031         struct extent_buffer *parent;
4032         int level;
4033         int found = 0;
4034         int ret;
4035
4036         rbtree_postorder_for_each_entry_safe(node, tmp,
4037                                              &rec->backref_tree, node) {
4038                 if (node->is_data)
4039                         continue;
4040                 if (!node->found_ref)
4041                         continue;
4042                 if (node->full_backref)
4043                         continue;
4044                 back = to_tree_backref(node);
4045                 if (btrfs_header_owner(buf) == back->root)
4046                         return 0;
4047         }
4048         BUG_ON(rec->is_root);
4049
4050         /* try to find the block by search corresponding fs tree */
4051         key.objectid = btrfs_header_owner(buf);
4052         key.type = BTRFS_ROOT_ITEM_KEY;
4053         key.offset = (u64)-1;
4054
4055         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4056         if (IS_ERR(ref_root))
4057                 return 1;
4058
4059         level = btrfs_header_level(buf);
4060         if (level == 0)
4061                 btrfs_item_key_to_cpu(buf, &key, 0);
4062         else
4063                 btrfs_node_key_to_cpu(buf, &key, 0);
4064
4065         btrfs_init_path(&path);
4066         path.lowest_level = level + 1;
4067         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4068         if (ret < 0)
4069                 return 0;
4070
4071         parent = path.nodes[level + 1];
4072         if (parent && buf->start == btrfs_node_blockptr(parent,
4073                                                         path.slots[level + 1]))
4074                 found = 1;
4075
4076         btrfs_release_path(&path);
4077         return found ? 0 : 1;
4078 }
4079
4080 static int is_extent_tree_record(struct extent_record *rec)
4081 {
4082         struct extent_backref *ref, *tmp;
4083         struct tree_backref *back;
4084         int is_extent = 0;
4085
4086         rbtree_postorder_for_each_entry_safe(ref, tmp,
4087                                              &rec->backref_tree, node) {
4088                 if (ref->is_data)
4089                         return 0;
4090                 back = to_tree_backref(ref);
4091                 if (ref->full_backref)
4092                         return 0;
4093                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4094                         is_extent = 1;
4095         }
4096         return is_extent;
4097 }
4098
4099
4100 static int record_bad_block_io(struct btrfs_fs_info *info,
4101                                struct cache_tree *extent_cache,
4102                                u64 start, u64 len)
4103 {
4104         struct extent_record *rec;
4105         struct cache_extent *cache;
4106         struct btrfs_key key;
4107
4108         cache = lookup_cache_extent(extent_cache, start, len);
4109         if (!cache)
4110                 return 0;
4111
4112         rec = container_of(cache, struct extent_record, cache);
4113         if (!is_extent_tree_record(rec))
4114                 return 0;
4115
4116         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4117         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4118 }
4119
4120 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4121                        struct extent_buffer *buf, int slot)
4122 {
4123         if (btrfs_header_level(buf)) {
4124                 struct btrfs_key_ptr ptr1, ptr2;
4125
4126                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4127                                    sizeof(struct btrfs_key_ptr));
4128                 read_extent_buffer(buf, &ptr2,
4129                                    btrfs_node_key_ptr_offset(slot + 1),
4130                                    sizeof(struct btrfs_key_ptr));
4131                 write_extent_buffer(buf, &ptr1,
4132                                     btrfs_node_key_ptr_offset(slot + 1),
4133                                     sizeof(struct btrfs_key_ptr));
4134                 write_extent_buffer(buf, &ptr2,
4135                                     btrfs_node_key_ptr_offset(slot),
4136                                     sizeof(struct btrfs_key_ptr));
4137                 if (slot == 0) {
4138                         struct btrfs_disk_key key;
4139                         btrfs_node_key(buf, &key, 0);
4140                         btrfs_fixup_low_keys(root, path, &key,
4141                                              btrfs_header_level(buf) + 1);
4142                 }
4143         } else {
4144                 struct btrfs_item *item1, *item2;
4145                 struct btrfs_key k1, k2;
4146                 char *item1_data, *item2_data;
4147                 u32 item1_offset, item2_offset, item1_size, item2_size;
4148
4149                 item1 = btrfs_item_nr(slot);
4150                 item2 = btrfs_item_nr(slot + 1);
4151                 btrfs_item_key_to_cpu(buf, &k1, slot);
4152                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4153                 item1_offset = btrfs_item_offset(buf, item1);
4154                 item2_offset = btrfs_item_offset(buf, item2);
4155                 item1_size = btrfs_item_size(buf, item1);
4156                 item2_size = btrfs_item_size(buf, item2);
4157
4158                 item1_data = malloc(item1_size);
4159                 if (!item1_data)
4160                         return -ENOMEM;
4161                 item2_data = malloc(item2_size);
4162                 if (!item2_data) {
4163                         free(item1_data);
4164                         return -ENOMEM;
4165                 }
4166
4167                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4168                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4169
4170                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4171                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4172                 free(item1_data);
4173                 free(item2_data);
4174
4175                 btrfs_set_item_offset(buf, item1, item2_offset);
4176                 btrfs_set_item_offset(buf, item2, item1_offset);
4177                 btrfs_set_item_size(buf, item1, item2_size);
4178                 btrfs_set_item_size(buf, item2, item1_size);
4179
4180                 path->slots[0] = slot;
4181                 btrfs_set_item_key_unsafe(root, path, &k2);
4182                 path->slots[0] = slot + 1;
4183                 btrfs_set_item_key_unsafe(root, path, &k1);
4184         }
4185         return 0;
4186 }
4187
4188 static int fix_key_order(struct btrfs_trans_handle *trans,
4189                          struct btrfs_root *root,
4190                          struct btrfs_path *path)
4191 {
4192         struct extent_buffer *buf;
4193         struct btrfs_key k1, k2;
4194         int i;
4195         int level = path->lowest_level;
4196         int ret = -EIO;
4197
4198         buf = path->nodes[level];
4199         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4200                 if (level) {
4201                         btrfs_node_key_to_cpu(buf, &k1, i);
4202                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4203                 } else {
4204                         btrfs_item_key_to_cpu(buf, &k1, i);
4205                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4206                 }
4207                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4208                         continue;
4209                 ret = swap_values(root, path, buf, i);
4210                 if (ret)
4211                         break;
4212                 btrfs_mark_buffer_dirty(buf);
4213                 i = 0;
4214         }
4215         return ret;
4216 }
4217
4218 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4219                              struct btrfs_root *root,
4220                              struct btrfs_path *path,
4221                              struct extent_buffer *buf, int slot)
4222 {
4223         struct btrfs_key key;
4224         int nritems = btrfs_header_nritems(buf);
4225
4226         btrfs_item_key_to_cpu(buf, &key, slot);
4227
4228         /* These are all the keys we can deal with missing. */
4229         if (key.type != BTRFS_DIR_INDEX_KEY &&
4230             key.type != BTRFS_EXTENT_ITEM_KEY &&
4231             key.type != BTRFS_METADATA_ITEM_KEY &&
4232             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4233             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4234                 return -1;
4235
4236         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4237                (unsigned long long)key.objectid, key.type,
4238                (unsigned long long)key.offset, slot, buf->start);
4239         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4240                               btrfs_item_nr_offset(slot + 1),
4241                               sizeof(struct btrfs_item) *
4242                               (nritems - slot - 1));
4243         btrfs_set_header_nritems(buf, nritems - 1);
4244         if (slot == 0) {
4245                 struct btrfs_disk_key disk_key;
4246
4247                 btrfs_item_key(buf, &disk_key, 0);
4248                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4249         }
4250         btrfs_mark_buffer_dirty(buf);
4251         return 0;
4252 }
4253
4254 static int fix_item_offset(struct btrfs_trans_handle *trans,
4255                            struct btrfs_root *root,
4256                            struct btrfs_path *path)
4257 {
4258         struct extent_buffer *buf;
4259         int i;
4260         int ret = 0;
4261
4262         /* We should only get this for leaves */
4263         BUG_ON(path->lowest_level);
4264         buf = path->nodes[0];
4265 again:
4266         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4267                 unsigned int shift = 0, offset;
4268
4269                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4270                     BTRFS_LEAF_DATA_SIZE(root)) {
4271                         if (btrfs_item_end_nr(buf, i) >
4272                             BTRFS_LEAF_DATA_SIZE(root)) {
4273                                 ret = delete_bogus_item(trans, root, path,
4274                                                         buf, i);
4275                                 if (!ret)
4276                                         goto again;
4277                                 fprintf(stderr, "item is off the end of the "
4278                                         "leaf, can't fix\n");
4279                                 ret = -EIO;
4280                                 break;
4281                         }
4282                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4283                                 btrfs_item_end_nr(buf, i);
4284                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4285                            btrfs_item_offset_nr(buf, i - 1)) {
4286                         if (btrfs_item_end_nr(buf, i) >
4287                             btrfs_item_offset_nr(buf, i - 1)) {
4288                                 ret = delete_bogus_item(trans, root, path,
4289                                                         buf, i);
4290                                 if (!ret)
4291                                         goto again;
4292                                 fprintf(stderr, "items overlap, can't fix\n");
4293                                 ret = -EIO;
4294                                 break;
4295                         }
4296                         shift = btrfs_item_offset_nr(buf, i - 1) -
4297                                 btrfs_item_end_nr(buf, i);
4298                 }
4299                 if (!shift)
4300                         continue;
4301
4302                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4303                        i, shift, (unsigned long long)buf->start);
4304                 offset = btrfs_item_offset_nr(buf, i);
4305                 memmove_extent_buffer(buf,
4306                                       btrfs_leaf_data(buf) + offset + shift,
4307                                       btrfs_leaf_data(buf) + offset,
4308                                       btrfs_item_size_nr(buf, i));
4309                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4310                                       offset + shift);
4311                 btrfs_mark_buffer_dirty(buf);
4312         }
4313
4314         /*
4315          * We may have moved things, in which case we want to exit so we don't
4316          * write those changes out.  Once we have proper abort functionality in
4317          * progs this can be changed to something nicer.
4318          */
4319         BUG_ON(ret);
4320         return ret;
4321 }
4322
4323 /*
4324  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4325  * then just return -EIO.
4326  */
4327 static int try_to_fix_bad_block(struct btrfs_root *root,
4328                                 struct extent_buffer *buf,
4329                                 enum btrfs_tree_block_status status)
4330 {
4331         struct btrfs_trans_handle *trans;
4332         struct ulist *roots;
4333         struct ulist_node *node;
4334         struct btrfs_root *search_root;
4335         struct btrfs_path *path;
4336         struct ulist_iterator iter;
4337         struct btrfs_key root_key, key;
4338         int ret;
4339
4340         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4341             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4342                 return -EIO;
4343
4344         path = btrfs_alloc_path();
4345         if (!path)
4346                 return -EIO;
4347
4348         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4349                                    0, &roots);
4350         if (ret) {
4351                 btrfs_free_path(path);
4352                 return -EIO;
4353         }
4354
4355         ULIST_ITER_INIT(&iter);
4356         while ((node = ulist_next(roots, &iter))) {
4357                 root_key.objectid = node->val;
4358                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4359                 root_key.offset = (u64)-1;
4360
4361                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4362                 if (IS_ERR(root)) {
4363                         ret = -EIO;
4364                         break;
4365                 }
4366
4367
4368                 trans = btrfs_start_transaction(search_root, 0);
4369                 if (IS_ERR(trans)) {
4370                         ret = PTR_ERR(trans);
4371                         break;
4372                 }
4373
4374                 path->lowest_level = btrfs_header_level(buf);
4375                 path->skip_check_block = 1;
4376                 if (path->lowest_level)
4377                         btrfs_node_key_to_cpu(buf, &key, 0);
4378                 else
4379                         btrfs_item_key_to_cpu(buf, &key, 0);
4380                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4381                 if (ret) {
4382                         ret = -EIO;
4383                         btrfs_commit_transaction(trans, search_root);
4384                         break;
4385                 }
4386                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4387                         ret = fix_key_order(trans, search_root, path);
4388                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4389                         ret = fix_item_offset(trans, search_root, path);
4390                 if (ret) {
4391                         btrfs_commit_transaction(trans, search_root);
4392                         break;
4393                 }
4394                 btrfs_release_path(path);
4395                 btrfs_commit_transaction(trans, search_root);
4396         }
4397         ulist_free(roots);
4398         btrfs_free_path(path);
4399         return ret;
4400 }
4401
4402 static int check_block(struct btrfs_root *root,
4403                        struct cache_tree *extent_cache,
4404                        struct extent_buffer *buf, u64 flags)
4405 {
4406         struct extent_record *rec;
4407         struct cache_extent *cache;
4408         struct btrfs_key key;
4409         enum btrfs_tree_block_status status;
4410         int ret = 0;
4411         int level;
4412
4413         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4414         if (!cache)
4415                 return 1;
4416         rec = container_of(cache, struct extent_record, cache);
4417         rec->generation = btrfs_header_generation(buf);
4418
4419         level = btrfs_header_level(buf);
4420         if (btrfs_header_nritems(buf) > 0) {
4421
4422                 if (level == 0)
4423                         btrfs_item_key_to_cpu(buf, &key, 0);
4424                 else
4425                         btrfs_node_key_to_cpu(buf, &key, 0);
4426
4427                 rec->info_objectid = key.objectid;
4428         }
4429         rec->info_level = level;
4430
4431         if (btrfs_is_leaf(buf))
4432                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4433         else
4434                 status = btrfs_check_node(root, &rec->parent_key, buf);
4435
4436         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4437                 if (repair)
4438                         status = try_to_fix_bad_block(root, buf, status);
4439                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4440                         ret = -EIO;
4441                         fprintf(stderr, "bad block %llu\n",
4442                                 (unsigned long long)buf->start);
4443                 } else {
4444                         /*
4445                          * Signal to callers we need to start the scan over
4446                          * again since we'll have cowed blocks.
4447                          */
4448                         ret = -EAGAIN;
4449                 }
4450         } else {
4451                 rec->content_checked = 1;
4452                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4453                         rec->owner_ref_checked = 1;
4454                 else {
4455                         ret = check_owner_ref(root, rec, buf);
4456                         if (!ret)
4457                                 rec->owner_ref_checked = 1;
4458                 }
4459         }
4460         if (!ret)
4461                 maybe_free_extent_rec(extent_cache, rec);
4462         return ret;
4463 }
4464
4465
4466 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4467                                                 u64 parent, u64 root)
4468 {
4469         struct rb_node *node;
4470         struct tree_backref *back = NULL;
4471         struct tree_backref match = {
4472                 .node = {
4473                         .is_data = 0,
4474                 },
4475         };
4476
4477         if (parent) {
4478                 match.parent = parent;
4479                 match.node.full_backref = 1;
4480         } else {
4481                 match.root = root;
4482         }
4483
4484         node = rb_search(&rec->backref_tree, &match.node.node,
4485                          (rb_compare_keys)compare_extent_backref, NULL);
4486         if (node)
4487                 back = to_tree_backref(rb_node_to_extent_backref(node));
4488
4489         return back;
4490 }
4491
4492 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4493                                                 u64 parent, u64 root)
4494 {
4495         struct tree_backref *ref = malloc(sizeof(*ref));
4496
4497         if (!ref)
4498                 return NULL;
4499         memset(&ref->node, 0, sizeof(ref->node));
4500         if (parent > 0) {
4501                 ref->parent = parent;
4502                 ref->node.full_backref = 1;
4503         } else {
4504                 ref->root = root;
4505                 ref->node.full_backref = 0;
4506         }
4507         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4508
4509         return ref;
4510 }
4511
4512 static struct data_backref *find_data_backref(struct extent_record *rec,
4513                                                 u64 parent, u64 root,
4514                                                 u64 owner, u64 offset,
4515                                                 int found_ref,
4516                                                 u64 disk_bytenr, u64 bytes)
4517 {
4518         struct rb_node *node;
4519         struct data_backref *back = NULL;
4520         struct data_backref match = {
4521                 .node = {
4522                         .is_data = 1,
4523                 },
4524                 .owner = owner,
4525                 .offset = offset,
4526                 .bytes = bytes,
4527                 .found_ref = found_ref,
4528                 .disk_bytenr = disk_bytenr,
4529         };
4530
4531         if (parent) {
4532                 match.parent = parent;
4533                 match.node.full_backref = 1;
4534         } else {
4535                 match.root = root;
4536         }
4537
4538         node = rb_search(&rec->backref_tree, &match.node.node,
4539                          (rb_compare_keys)compare_extent_backref, NULL);
4540         if (node)
4541                 back = to_data_backref(rb_node_to_extent_backref(node));
4542
4543         return back;
4544 }
4545
4546 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4547                                                 u64 parent, u64 root,
4548                                                 u64 owner, u64 offset,
4549                                                 u64 max_size)
4550 {
4551         struct data_backref *ref = malloc(sizeof(*ref));
4552
4553         if (!ref)
4554                 return NULL;
4555         memset(&ref->node, 0, sizeof(ref->node));
4556         ref->node.is_data = 1;
4557
4558         if (parent > 0) {
4559                 ref->parent = parent;
4560                 ref->owner = 0;
4561                 ref->offset = 0;
4562                 ref->node.full_backref = 1;
4563         } else {
4564                 ref->root = root;
4565                 ref->owner = owner;
4566                 ref->offset = offset;
4567                 ref->node.full_backref = 0;
4568         }
4569         ref->bytes = max_size;
4570         ref->found_ref = 0;
4571         ref->num_refs = 0;
4572         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4573         if (max_size > rec->max_size)
4574                 rec->max_size = max_size;
4575         return ref;
4576 }
4577
4578 /* Check if the type of extent matches with its chunk */
4579 static void check_extent_type(struct extent_record *rec)
4580 {
4581         struct btrfs_block_group_cache *bg_cache;
4582
4583         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4584         if (!bg_cache)
4585                 return;
4586
4587         /* data extent, check chunk directly*/
4588         if (!rec->metadata) {
4589                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4590                         rec->wrong_chunk_type = 1;
4591                 return;
4592         }
4593
4594         /* metadata extent, check the obvious case first */
4595         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4596                                  BTRFS_BLOCK_GROUP_METADATA))) {
4597                 rec->wrong_chunk_type = 1;
4598                 return;
4599         }
4600
4601         /*
4602          * Check SYSTEM extent, as it's also marked as metadata, we can only
4603          * make sure it's a SYSTEM extent by its backref
4604          */
4605         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4606                 struct extent_backref *node;
4607                 struct tree_backref *tback;
4608                 u64 bg_type;
4609
4610                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4611                 if (node->is_data) {
4612                         /* tree block shouldn't have data backref */
4613                         rec->wrong_chunk_type = 1;
4614                         return;
4615                 }
4616                 tback = container_of(node, struct tree_backref, node);
4617
4618                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4619                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4620                 else
4621                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4622                 if (!(bg_cache->flags & bg_type))
4623                         rec->wrong_chunk_type = 1;
4624         }
4625 }
4626
4627 /*
4628  * Allocate a new extent record, fill default values from @tmpl and insert int
4629  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4630  * the cache, otherwise it fails.
4631  */
4632 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4633                 struct extent_record *tmpl)
4634 {
4635         struct extent_record *rec;
4636         int ret = 0;
4637
4638         rec = malloc(sizeof(*rec));
4639         if (!rec)
4640                 return -ENOMEM;
4641         rec->start = tmpl->start;
4642         rec->max_size = tmpl->max_size;
4643         rec->nr = max(tmpl->nr, tmpl->max_size);
4644         rec->found_rec = tmpl->found_rec;
4645         rec->content_checked = tmpl->content_checked;
4646         rec->owner_ref_checked = tmpl->owner_ref_checked;
4647         rec->num_duplicates = 0;
4648         rec->metadata = tmpl->metadata;
4649         rec->flag_block_full_backref = FLAG_UNSET;
4650         rec->bad_full_backref = 0;
4651         rec->crossing_stripes = 0;
4652         rec->wrong_chunk_type = 0;
4653         rec->is_root = tmpl->is_root;
4654         rec->refs = tmpl->refs;
4655         rec->extent_item_refs = tmpl->extent_item_refs;
4656         rec->parent_generation = tmpl->parent_generation;
4657         INIT_LIST_HEAD(&rec->backrefs);
4658         INIT_LIST_HEAD(&rec->dups);
4659         INIT_LIST_HEAD(&rec->list);
4660         rec->backref_tree = RB_ROOT;
4661         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4662         rec->cache.start = tmpl->start;
4663         rec->cache.size = tmpl->nr;
4664         ret = insert_cache_extent(extent_cache, &rec->cache);
4665         BUG_ON(ret);
4666         bytes_used += rec->nr;
4667
4668         if (tmpl->metadata)
4669                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4670                                 global_info->tree_root->nodesize);
4671         check_extent_type(rec);
4672         return ret;
4673 }
4674
4675 /*
4676  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4677  * some are hints:
4678  * - refs              - if found, increase refs
4679  * - is_root           - if found, set
4680  * - content_checked   - if found, set
4681  * - owner_ref_checked - if found, set
4682  *
4683  * If not found, create a new one, initialize and insert.
4684  */
4685 static int add_extent_rec(struct cache_tree *extent_cache,
4686                 struct extent_record *tmpl)
4687 {
4688         struct extent_record *rec;
4689         struct cache_extent *cache;
4690         int ret = 0;
4691         int dup = 0;
4692
4693         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4694         if (cache) {
4695                 rec = container_of(cache, struct extent_record, cache);
4696                 if (tmpl->refs)
4697                         rec->refs++;
4698                 if (rec->nr == 1)
4699                         rec->nr = max(tmpl->nr, tmpl->max_size);
4700
4701                 /*
4702                  * We need to make sure to reset nr to whatever the extent
4703                  * record says was the real size, this way we can compare it to
4704                  * the backrefs.
4705                  */
4706                 if (tmpl->found_rec) {
4707                         if (tmpl->start != rec->start || rec->found_rec) {
4708                                 struct extent_record *tmp;
4709
4710                                 dup = 1;
4711                                 if (list_empty(&rec->list))
4712                                         list_add_tail(&rec->list,
4713                                                       &duplicate_extents);
4714
4715                                 /*
4716                                  * We have to do this song and dance in case we
4717                                  * find an extent record that falls inside of
4718                                  * our current extent record but does not have
4719                                  * the same objectid.
4720                                  */
4721                                 tmp = malloc(sizeof(*tmp));
4722                                 if (!tmp)
4723                                         return -ENOMEM;
4724                                 tmp->start = tmpl->start;
4725                                 tmp->max_size = tmpl->max_size;
4726                                 tmp->nr = tmpl->nr;
4727                                 tmp->found_rec = 1;
4728                                 tmp->metadata = tmpl->metadata;
4729                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4730                                 INIT_LIST_HEAD(&tmp->list);
4731                                 list_add_tail(&tmp->list, &rec->dups);
4732                                 rec->num_duplicates++;
4733                         } else {
4734                                 rec->nr = tmpl->nr;
4735                                 rec->found_rec = 1;
4736                         }
4737                 }
4738
4739                 if (tmpl->extent_item_refs && !dup) {
4740                         if (rec->extent_item_refs) {
4741                                 fprintf(stderr, "block %llu rec "
4742                                         "extent_item_refs %llu, passed %llu\n",
4743                                         (unsigned long long)tmpl->start,
4744                                         (unsigned long long)
4745                                                         rec->extent_item_refs,
4746                                         (unsigned long long)tmpl->extent_item_refs);
4747                         }
4748                         rec->extent_item_refs = tmpl->extent_item_refs;
4749                 }
4750                 if (tmpl->is_root)
4751                         rec->is_root = 1;
4752                 if (tmpl->content_checked)
4753                         rec->content_checked = 1;
4754                 if (tmpl->owner_ref_checked)
4755                         rec->owner_ref_checked = 1;
4756                 memcpy(&rec->parent_key, &tmpl->parent_key,
4757                                 sizeof(tmpl->parent_key));
4758                 if (tmpl->parent_generation)
4759                         rec->parent_generation = tmpl->parent_generation;
4760                 if (rec->max_size < tmpl->max_size)
4761                         rec->max_size = tmpl->max_size;
4762
4763                 /*
4764                  * A metadata extent can't cross stripe_len boundary, otherwise
4765                  * kernel scrub won't be able to handle it.
4766                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4767                  * it.
4768                  */
4769                 if (tmpl->metadata)
4770                         rec->crossing_stripes = check_crossing_stripes(
4771                                 rec->start, global_info->tree_root->nodesize);
4772                 check_extent_type(rec);
4773                 maybe_free_extent_rec(extent_cache, rec);
4774                 return ret;
4775         }
4776
4777         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4778
4779         return ret;
4780 }
4781
4782 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4783                             u64 parent, u64 root, int found_ref)
4784 {
4785         struct extent_record *rec;
4786         struct tree_backref *back;
4787         struct cache_extent *cache;
4788
4789         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4790         if (!cache) {
4791                 struct extent_record tmpl;
4792
4793                 memset(&tmpl, 0, sizeof(tmpl));
4794                 tmpl.start = bytenr;
4795                 tmpl.nr = 1;
4796                 tmpl.metadata = 1;
4797
4798                 add_extent_rec_nolookup(extent_cache, &tmpl);
4799
4800                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4801                 if (!cache)
4802                         abort();
4803         }
4804
4805         rec = container_of(cache, struct extent_record, cache);
4806         if (rec->start != bytenr) {
4807                 abort();
4808         }
4809
4810         back = find_tree_backref(rec, parent, root);
4811         if (!back) {
4812                 back = alloc_tree_backref(rec, parent, root);
4813                 BUG_ON(!back);
4814         }
4815
4816         if (found_ref) {
4817                 if (back->node.found_ref) {
4818                         fprintf(stderr, "Extent back ref already exists "
4819                                 "for %llu parent %llu root %llu \n",
4820                                 (unsigned long long)bytenr,
4821                                 (unsigned long long)parent,
4822                                 (unsigned long long)root);
4823                 }
4824                 back->node.found_ref = 1;
4825         } else {
4826                 if (back->node.found_extent_tree) {
4827                         fprintf(stderr, "Extent back ref already exists "
4828                                 "for %llu parent %llu root %llu \n",
4829                                 (unsigned long long)bytenr,
4830                                 (unsigned long long)parent,
4831                                 (unsigned long long)root);
4832                 }
4833                 back->node.found_extent_tree = 1;
4834         }
4835         check_extent_type(rec);
4836         maybe_free_extent_rec(extent_cache, rec);
4837         return 0;
4838 }
4839
4840 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4841                             u64 parent, u64 root, u64 owner, u64 offset,
4842                             u32 num_refs, int found_ref, u64 max_size)
4843 {
4844         struct extent_record *rec;
4845         struct data_backref *back;
4846         struct cache_extent *cache;
4847
4848         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4849         if (!cache) {
4850                 struct extent_record tmpl;
4851
4852                 memset(&tmpl, 0, sizeof(tmpl));
4853                 tmpl.start = bytenr;
4854                 tmpl.nr = 1;
4855                 tmpl.max_size = max_size;
4856
4857                 add_extent_rec_nolookup(extent_cache, &tmpl);
4858
4859                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4860                 if (!cache)
4861                         abort();
4862         }
4863
4864         rec = container_of(cache, struct extent_record, cache);
4865         if (rec->max_size < max_size)
4866                 rec->max_size = max_size;
4867
4868         /*
4869          * If found_ref is set then max_size is the real size and must match the
4870          * existing refs.  So if we have already found a ref then we need to
4871          * make sure that this ref matches the existing one, otherwise we need
4872          * to add a new backref so we can notice that the backrefs don't match
4873          * and we need to figure out who is telling the truth.  This is to
4874          * account for that awful fsync bug I introduced where we'd end up with
4875          * a btrfs_file_extent_item that would have its length include multiple
4876          * prealloc extents or point inside of a prealloc extent.
4877          */
4878         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4879                                  bytenr, max_size);
4880         if (!back) {
4881                 back = alloc_data_backref(rec, parent, root, owner, offset,
4882                                           max_size);
4883                 BUG_ON(!back);
4884         }
4885
4886         if (found_ref) {
4887                 BUG_ON(num_refs != 1);
4888                 if (back->node.found_ref)
4889                         BUG_ON(back->bytes != max_size);
4890                 back->node.found_ref = 1;
4891                 back->found_ref += 1;
4892                 back->bytes = max_size;
4893                 back->disk_bytenr = bytenr;
4894                 rec->refs += 1;
4895                 rec->content_checked = 1;
4896                 rec->owner_ref_checked = 1;
4897         } else {
4898                 if (back->node.found_extent_tree) {
4899                         fprintf(stderr, "Extent back ref already exists "
4900                                 "for %llu parent %llu root %llu "
4901                                 "owner %llu offset %llu num_refs %lu\n",
4902                                 (unsigned long long)bytenr,
4903                                 (unsigned long long)parent,
4904                                 (unsigned long long)root,
4905                                 (unsigned long long)owner,
4906                                 (unsigned long long)offset,
4907                                 (unsigned long)num_refs);
4908                 }
4909                 back->num_refs = num_refs;
4910                 back->node.found_extent_tree = 1;
4911         }
4912         maybe_free_extent_rec(extent_cache, rec);
4913         return 0;
4914 }
4915
4916 static int add_pending(struct cache_tree *pending,
4917                        struct cache_tree *seen, u64 bytenr, u32 size)
4918 {
4919         int ret;
4920         ret = add_cache_extent(seen, bytenr, size);
4921         if (ret)
4922                 return ret;
4923         add_cache_extent(pending, bytenr, size);
4924         return 0;
4925 }
4926
4927 static int pick_next_pending(struct cache_tree *pending,
4928                         struct cache_tree *reada,
4929                         struct cache_tree *nodes,
4930                         u64 last, struct block_info *bits, int bits_nr,
4931                         int *reada_bits)
4932 {
4933         unsigned long node_start = last;
4934         struct cache_extent *cache;
4935         int ret;
4936
4937         cache = search_cache_extent(reada, 0);
4938         if (cache) {
4939                 bits[0].start = cache->start;
4940                 bits[0].size = cache->size;
4941                 *reada_bits = 1;
4942                 return 1;
4943         }
4944         *reada_bits = 0;
4945         if (node_start > 32768)
4946                 node_start -= 32768;
4947
4948         cache = search_cache_extent(nodes, node_start);
4949         if (!cache)
4950                 cache = search_cache_extent(nodes, 0);
4951
4952         if (!cache) {
4953                  cache = search_cache_extent(pending, 0);
4954                  if (!cache)
4955                          return 0;
4956                  ret = 0;
4957                  do {
4958                          bits[ret].start = cache->start;
4959                          bits[ret].size = cache->size;
4960                          cache = next_cache_extent(cache);
4961                          ret++;
4962                  } while (cache && ret < bits_nr);
4963                  return ret;
4964         }
4965
4966         ret = 0;
4967         do {
4968                 bits[ret].start = cache->start;
4969                 bits[ret].size = cache->size;
4970                 cache = next_cache_extent(cache);
4971                 ret++;
4972         } while (cache && ret < bits_nr);
4973
4974         if (bits_nr - ret > 8) {
4975                 u64 lookup = bits[0].start + bits[0].size;
4976                 struct cache_extent *next;
4977                 next = search_cache_extent(pending, lookup);
4978                 while(next) {
4979                         if (next->start - lookup > 32768)
4980                                 break;
4981                         bits[ret].start = next->start;
4982                         bits[ret].size = next->size;
4983                         lookup = next->start + next->size;
4984                         ret++;
4985                         if (ret == bits_nr)
4986                                 break;
4987                         next = next_cache_extent(next);
4988                         if (!next)
4989                                 break;
4990                 }
4991         }
4992         return ret;
4993 }
4994
4995 static void free_chunk_record(struct cache_extent *cache)
4996 {
4997         struct chunk_record *rec;
4998
4999         rec = container_of(cache, struct chunk_record, cache);
5000         list_del_init(&rec->list);
5001         list_del_init(&rec->dextents);
5002         free(rec);
5003 }
5004
5005 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5006 {
5007         cache_tree_free_extents(chunk_cache, free_chunk_record);
5008 }
5009
5010 static void free_device_record(struct rb_node *node)
5011 {
5012         struct device_record *rec;
5013
5014         rec = container_of(node, struct device_record, node);
5015         free(rec);
5016 }
5017
5018 FREE_RB_BASED_TREE(device_cache, free_device_record);
5019
5020 int insert_block_group_record(struct block_group_tree *tree,
5021                               struct block_group_record *bg_rec)
5022 {
5023         int ret;
5024
5025         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5026         if (ret)
5027                 return ret;
5028
5029         list_add_tail(&bg_rec->list, &tree->block_groups);
5030         return 0;
5031 }
5032
5033 static void free_block_group_record(struct cache_extent *cache)
5034 {
5035         struct block_group_record *rec;
5036
5037         rec = container_of(cache, struct block_group_record, cache);
5038         list_del_init(&rec->list);
5039         free(rec);
5040 }
5041
5042 void free_block_group_tree(struct block_group_tree *tree)
5043 {
5044         cache_tree_free_extents(&tree->tree, free_block_group_record);
5045 }
5046
5047 int insert_device_extent_record(struct device_extent_tree *tree,
5048                                 struct device_extent_record *de_rec)
5049 {
5050         int ret;
5051
5052         /*
5053          * Device extent is a bit different from the other extents, because
5054          * the extents which belong to the different devices may have the
5055          * same start and size, so we need use the special extent cache
5056          * search/insert functions.
5057          */
5058         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5059         if (ret)
5060                 return ret;
5061
5062         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5063         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5064         return 0;
5065 }
5066
5067 static void free_device_extent_record(struct cache_extent *cache)
5068 {
5069         struct device_extent_record *rec;
5070
5071         rec = container_of(cache, struct device_extent_record, cache);
5072         if (!list_empty(&rec->chunk_list))
5073                 list_del_init(&rec->chunk_list);
5074         if (!list_empty(&rec->device_list))
5075                 list_del_init(&rec->device_list);
5076         free(rec);
5077 }
5078
5079 void free_device_extent_tree(struct device_extent_tree *tree)
5080 {
5081         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5082 }
5083
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * key.objectid is passed as the extent byte number and key.offset as the
 * parent.  A ref objectid below BTRFS_FIRST_FREE_OBJECTID yields a tree
 * backref, anything else a data backref carrying the ref count of the item.
 *
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	else
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

	return 0;
}
#endif
5102
5103 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5104                                             struct btrfs_key *key,
5105                                             int slot)
5106 {
5107         struct btrfs_chunk *ptr;
5108         struct chunk_record *rec;
5109         int num_stripes, i;
5110
5111         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5112         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5113
5114         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5115         if (!rec) {
5116                 fprintf(stderr, "memory allocation failed\n");
5117                 exit(-1);
5118         }
5119
5120         INIT_LIST_HEAD(&rec->list);
5121         INIT_LIST_HEAD(&rec->dextents);
5122         rec->bg_rec = NULL;
5123
5124         rec->cache.start = key->offset;
5125         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5126
5127         rec->generation = btrfs_header_generation(leaf);
5128
5129         rec->objectid = key->objectid;
5130         rec->type = key->type;
5131         rec->offset = key->offset;
5132
5133         rec->length = rec->cache.size;
5134         rec->owner = btrfs_chunk_owner(leaf, ptr);
5135         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5136         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5137         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5138         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5139         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5140         rec->num_stripes = num_stripes;
5141         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5142
5143         for (i = 0; i < rec->num_stripes; ++i) {
5144                 rec->stripes[i].devid =
5145                         btrfs_stripe_devid_nr(leaf, ptr, i);
5146                 rec->stripes[i].offset =
5147                         btrfs_stripe_offset_nr(leaf, ptr, i);
5148                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5149                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5150                                 BTRFS_UUID_SIZE);
5151         }
5152
5153         return rec;
5154 }
5155
5156 static int process_chunk_item(struct cache_tree *chunk_cache,
5157                               struct btrfs_key *key, struct extent_buffer *eb,
5158                               int slot)
5159 {
5160         struct chunk_record *rec;
5161         int ret = 0;
5162
5163         rec = btrfs_new_chunk_record(eb, key, slot);
5164         ret = insert_cache_extent(chunk_cache, &rec->cache);
5165         if (ret) {
5166                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5167                         rec->offset, rec->length);
5168                 free(rec);
5169         }
5170
5171         return ret;
5172 }
5173
5174 static int process_device_item(struct rb_root *dev_cache,
5175                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5176 {
5177         struct btrfs_dev_item *ptr;
5178         struct device_record *rec;
5179         int ret = 0;
5180
5181         ptr = btrfs_item_ptr(eb,
5182                 slot, struct btrfs_dev_item);
5183
5184         rec = malloc(sizeof(*rec));
5185         if (!rec) {
5186                 fprintf(stderr, "memory allocation failed\n");
5187                 return -ENOMEM;
5188         }
5189
5190         rec->devid = key->offset;
5191         rec->generation = btrfs_header_generation(eb);
5192
5193         rec->objectid = key->objectid;
5194         rec->type = key->type;
5195         rec->offset = key->offset;
5196
5197         rec->devid = btrfs_device_id(eb, ptr);
5198         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5199         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5200
5201         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5202         if (ret) {
5203                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5204                 free(rec);
5205         }
5206
5207         return ret;
5208 }
5209
5210 struct block_group_record *
5211 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5212                              int slot)
5213 {
5214         struct btrfs_block_group_item *ptr;
5215         struct block_group_record *rec;
5216
5217         rec = calloc(1, sizeof(*rec));
5218         if (!rec) {
5219                 fprintf(stderr, "memory allocation failed\n");
5220                 exit(-1);
5221         }
5222
5223         rec->cache.start = key->objectid;
5224         rec->cache.size = key->offset;
5225
5226         rec->generation = btrfs_header_generation(leaf);
5227
5228         rec->objectid = key->objectid;
5229         rec->type = key->type;
5230         rec->offset = key->offset;
5231
5232         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5233         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5234
5235         INIT_LIST_HEAD(&rec->list);
5236
5237         return rec;
5238 }
5239
5240 static int process_block_group_item(struct block_group_tree *block_group_cache,
5241                                     struct btrfs_key *key,
5242                                     struct extent_buffer *eb, int slot)
5243 {
5244         struct block_group_record *rec;
5245         int ret = 0;
5246
5247         rec = btrfs_new_block_group_record(eb, key, slot);
5248         ret = insert_block_group_record(block_group_cache, rec);
5249         if (ret) {
5250                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5251                         rec->objectid, rec->offset);
5252                 free(rec);
5253         }
5254
5255         return ret;
5256 }
5257
5258 struct device_extent_record *
5259 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5260                                struct btrfs_key *key, int slot)
5261 {
5262         struct device_extent_record *rec;
5263         struct btrfs_dev_extent *ptr;
5264
5265         rec = calloc(1, sizeof(*rec));
5266         if (!rec) {
5267                 fprintf(stderr, "memory allocation failed\n");
5268                 exit(-1);
5269         }
5270
5271         rec->cache.objectid = key->objectid;
5272         rec->cache.start = key->offset;
5273
5274         rec->generation = btrfs_header_generation(leaf);
5275
5276         rec->objectid = key->objectid;
5277         rec->type = key->type;
5278         rec->offset = key->offset;
5279
5280         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5281         rec->chunk_objecteid =
5282                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5283         rec->chunk_offset =
5284                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5285         rec->length = btrfs_dev_extent_length(leaf, ptr);
5286         rec->cache.size = rec->length;
5287
5288         INIT_LIST_HEAD(&rec->chunk_list);
5289         INIT_LIST_HEAD(&rec->device_list);
5290
5291         return rec;
5292 }
5293
5294 static int
5295 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5296                            struct btrfs_key *key, struct extent_buffer *eb,
5297                            int slot)
5298 {
5299         struct device_extent_record *rec;
5300         int ret;
5301
5302         rec = btrfs_new_device_extent_record(eb, key, slot);
5303         ret = insert_device_extent_record(dev_extent_cache, rec);
5304         if (ret) {
5305                 fprintf(stderr,
5306                         "Device extent[%llu, %llu, %llu] existed.\n",
5307                         rec->objectid, rec->offset, rec->length);
5308                 free(rec);
5309         }
5310
5311         return ret;
5312 }
5313
5314 static int process_extent_item(struct btrfs_root *root,
5315                                struct cache_tree *extent_cache,
5316                                struct extent_buffer *eb, int slot)
5317 {
5318         struct btrfs_extent_item *ei;
5319         struct btrfs_extent_inline_ref *iref;
5320         struct btrfs_extent_data_ref *dref;
5321         struct btrfs_shared_data_ref *sref;
5322         struct btrfs_key key;
5323         struct extent_record tmpl;
5324         unsigned long end;
5325         unsigned long ptr;
5326         int type;
5327         u32 item_size = btrfs_item_size_nr(eb, slot);
5328         u64 refs = 0;
5329         u64 offset;
5330         u64 num_bytes;
5331         int metadata = 0;
5332
5333         btrfs_item_key_to_cpu(eb, &key, slot);
5334
5335         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5336                 metadata = 1;
5337                 num_bytes = root->nodesize;
5338         } else {
5339                 num_bytes = key.offset;
5340         }
5341
5342         if (item_size < sizeof(*ei)) {
5343 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5344                 struct btrfs_extent_item_v0 *ei0;
5345                 BUG_ON(item_size != sizeof(*ei0));
5346                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5347                 refs = btrfs_extent_refs_v0(eb, ei0);
5348 #else
5349                 BUG();
5350 #endif
5351                 memset(&tmpl, 0, sizeof(tmpl));
5352                 tmpl.start = key.objectid;
5353                 tmpl.nr = num_bytes;
5354                 tmpl.extent_item_refs = refs;
5355                 tmpl.metadata = metadata;
5356                 tmpl.found_rec = 1;
5357                 tmpl.max_size = num_bytes;
5358
5359                 return add_extent_rec(extent_cache, &tmpl);
5360         }
5361
5362         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5363         refs = btrfs_extent_refs(eb, ei);
5364         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5365                 metadata = 1;
5366         else
5367                 metadata = 0;
5368
5369         memset(&tmpl, 0, sizeof(tmpl));
5370         tmpl.start = key.objectid;
5371         tmpl.nr = num_bytes;
5372         tmpl.extent_item_refs = refs;
5373         tmpl.metadata = metadata;
5374         tmpl.found_rec = 1;
5375         tmpl.max_size = num_bytes;
5376         add_extent_rec(extent_cache, &tmpl);
5377
5378         ptr = (unsigned long)(ei + 1);
5379         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5380             key.type == BTRFS_EXTENT_ITEM_KEY)
5381                 ptr += sizeof(struct btrfs_tree_block_info);
5382
5383         end = (unsigned long)ei + item_size;
5384         while (ptr < end) {
5385                 iref = (struct btrfs_extent_inline_ref *)ptr;
5386                 type = btrfs_extent_inline_ref_type(eb, iref);
5387                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5388                 switch (type) {
5389                 case BTRFS_TREE_BLOCK_REF_KEY:
5390                         add_tree_backref(extent_cache, key.objectid,
5391                                          0, offset, 0);
5392                         break;
5393                 case BTRFS_SHARED_BLOCK_REF_KEY:
5394                         add_tree_backref(extent_cache, key.objectid,
5395                                          offset, 0, 0);
5396                         break;
5397                 case BTRFS_EXTENT_DATA_REF_KEY:
5398                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5399                         add_data_backref(extent_cache, key.objectid, 0,
5400                                         btrfs_extent_data_ref_root(eb, dref),
5401                                         btrfs_extent_data_ref_objectid(eb,
5402                                                                        dref),
5403                                         btrfs_extent_data_ref_offset(eb, dref),
5404                                         btrfs_extent_data_ref_count(eb, dref),
5405                                         0, num_bytes);
5406                         break;
5407                 case BTRFS_SHARED_DATA_REF_KEY:
5408                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5409                         add_data_backref(extent_cache, key.objectid, offset,
5410                                         0, 0, 0,
5411                                         btrfs_shared_data_ref_count(eb, sref),
5412                                         0, num_bytes);
5413                         break;
5414                 default:
5415                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5416                                 key.objectid, key.type, num_bytes);
5417                         goto out;
5418                 }
5419                 ptr += btrfs_extent_inline_ref_size(type);
5420         }
5421         WARN_ON(ptr > end);
5422 out:
5423         return 0;
5424 }
5425
5426 static int check_cache_range(struct btrfs_root *root,
5427                              struct btrfs_block_group_cache *cache,
5428                              u64 offset, u64 bytes)
5429 {
5430         struct btrfs_free_space *entry;
5431         u64 *logical;
5432         u64 bytenr;
5433         int stripe_len;
5434         int i, nr, ret;
5435
5436         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5437                 bytenr = btrfs_sb_offset(i);
5438                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5439                                        cache->key.objectid, bytenr, 0,
5440                                        &logical, &nr, &stripe_len);
5441                 if (ret)
5442                         return ret;
5443
5444                 while (nr--) {
5445                         if (logical[nr] + stripe_len <= offset)
5446                                 continue;
5447                         if (offset + bytes <= logical[nr])
5448                                 continue;
5449                         if (logical[nr] == offset) {
5450                                 if (stripe_len >= bytes) {
5451                                         kfree(logical);
5452                                         return 0;
5453                                 }
5454                                 bytes -= stripe_len;
5455                                 offset += stripe_len;
5456                         } else if (logical[nr] < offset) {
5457                                 if (logical[nr] + stripe_len >=
5458                                     offset + bytes) {
5459                                         kfree(logical);
5460                                         return 0;
5461                                 }
5462                                 bytes = (offset + bytes) -
5463                                         (logical[nr] + stripe_len);
5464                                 offset = logical[nr] + stripe_len;
5465                         } else {
5466                                 /*
5467                                  * Could be tricky, the super may land in the
5468                                  * middle of the area we're checking.  First
5469                                  * check the easiest case, it's at the end.
5470                                  */
5471                                 if (logical[nr] + stripe_len >=
5472                                     bytes + offset) {
5473                                         bytes = logical[nr] - offset;
5474                                         continue;
5475                                 }
5476
5477                                 /* Check the left side */
5478                                 ret = check_cache_range(root, cache,
5479                                                         offset,
5480                                                         logical[nr] - offset);
5481                                 if (ret) {
5482                                         kfree(logical);
5483                                         return ret;
5484                                 }
5485
5486                                 /* Now we continue with the right side */
5487                                 bytes = (offset + bytes) -
5488                                         (logical[nr] + stripe_len);
5489                                 offset = logical[nr] + stripe_len;
5490                         }
5491                 }
5492
5493                 kfree(logical);
5494         }
5495
5496         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5497         if (!entry) {
5498                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5499                         offset, offset+bytes);
5500                 return -EINVAL;
5501         }
5502
5503         if (entry->offset != offset) {
5504                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5505                         entry->offset);
5506                 return -EINVAL;
5507         }
5508
5509         if (entry->bytes != bytes) {
5510                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5511                         bytes, entry->bytes, offset);
5512                 return -EINVAL;
5513         }
5514
5515         unlink_free_space(cache->free_space_ctl, entry);
5516         free(entry);
5517         return 0;
5518 }
5519
5520 static int verify_space_cache(struct btrfs_root *root,
5521                               struct btrfs_block_group_cache *cache)
5522 {
5523         struct btrfs_path *path;
5524         struct extent_buffer *leaf;
5525         struct btrfs_key key;
5526         u64 last;
5527         int ret = 0;
5528
5529         path = btrfs_alloc_path();
5530         if (!path)
5531                 return -ENOMEM;
5532
5533         root = root->fs_info->extent_root;
5534
5535         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5536
5537         key.objectid = last;
5538         key.offset = 0;
5539         key.type = BTRFS_EXTENT_ITEM_KEY;
5540
5541         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5542         if (ret < 0)
5543                 goto out;
5544         ret = 0;
5545         while (1) {
5546                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5547                         ret = btrfs_next_leaf(root, path);
5548                         if (ret < 0)
5549                                 goto out;
5550                         if (ret > 0) {
5551                                 ret = 0;
5552                                 break;
5553                         }
5554                 }
5555                 leaf = path->nodes[0];
5556                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5557                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5558                         break;
5559                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5560                     key.type != BTRFS_METADATA_ITEM_KEY) {
5561                         path->slots[0]++;
5562                         continue;
5563                 }
5564
5565                 if (last == key.objectid) {
5566                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5567                                 last = key.objectid + key.offset;
5568                         else
5569                                 last = key.objectid + root->nodesize;
5570                         path->slots[0]++;
5571                         continue;
5572                 }
5573
5574                 ret = check_cache_range(root, cache, last,
5575                                         key.objectid - last);
5576                 if (ret)
5577                         break;
5578                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5579                         last = key.objectid + key.offset;
5580                 else
5581                         last = key.objectid + root->nodesize;
5582                 path->slots[0]++;
5583         }
5584
5585         if (last < cache->key.objectid + cache->key.offset)
5586                 ret = check_cache_range(root, cache, last,
5587                                         cache->key.objectid +
5588                                         cache->key.offset - last);
5589
5590 out:
5591         btrfs_free_path(path);
5592
5593         if (!ret &&
5594             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5595                 fprintf(stderr, "There are still entries left in the space "
5596                         "cache\n");
5597                 ret = -EINVAL;
5598         }
5599
5600         return ret;
5601 }
5602
5603 static int check_space_cache(struct btrfs_root *root)
5604 {
5605         struct btrfs_block_group_cache *cache;
5606         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5607         int ret;
5608         int error = 0;
5609
5610         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5611             btrfs_super_generation(root->fs_info->super_copy) !=
5612             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5613                 printf("cache and super generation don't match, space cache "
5614                        "will be invalidated\n");
5615                 return 0;
5616         }
5617
5618         if (ctx.progress_enabled) {
5619                 ctx.tp = TASK_FREE_SPACE;
5620                 task_start(ctx.info);
5621         }
5622
5623         while (1) {
5624                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5625                 if (!cache)
5626                         break;
5627
5628                 start = cache->key.objectid + cache->key.offset;
5629                 if (!cache->free_space_ctl) {
5630                         if (btrfs_init_free_space_ctl(cache,
5631                                                       root->sectorsize)) {
5632                                 ret = -ENOMEM;
5633                                 break;
5634                         }
5635                 } else {
5636                         btrfs_remove_free_space_cache(cache);
5637                 }
5638
5639                 if (btrfs_fs_compat_ro(root->fs_info,
5640                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5641                         ret = exclude_super_stripes(root, cache);
5642                         if (ret) {
5643                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5644                                         strerror(-ret));
5645                                 error++;
5646                                 continue;
5647                         }
5648                         ret = load_free_space_tree(root->fs_info, cache);
5649                         free_excluded_extents(root, cache);
5650                         if (ret < 0) {
5651                                 fprintf(stderr, "could not load free space tree: %s\n",
5652                                         strerror(-ret));
5653                                 error++;
5654                                 continue;
5655                         }
5656                         error += ret;
5657                 } else {
5658                         ret = load_free_space_cache(root->fs_info, cache);
5659                         if (!ret)
5660                                 continue;
5661                 }
5662
5663                 ret = verify_space_cache(root, cache);
5664                 if (ret) {
5665                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5666                                 cache->key.objectid);
5667                         error++;
5668                 }
5669         }
5670
5671         task_stop(ctx.info);
5672
5673         return error ? -EINVAL : 0;
5674 }
5675
5676 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5677                         u64 num_bytes, unsigned long leaf_offset,
5678                         struct extent_buffer *eb) {
5679
5680         u64 offset = 0;
5681         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5682         char *data;
5683         unsigned long csum_offset;
5684         u32 csum;
5685         u32 csum_expected;
5686         u64 read_len;
5687         u64 data_checked = 0;
5688         u64 tmp;
5689         int ret = 0;
5690         int mirror;
5691         int num_copies;
5692
5693         if (num_bytes % root->sectorsize)
5694                 return -EINVAL;
5695
5696         data = malloc(num_bytes);
5697         if (!data)
5698                 return -ENOMEM;
5699
5700         while (offset < num_bytes) {
5701                 mirror = 0;
5702 again:
5703                 read_len = num_bytes - offset;
5704                 /* read as much space once a time */
5705                 ret = read_extent_data(root, data + offset,
5706                                 bytenr + offset, &read_len, mirror);
5707                 if (ret)
5708                         goto out;
5709                 data_checked = 0;
5710                 /* verify every 4k data's checksum */
5711                 while (data_checked < read_len) {
5712                         csum = ~(u32)0;
5713                         tmp = offset + data_checked;
5714
5715                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5716                                                csum, root->sectorsize);
5717                         btrfs_csum_final(csum, (char *)&csum);
5718
5719                         csum_offset = leaf_offset +
5720                                  tmp / root->sectorsize * csum_size;
5721                         read_extent_buffer(eb, (char *)&csum_expected,
5722                                            csum_offset, csum_size);
5723                         /* try another mirror */
5724                         if (csum != csum_expected) {
5725                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5726                                                 mirror, bytenr + tmp,
5727                                                 csum, csum_expected);
5728                                 num_copies = btrfs_num_copies(
5729                                                 &root->fs_info->mapping_tree,
5730                                                 bytenr, num_bytes);
5731                                 if (mirror < num_copies - 1) {
5732                                         mirror += 1;
5733                                         goto again;
5734                                 }
5735                         }
5736                         data_checked += root->sectorsize;
5737                 }
5738                 offset += read_len;
5739         }
5740 out:
5741         free(data);
5742         return ret;
5743 }
5744
5745 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5746                                u64 num_bytes)
5747 {
5748         struct btrfs_path *path;
5749         struct extent_buffer *leaf;
5750         struct btrfs_key key;
5751         int ret;
5752
5753         path = btrfs_alloc_path();
5754         if (!path) {
5755                 fprintf(stderr, "Error allocating path\n");
5756                 return -ENOMEM;
5757         }
5758
5759         key.objectid = bytenr;
5760         key.type = BTRFS_EXTENT_ITEM_KEY;
5761         key.offset = (u64)-1;
5762
5763 again:
5764         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5765                                 0, 0);
5766         if (ret < 0) {
5767                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5768                 btrfs_free_path(path);
5769                 return ret;
5770         } else if (ret) {
5771                 if (path->slots[0] > 0) {
5772                         path->slots[0]--;
5773                 } else {
5774                         ret = btrfs_prev_leaf(root, path);
5775                         if (ret < 0) {
5776                                 goto out;
5777                         } else if (ret > 0) {
5778                                 ret = 0;
5779                                 goto out;
5780                         }
5781                 }
5782         }
5783
5784         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5785
5786         /*
5787          * Block group items come before extent items if they have the same
5788          * bytenr, so walk back one more just in case.  Dear future traveller,
5789          * first congrats on mastering time travel.  Now if it's not too much
5790          * trouble could you go back to 2006 and tell Chris to make the
5791          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5792          * EXTENT_ITEM_KEY please?
5793          */
5794         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5795                 if (path->slots[0] > 0) {
5796                         path->slots[0]--;
5797                 } else {
5798                         ret = btrfs_prev_leaf(root, path);
5799                         if (ret < 0) {
5800                                 goto out;
5801                         } else if (ret > 0) {
5802                                 ret = 0;
5803                                 goto out;
5804                         }
5805                 }
5806                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5807         }
5808
5809         while (num_bytes) {
5810                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5811                         ret = btrfs_next_leaf(root, path);
5812                         if (ret < 0) {
5813                                 fprintf(stderr, "Error going to next leaf "
5814                                         "%d\n", ret);
5815                                 btrfs_free_path(path);
5816                                 return ret;
5817                         } else if (ret) {
5818                                 break;
5819                         }
5820                 }
5821                 leaf = path->nodes[0];
5822                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5823                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5824                         path->slots[0]++;
5825                         continue;
5826                 }
5827                 if (key.objectid + key.offset < bytenr) {
5828                         path->slots[0]++;
5829                         continue;
5830                 }
5831                 if (key.objectid > bytenr + num_bytes)
5832                         break;
5833
5834                 if (key.objectid == bytenr) {
5835                         if (key.offset >= num_bytes) {
5836                                 num_bytes = 0;
5837                                 break;
5838                         }
5839                         num_bytes -= key.offset;
5840                         bytenr += key.offset;
5841                 } else if (key.objectid < bytenr) {
5842                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5843                                 num_bytes = 0;
5844                                 break;
5845                         }
5846                         num_bytes = (bytenr + num_bytes) -
5847                                 (key.objectid + key.offset);
5848                         bytenr = key.objectid + key.offset;
5849                 } else {
5850                         if (key.objectid + key.offset < bytenr + num_bytes) {
5851                                 u64 new_start = key.objectid + key.offset;
5852                                 u64 new_bytes = bytenr + num_bytes - new_start;
5853
5854                                 /*
5855                                  * Weird case, the extent is in the middle of
5856                                  * our range, we'll have to search one side
5857                                  * and then the other.  Not sure if this happens
5858                                  * in real life, but no harm in coding it up
5859                                  * anyway just in case.
5860                                  */
5861                                 btrfs_release_path(path);
5862                                 ret = check_extent_exists(root, new_start,
5863                                                           new_bytes);
5864                                 if (ret) {
5865                                         fprintf(stderr, "Right section didn't "
5866                                                 "have a record\n");
5867                                         break;
5868                                 }
5869                                 num_bytes = key.objectid - bytenr;
5870                                 goto again;
5871                         }
5872                         num_bytes = key.objectid - bytenr;
5873                 }
5874                 path->slots[0]++;
5875         }
5876         ret = 0;
5877
5878 out:
5879         if (num_bytes && !ret) {
5880                 fprintf(stderr, "There are no extents for csum range "
5881                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5882                 ret = 1;
5883         }
5884
5885         btrfs_free_path(path);
5886         return ret;
5887 }
5888
5889 static int check_csums(struct btrfs_root *root)
5890 {
5891         struct btrfs_path *path;
5892         struct extent_buffer *leaf;
5893         struct btrfs_key key;
5894         u64 offset = 0, num_bytes = 0;
5895         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5896         int errors = 0;
5897         int ret;
5898         u64 data_len;
5899         unsigned long leaf_offset;
5900
5901         root = root->fs_info->csum_root;
5902         if (!extent_buffer_uptodate(root->node)) {
5903                 fprintf(stderr, "No valid csum tree found\n");
5904                 return -ENOENT;
5905         }
5906
5907         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5908         key.type = BTRFS_EXTENT_CSUM_KEY;
5909         key.offset = 0;
5910
5911         path = btrfs_alloc_path();
5912         if (!path)
5913                 return -ENOMEM;
5914
5915         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5916         if (ret < 0) {
5917                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5918                 btrfs_free_path(path);
5919                 return ret;
5920         }
5921
5922         if (ret > 0 && path->slots[0])
5923                 path->slots[0]--;
5924         ret = 0;
5925
5926         while (1) {
5927                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5928                         ret = btrfs_next_leaf(root, path);
5929                         if (ret < 0) {
5930                                 fprintf(stderr, "Error going to next leaf "
5931                                         "%d\n", ret);
5932                                 break;
5933                         }
5934                         if (ret)
5935                                 break;
5936                 }
5937                 leaf = path->nodes[0];
5938
5939                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5940                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5941                         path->slots[0]++;
5942                         continue;
5943                 }
5944
5945                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5946                               csum_size) * root->sectorsize;
5947                 if (!check_data_csum)
5948                         goto skip_csum_check;
5949                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5950                 ret = check_extent_csums(root, key.offset, data_len,
5951                                          leaf_offset, leaf);
5952                 if (ret)
5953                         break;
5954 skip_csum_check:
5955                 if (!num_bytes) {
5956                         offset = key.offset;
5957                 } else if (key.offset != offset + num_bytes) {
5958                         ret = check_extent_exists(root, offset, num_bytes);
5959                         if (ret) {
5960                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5961                                         "there is no extent record\n",
5962                                         offset, offset+num_bytes);
5963                                 errors++;
5964                         }
5965                         offset = key.offset;
5966                         num_bytes = 0;
5967                 }
5968                 num_bytes += data_len;
5969                 path->slots[0]++;
5970         }
5971
5972         btrfs_free_path(path);
5973         return errors;
5974 }
5975
5976 static int is_dropped_key(struct btrfs_key *key,
5977                           struct btrfs_key *drop_key) {
5978         if (key->objectid < drop_key->objectid)
5979                 return 1;
5980         else if (key->objectid == drop_key->objectid) {
5981                 if (key->type < drop_key->type)
5982                         return 1;
5983                 else if (key->type == drop_key->type) {
5984                         if (key->offset < drop_key->offset)
5985                                 return 1;
5986                 }
5987         }
5988         return 0;
5989 }
5990
5991 /*
5992  * Here are the rules for FULL_BACKREF.
5993  *
5994  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5995  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5996  *      FULL_BACKREF set.
5997  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5998  *    if it happened after the relocation occurred since we'll have dropped the
5999  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6000  *    have no real way to know for sure.
6001  *
6002  * We process the blocks one root at a time, and we start from the lowest root
6003  * objectid and go to the highest.  So we can just lookup the owner backref for
6004  * the record and if we don't find it then we know it doesn't exist and we have
6005  * a FULL BACKREF.
6006  *
6007  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6008  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6009  * be set or not and then we can check later once we've gathered all the refs.
6010  */
6011 static int calc_extent_flag(struct btrfs_root *root,
6012                            struct cache_tree *extent_cache,
6013                            struct extent_buffer *buf,
6014                            struct root_item_record *ri,
6015                            u64 *flags)
6016 {
6017         struct extent_record *rec;
6018         struct cache_extent *cache;
6019         struct tree_backref *tback;
6020         u64 owner = 0;
6021
6022         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6023         /* we have added this extent before */
6024         BUG_ON(!cache);
6025         rec = container_of(cache, struct extent_record, cache);
6026
6027         /*
6028          * Except file/reloc tree, we can not have
6029          * FULL BACKREF MODE
6030          */
6031         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6032                 goto normal;
6033         /*
6034          * root node
6035          */
6036         if (buf->start == ri->bytenr)
6037                 goto normal;
6038
6039         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6040                 goto full_backref;
6041
6042         owner = btrfs_header_owner(buf);
6043         if (owner == ri->objectid)
6044                 goto normal;
6045
6046         tback = find_tree_backref(rec, 0, owner);
6047         if (!tback)
6048                 goto full_backref;
6049 normal:
6050         *flags = 0;
6051         if (rec->flag_block_full_backref != FLAG_UNSET &&
6052             rec->flag_block_full_backref != 0)
6053                 rec->bad_full_backref = 1;
6054         return 0;
6055 full_backref:
6056         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6057         if (rec->flag_block_full_backref != FLAG_UNSET &&
6058             rec->flag_block_full_backref != 1)
6059                 rec->bad_full_backref = 1;
6060         return 0;
6061 }
6062
6063 static int run_next_block(struct btrfs_root *root,
6064                           struct block_info *bits,
6065                           int bits_nr,
6066                           u64 *last,
6067                           struct cache_tree *pending,
6068                           struct cache_tree *seen,
6069                           struct cache_tree *reada,
6070                           struct cache_tree *nodes,
6071                           struct cache_tree *extent_cache,
6072                           struct cache_tree *chunk_cache,
6073                           struct rb_root *dev_cache,
6074                           struct block_group_tree *block_group_cache,
6075                           struct device_extent_tree *dev_extent_cache,
6076                           struct root_item_record *ri)
6077 {
6078         struct extent_buffer *buf;
6079         struct extent_record *rec = NULL;
6080         u64 bytenr;
6081         u32 size;
6082         u64 parent;
6083         u64 owner;
6084         u64 flags;
6085         u64 ptr;
6086         u64 gen = 0;
6087         int ret = 0;
6088         int i;
6089         int nritems;
6090         struct btrfs_key key;
6091         struct cache_extent *cache;
6092         int reada_bits;
6093
6094         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6095                                     bits_nr, &reada_bits);
6096         if (nritems == 0)
6097                 return 1;
6098
6099         if (!reada_bits) {
6100                 for(i = 0; i < nritems; i++) {
6101                         ret = add_cache_extent(reada, bits[i].start,
6102                                                bits[i].size);
6103                         if (ret == -EEXIST)
6104                                 continue;
6105
6106                         /* fixme, get the parent transid */
6107                         readahead_tree_block(root, bits[i].start,
6108                                              bits[i].size, 0);
6109                 }
6110         }
6111         *last = bits[0].start;
6112         bytenr = bits[0].start;
6113         size = bits[0].size;
6114
6115         cache = lookup_cache_extent(pending, bytenr, size);
6116         if (cache) {
6117                 remove_cache_extent(pending, cache);
6118                 free(cache);
6119         }
6120         cache = lookup_cache_extent(reada, bytenr, size);
6121         if (cache) {
6122                 remove_cache_extent(reada, cache);
6123                 free(cache);
6124         }
6125         cache = lookup_cache_extent(nodes, bytenr, size);
6126         if (cache) {
6127                 remove_cache_extent(nodes, cache);
6128                 free(cache);
6129         }
6130         cache = lookup_cache_extent(extent_cache, bytenr, size);
6131         if (cache) {
6132                 rec = container_of(cache, struct extent_record, cache);
6133                 gen = rec->parent_generation;
6134         }
6135
6136         /* fixme, get the real parent transid */
6137         buf = read_tree_block(root, bytenr, size, gen);
6138         if (!extent_buffer_uptodate(buf)) {
6139                 record_bad_block_io(root->fs_info,
6140                                     extent_cache, bytenr, size);
6141                 goto out;
6142         }
6143
6144         nritems = btrfs_header_nritems(buf);
6145
6146         flags = 0;
6147         if (!init_extent_tree) {
6148                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6149                                        btrfs_header_level(buf), 1, NULL,
6150                                        &flags);
6151                 if (ret < 0) {
6152                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6153                         if (ret < 0) {
6154                                 fprintf(stderr, "Couldn't calc extent flags\n");
6155                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6156                         }
6157                 }
6158         } else {
6159                 flags = 0;
6160                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6161                 if (ret < 0) {
6162                         fprintf(stderr, "Couldn't calc extent flags\n");
6163                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6164                 }
6165         }
6166
6167         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6168                 if (ri != NULL &&
6169                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6170                     ri->objectid == btrfs_header_owner(buf)) {
6171                         /*
6172                          * Ok we got to this block from it's original owner and
6173                          * we have FULL_BACKREF set.  Relocation can leave
6174                          * converted blocks over so this is altogether possible,
6175                          * however it's not possible if the generation > the
6176                          * last snapshot, so check for this case.
6177                          */
6178                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6179                             btrfs_header_generation(buf) > ri->last_snapshot) {
6180                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6181                                 rec->bad_full_backref = 1;
6182                         }
6183                 }
6184         } else {
6185                 if (ri != NULL &&
6186                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6187                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6188                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6189                         rec->bad_full_backref = 1;
6190                 }
6191         }
6192
6193         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6194                 rec->flag_block_full_backref = 1;
6195                 parent = bytenr;
6196                 owner = 0;
6197         } else {
6198                 rec->flag_block_full_backref = 0;
6199                 parent = 0;
6200                 owner = btrfs_header_owner(buf);
6201         }
6202
6203         ret = check_block(root, extent_cache, buf, flags);
6204         if (ret)
6205                 goto out;
6206
6207         if (btrfs_is_leaf(buf)) {
6208                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6209                 for (i = 0; i < nritems; i++) {
6210                         struct btrfs_file_extent_item *fi;
6211                         btrfs_item_key_to_cpu(buf, &key, i);
6212                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6213                                 process_extent_item(root, extent_cache, buf,
6214                                                     i);
6215                                 continue;
6216                         }
6217                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6218                                 process_extent_item(root, extent_cache, buf,
6219                                                     i);
6220                                 continue;
6221                         }
6222                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6223                                 total_csum_bytes +=
6224                                         btrfs_item_size_nr(buf, i);
6225                                 continue;
6226                         }
6227                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6228                                 process_chunk_item(chunk_cache, &key, buf, i);
6229                                 continue;
6230                         }
6231                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6232                                 process_device_item(dev_cache, &key, buf, i);
6233                                 continue;
6234                         }
6235                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6236                                 process_block_group_item(block_group_cache,
6237                                         &key, buf, i);
6238                                 continue;
6239                         }
6240                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6241                                 process_device_extent_item(dev_extent_cache,
6242                                         &key, buf, i);
6243                                 continue;
6244
6245                         }
6246                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6247 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6248                                 process_extent_ref_v0(extent_cache, buf, i);
6249 #else
6250                                 BUG();
6251 #endif
6252                                 continue;
6253                         }
6254
6255                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6256                                 add_tree_backref(extent_cache, key.objectid, 0,
6257                                                  key.offset, 0);
6258                                 continue;
6259                         }
6260                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6261                                 add_tree_backref(extent_cache, key.objectid,
6262                                                  key.offset, 0, 0);
6263                                 continue;
6264                         }
6265                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6266                                 struct btrfs_extent_data_ref *ref;
6267                                 ref = btrfs_item_ptr(buf, i,
6268                                                 struct btrfs_extent_data_ref);
6269                                 add_data_backref(extent_cache,
6270                                         key.objectid, 0,
6271                                         btrfs_extent_data_ref_root(buf, ref),
6272                                         btrfs_extent_data_ref_objectid(buf,
6273                                                                        ref),
6274                                         btrfs_extent_data_ref_offset(buf, ref),
6275                                         btrfs_extent_data_ref_count(buf, ref),
6276                                         0, root->sectorsize);
6277                                 continue;
6278                         }
6279                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6280                                 struct btrfs_shared_data_ref *ref;
6281                                 ref = btrfs_item_ptr(buf, i,
6282                                                 struct btrfs_shared_data_ref);
6283                                 add_data_backref(extent_cache,
6284                                         key.objectid, key.offset, 0, 0, 0,
6285                                         btrfs_shared_data_ref_count(buf, ref),
6286                                         0, root->sectorsize);
6287                                 continue;
6288                         }
6289                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6290                                 struct bad_item *bad;
6291
6292                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6293                                         continue;
6294                                 if (!owner)
6295                                         continue;
6296                                 bad = malloc(sizeof(struct bad_item));
6297                                 if (!bad)
6298                                         continue;
6299                                 INIT_LIST_HEAD(&bad->list);
6300                                 memcpy(&bad->key, &key,
6301                                        sizeof(struct btrfs_key));
6302                                 bad->root_id = owner;
6303                                 list_add_tail(&bad->list, &delete_items);
6304                                 continue;
6305                         }
6306                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6307                                 continue;
6308                         fi = btrfs_item_ptr(buf, i,
6309                                             struct btrfs_file_extent_item);
6310                         if (btrfs_file_extent_type(buf, fi) ==
6311                             BTRFS_FILE_EXTENT_INLINE)
6312                                 continue;
6313                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6314                                 continue;
6315
6316                         data_bytes_allocated +=
6317                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6318                         if (data_bytes_allocated < root->sectorsize) {
6319                                 abort();
6320                         }
6321                         data_bytes_referenced +=
6322                                 btrfs_file_extent_num_bytes(buf, fi);
6323                         add_data_backref(extent_cache,
6324                                 btrfs_file_extent_disk_bytenr(buf, fi),
6325                                 parent, owner, key.objectid, key.offset -
6326                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6327                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6328                 }
6329         } else {
6330                 int level;
6331                 struct btrfs_key first_key;
6332
6333                 first_key.objectid = 0;
6334
6335                 if (nritems > 0)
6336                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6337                 level = btrfs_header_level(buf);
6338                 for (i = 0; i < nritems; i++) {
6339                         struct extent_record tmpl;
6340
6341                         ptr = btrfs_node_blockptr(buf, i);
6342                         size = root->nodesize;
6343                         btrfs_node_key_to_cpu(buf, &key, i);
6344                         if (ri != NULL) {
6345                                 if ((level == ri->drop_level)
6346                                     && is_dropped_key(&key, &ri->drop_key)) {
6347                                         continue;
6348                                 }
6349                         }
6350
6351                         memset(&tmpl, 0, sizeof(tmpl));
6352                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6353                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6354                         tmpl.start = ptr;
6355                         tmpl.nr = size;
6356                         tmpl.refs = 1;
6357                         tmpl.metadata = 1;
6358                         tmpl.max_size = size;
6359                         ret = add_extent_rec(extent_cache, &tmpl);
6360                         BUG_ON(ret);
6361
6362                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6363
6364                         if (level > 1) {
6365                                 add_pending(nodes, seen, ptr, size);
6366                         } else {
6367                                 add_pending(pending, seen, ptr, size);
6368                         }
6369                 }
6370                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6371                                       nritems) * sizeof(struct btrfs_key_ptr);
6372         }
6373         total_btree_bytes += buf->len;
6374         if (fs_root_objectid(btrfs_header_owner(buf)))
6375                 total_fs_tree_bytes += buf->len;
6376         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6377                 total_extent_tree_bytes += buf->len;
6378         if (!found_old_backref &&
6379             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6380             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6381             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6382                 found_old_backref = 1;
6383 out:
6384         free_extent_buffer(buf);
6385         return ret;
6386 }
6387
6388 static int add_root_to_pending(struct extent_buffer *buf,
6389                                struct cache_tree *extent_cache,
6390                                struct cache_tree *pending,
6391                                struct cache_tree *seen,
6392                                struct cache_tree *nodes,
6393                                u64 objectid)
6394 {
6395         struct extent_record tmpl;
6396
6397         if (btrfs_header_level(buf) > 0)
6398                 add_pending(nodes, seen, buf->start, buf->len);
6399         else
6400                 add_pending(pending, seen, buf->start, buf->len);
6401
6402         memset(&tmpl, 0, sizeof(tmpl));
6403         tmpl.start = buf->start;
6404         tmpl.nr = buf->len;
6405         tmpl.is_root = 1;
6406         tmpl.refs = 1;
6407         tmpl.metadata = 1;
6408         tmpl.max_size = buf->len;
6409         add_extent_rec(extent_cache, &tmpl);
6410
6411         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6412             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6413                 add_tree_backref(extent_cache, buf->start, buf->start,
6414                                  0, 1);
6415         else
6416                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6417         return 0;
6418 }
6419
6420 /* as we fix the tree, we might be deleting blocks that
6421  * we're tracking for repair.  This hook makes sure we
6422  * remove any backrefs for blocks as we are fixing them.
6423  */
6424 static int free_extent_hook(struct btrfs_trans_handle *trans,
6425                             struct btrfs_root *root,
6426                             u64 bytenr, u64 num_bytes, u64 parent,
6427                             u64 root_objectid, u64 owner, u64 offset,
6428                             int refs_to_drop)
6429 {
6430         struct extent_record *rec;
6431         struct cache_extent *cache;
6432         int is_data;
6433         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6434
6435         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6436         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6437         if (!cache)
6438                 return 0;
6439
6440         rec = container_of(cache, struct extent_record, cache);
6441         if (is_data) {
6442                 struct data_backref *back;
6443                 back = find_data_backref(rec, parent, root_objectid, owner,
6444                                          offset, 1, bytenr, num_bytes);
6445                 if (!back)
6446                         goto out;
6447                 if (back->node.found_ref) {
6448                         back->found_ref -= refs_to_drop;
6449                         if (rec->refs)
6450                                 rec->refs -= refs_to_drop;
6451                 }
6452                 if (back->node.found_extent_tree) {
6453                         back->num_refs -= refs_to_drop;
6454                         if (rec->extent_item_refs)
6455                                 rec->extent_item_refs -= refs_to_drop;
6456                 }
6457                 if (back->found_ref == 0)
6458                         back->node.found_ref = 0;
6459                 if (back->num_refs == 0)
6460                         back->node.found_extent_tree = 0;
6461
6462                 if (!back->node.found_extent_tree && back->node.found_ref) {
6463                         rb_erase(&back->node.node, &rec->backref_tree);
6464                         free(back);
6465                 }
6466         } else {
6467                 struct tree_backref *back;
6468                 back = find_tree_backref(rec, parent, root_objectid);
6469                 if (!back)
6470                         goto out;
6471                 if (back->node.found_ref) {
6472                         if (rec->refs)
6473                                 rec->refs--;
6474                         back->node.found_ref = 0;
6475                 }
6476                 if (back->node.found_extent_tree) {
6477                         if (rec->extent_item_refs)
6478                                 rec->extent_item_refs--;
6479                         back->node.found_extent_tree = 0;
6480                 }
6481                 if (!back->node.found_extent_tree && back->node.found_ref) {
6482                         rb_erase(&back->node.node, &rec->backref_tree);
6483                         free(back);
6484                 }
6485         }
6486         maybe_free_extent_rec(extent_cache, rec);
6487 out:
6488         return 0;
6489 }
6490
6491 static int delete_extent_records(struct btrfs_trans_handle *trans,
6492                                  struct btrfs_root *root,
6493                                  struct btrfs_path *path,
6494                                  u64 bytenr, u64 new_len)
6495 {
6496         struct btrfs_key key;
6497         struct btrfs_key found_key;
6498         struct extent_buffer *leaf;
6499         int ret;
6500         int slot;
6501
6502
6503         key.objectid = bytenr;
6504         key.type = (u8)-1;
6505         key.offset = (u64)-1;
6506
6507         while(1) {
6508                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6509                                         &key, path, 0, 1);
6510                 if (ret < 0)
6511                         break;
6512
6513                 if (ret > 0) {
6514                         ret = 0;
6515                         if (path->slots[0] == 0)
6516                                 break;
6517                         path->slots[0]--;
6518                 }
6519                 ret = 0;
6520
6521                 leaf = path->nodes[0];
6522                 slot = path->slots[0];
6523
6524                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6525                 if (found_key.objectid != bytenr)
6526                         break;
6527
6528                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6529                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6530                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6531                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6532                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6533                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6534                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6535                         btrfs_release_path(path);
6536                         if (found_key.type == 0) {
6537                                 if (found_key.offset == 0)
6538                                         break;
6539                                 key.offset = found_key.offset - 1;
6540                                 key.type = found_key.type;
6541                         }
6542                         key.type = found_key.type - 1;
6543                         key.offset = (u64)-1;
6544                         continue;
6545                 }
6546
6547                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6548                         found_key.objectid, found_key.type, found_key.offset);
6549
6550                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6551                 if (ret)
6552                         break;
6553                 btrfs_release_path(path);
6554
6555                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6556                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6557                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6558                                 found_key.offset : root->nodesize;
6559
6560                         ret = btrfs_update_block_group(trans, root, bytenr,
6561                                                        bytes, 0, 0);
6562                         if (ret)
6563                                 break;
6564                 }
6565         }
6566
6567         btrfs_release_path(path);
6568         return ret;
6569 }
6570
6571 /*
6572  * for a single backref, this will allocate a new extent
6573  * and add the backref to it.
6574  */
6575 static int record_extent(struct btrfs_trans_handle *trans,
6576                          struct btrfs_fs_info *info,
6577                          struct btrfs_path *path,
6578                          struct extent_record *rec,
6579                          struct extent_backref *back,
6580                          int allocated, u64 flags)
6581 {
6582         int ret;
6583         struct btrfs_root *extent_root = info->extent_root;
6584         struct extent_buffer *leaf;
6585         struct btrfs_key ins_key;
6586         struct btrfs_extent_item *ei;
6587         struct tree_backref *tback;
6588         struct data_backref *dback;
6589         struct btrfs_tree_block_info *bi;
6590
6591         if (!back->is_data)
6592                 rec->max_size = max_t(u64, rec->max_size,
6593                                     info->extent_root->nodesize);
6594
6595         if (!allocated) {
6596                 u32 item_size = sizeof(*ei);
6597
6598                 if (!back->is_data)
6599                         item_size += sizeof(*bi);
6600
6601                 ins_key.objectid = rec->start;
6602                 ins_key.offset = rec->max_size;
6603                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6604
6605                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6606                                         &ins_key, item_size);
6607                 if (ret)
6608                         goto fail;
6609
6610                 leaf = path->nodes[0];
6611                 ei = btrfs_item_ptr(leaf, path->slots[0],
6612                                     struct btrfs_extent_item);
6613
6614                 btrfs_set_extent_refs(leaf, ei, 0);
6615                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6616
6617                 if (back->is_data) {
6618                         btrfs_set_extent_flags(leaf, ei,
6619                                                BTRFS_EXTENT_FLAG_DATA);
6620                 } else {
6621                         struct btrfs_disk_key copy_key;;
6622
6623                         tback = to_tree_backref(back);
6624                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6625                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6626                                              sizeof(*bi));
6627
6628                         btrfs_set_disk_key_objectid(&copy_key,
6629                                                     rec->info_objectid);
6630                         btrfs_set_disk_key_type(&copy_key, 0);
6631                         btrfs_set_disk_key_offset(&copy_key, 0);
6632
6633                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6634                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6635
6636                         btrfs_set_extent_flags(leaf, ei,
6637                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6638                 }
6639
6640                 btrfs_mark_buffer_dirty(leaf);
6641                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6642                                                rec->max_size, 1, 0);
6643                 if (ret)
6644                         goto fail;
6645                 btrfs_release_path(path);
6646         }
6647
6648         if (back->is_data) {
6649                 u64 parent;
6650                 int i;
6651
6652                 dback = to_data_backref(back);
6653                 if (back->full_backref)
6654                         parent = dback->parent;
6655                 else
6656                         parent = 0;
6657
6658                 for (i = 0; i < dback->found_ref; i++) {
6659                         /* if parent != 0, we're doing a full backref
6660                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6661                          * just makes the backref allocator create a data
6662                          * backref
6663                          */
6664                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6665                                                    rec->start, rec->max_size,
6666                                                    parent,
6667                                                    dback->root,
6668                                                    parent ?
6669                                                    BTRFS_FIRST_FREE_OBJECTID :
6670                                                    dback->owner,
6671                                                    dback->offset);
6672                         if (ret)
6673                                 break;
6674                 }
6675                 fprintf(stderr, "adding new data backref"
6676                                 " on %llu %s %llu owner %llu"
6677                                 " offset %llu found %d\n",
6678                                 (unsigned long long)rec->start,
6679                                 back->full_backref ?
6680                                 "parent" : "root",
6681                                 back->full_backref ?
6682                                 (unsigned long long)parent :
6683                                 (unsigned long long)dback->root,
6684                                 (unsigned long long)dback->owner,
6685                                 (unsigned long long)dback->offset,
6686                                 dback->found_ref);
6687         } else {
6688                 u64 parent;
6689
6690                 tback = to_tree_backref(back);
6691                 if (back->full_backref)
6692                         parent = tback->parent;
6693                 else
6694                         parent = 0;
6695
6696                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6697                                            rec->start, rec->max_size,
6698                                            parent, tback->root, 0, 0);
6699                 fprintf(stderr, "adding new tree backref on "
6700                         "start %llu len %llu parent %llu root %llu\n",
6701                         rec->start, rec->max_size, parent, tback->root);
6702         }
6703 fail:
6704         btrfs_release_path(path);
6705         return ret;
6706 }
6707
6708 static struct extent_entry *find_entry(struct list_head *entries,
6709                                        u64 bytenr, u64 bytes)
6710 {
6711         struct extent_entry *entry = NULL;
6712
6713         list_for_each_entry(entry, entries, list) {
6714                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6715                         return entry;
6716         }
6717
6718         return NULL;
6719 }
6720
6721 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6722 {
6723         struct extent_entry *entry, *best = NULL, *prev = NULL;
6724
6725         list_for_each_entry(entry, entries, list) {
6726                 if (!prev) {
6727                         prev = entry;
6728                         continue;
6729                 }
6730
6731                 /*
6732                  * If there are as many broken entries as entries then we know
6733                  * not to trust this particular entry.
6734                  */
6735                 if (entry->broken == entry->count)
6736                         continue;
6737
6738                 /*
6739                  * If our current entry == best then we can't be sure our best
6740                  * is really the best, so we need to keep searching.
6741                  */
6742                 if (best && best->count == entry->count) {
6743                         prev = entry;
6744                         best = NULL;
6745                         continue;
6746                 }
6747
6748                 /* Prev == entry, not good enough, have to keep searching */
6749                 if (!prev->broken && prev->count == entry->count)
6750                         continue;
6751
6752                 if (!best)
6753                         best = (prev->count > entry->count) ? prev : entry;
6754                 else if (best->count < entry->count)
6755                         best = entry;
6756                 prev = entry;
6757         }
6758
6759         return best;
6760 }
6761
6762 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6763                       struct data_backref *dback, struct extent_entry *entry)
6764 {
6765         struct btrfs_trans_handle *trans;
6766         struct btrfs_root *root;
6767         struct btrfs_file_extent_item *fi;
6768         struct extent_buffer *leaf;
6769         struct btrfs_key key;
6770         u64 bytenr, bytes;
6771         int ret, err;
6772
6773         key.objectid = dback->root;
6774         key.type = BTRFS_ROOT_ITEM_KEY;
6775         key.offset = (u64)-1;
6776         root = btrfs_read_fs_root(info, &key);
6777         if (IS_ERR(root)) {
6778                 fprintf(stderr, "Couldn't find root for our ref\n");
6779                 return -EINVAL;
6780         }
6781
6782         /*
6783          * The backref points to the original offset of the extent if it was
6784          * split, so we need to search down to the offset we have and then walk
6785          * forward until we find the backref we're looking for.
6786          */
6787         key.objectid = dback->owner;
6788         key.type = BTRFS_EXTENT_DATA_KEY;
6789         key.offset = dback->offset;
6790         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6791         if (ret < 0) {
6792                 fprintf(stderr, "Error looking up ref %d\n", ret);
6793                 return ret;
6794         }
6795
6796         while (1) {
6797                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6798                         ret = btrfs_next_leaf(root, path);
6799                         if (ret) {
6800                                 fprintf(stderr, "Couldn't find our ref, next\n");
6801                                 return -EINVAL;
6802                         }
6803                 }
6804                 leaf = path->nodes[0];
6805                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6806                 if (key.objectid != dback->owner ||
6807                     key.type != BTRFS_EXTENT_DATA_KEY) {
6808                         fprintf(stderr, "Couldn't find our ref, search\n");
6809                         return -EINVAL;
6810                 }
6811                 fi = btrfs_item_ptr(leaf, path->slots[0],
6812                                     struct btrfs_file_extent_item);
6813                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6814                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6815
6816                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6817                         break;
6818                 path->slots[0]++;
6819         }
6820
6821         btrfs_release_path(path);
6822
6823         trans = btrfs_start_transaction(root, 1);
6824         if (IS_ERR(trans))
6825                 return PTR_ERR(trans);
6826
6827         /*
6828          * Ok we have the key of the file extent we want to fix, now we can cow
6829          * down to the thing and fix it.
6830          */
6831         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6832         if (ret < 0) {
6833                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6834                         key.objectid, key.type, key.offset, ret);
6835                 goto out;
6836         }
6837         if (ret > 0) {
6838                 fprintf(stderr, "Well that's odd, we just found this key "
6839                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6840                         key.offset);
6841                 ret = -EINVAL;
6842                 goto out;
6843         }
6844         leaf = path->nodes[0];
6845         fi = btrfs_item_ptr(leaf, path->slots[0],
6846                             struct btrfs_file_extent_item);
6847
6848         if (btrfs_file_extent_compression(leaf, fi) &&
6849             dback->disk_bytenr != entry->bytenr) {
6850                 fprintf(stderr, "Ref doesn't match the record start and is "
6851                         "compressed, please take a btrfs-image of this file "
6852                         "system and send it to a btrfs developer so they can "
6853                         "complete this functionality for bytenr %Lu\n",
6854                         dback->disk_bytenr);
6855                 ret = -EINVAL;
6856                 goto out;
6857         }
6858
6859         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6860                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6861         } else if (dback->disk_bytenr > entry->bytenr) {
6862                 u64 off_diff, offset;
6863
6864                 off_diff = dback->disk_bytenr - entry->bytenr;
6865                 offset = btrfs_file_extent_offset(leaf, fi);
6866                 if (dback->disk_bytenr + offset +
6867                     btrfs_file_extent_num_bytes(leaf, fi) >
6868                     entry->bytenr + entry->bytes) {
6869                         fprintf(stderr, "Ref is past the entry end, please "
6870                                 "take a btrfs-image of this file system and "
6871                                 "send it to a btrfs developer, ref %Lu\n",
6872                                 dback->disk_bytenr);
6873                         ret = -EINVAL;
6874                         goto out;
6875                 }
6876                 offset += off_diff;
6877                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6878                 btrfs_set_file_extent_offset(leaf, fi, offset);
6879         } else if (dback->disk_bytenr < entry->bytenr) {
6880                 u64 offset;
6881
6882                 offset = btrfs_file_extent_offset(leaf, fi);
6883                 if (dback->disk_bytenr + offset < entry->bytenr) {
6884                         fprintf(stderr, "Ref is before the entry start, please"
6885                                 " take a btrfs-image of this file system and "
6886                                 "send it to a btrfs developer, ref %Lu\n",
6887                                 dback->disk_bytenr);
6888                         ret = -EINVAL;
6889                         goto out;
6890                 }
6891
6892                 offset += dback->disk_bytenr;
6893                 offset -= entry->bytenr;
6894                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6895                 btrfs_set_file_extent_offset(leaf, fi, offset);
6896         }
6897
6898         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6899
6900         /*
6901          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6902          * only do this if we aren't using compression, otherwise it's a
6903          * trickier case.
6904          */
6905         if (!btrfs_file_extent_compression(leaf, fi))
6906                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6907         else
6908                 printf("ram bytes may be wrong?\n");
6909         btrfs_mark_buffer_dirty(leaf);
6910 out:
6911         err = btrfs_commit_transaction(trans, root);
6912         btrfs_release_path(path);
6913         return ret ? ret : err;
6914 }
6915
6916 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6917                            struct extent_record *rec)
6918 {
6919         struct extent_backref *back, *tmp;
6920         struct data_backref *dback;
6921         struct extent_entry *entry, *best = NULL;
6922         LIST_HEAD(entries);
6923         int nr_entries = 0;
6924         int broken_entries = 0;
6925         int ret = 0;
6926         short mismatch = 0;
6927
6928         /*
6929          * Metadata is easy and the backrefs should always agree on bytenr and
6930          * size, if not we've got bigger issues.
6931          */
6932         if (rec->metadata)
6933                 return 0;
6934
6935         rbtree_postorder_for_each_entry_safe(back, tmp,
6936                                              &rec->backref_tree, node) {
6937                 if (back->full_backref || !back->is_data)
6938                         continue;
6939
6940                 dback = to_data_backref(back);
6941
6942                 /*
6943                  * We only pay attention to backrefs that we found a real
6944                  * backref for.
6945                  */
6946                 if (dback->found_ref == 0)
6947                         continue;
6948
6949                 /*
6950                  * For now we only catch when the bytes don't match, not the
6951                  * bytenr.  We can easily do this at the same time, but I want
6952                  * to have a fs image to test on before we just add repair
6953                  * functionality willy-nilly so we know we won't screw up the
6954                  * repair.
6955                  */
6956
6957                 entry = find_entry(&entries, dback->disk_bytenr,
6958                                    dback->bytes);
6959                 if (!entry) {
6960                         entry = malloc(sizeof(struct extent_entry));
6961                         if (!entry) {
6962                                 ret = -ENOMEM;
6963                                 goto out;
6964                         }
6965                         memset(entry, 0, sizeof(*entry));
6966                         entry->bytenr = dback->disk_bytenr;
6967                         entry->bytes = dback->bytes;
6968                         list_add_tail(&entry->list, &entries);
6969                         nr_entries++;
6970                 }
6971
6972                 /*
6973                  * If we only have on entry we may think the entries agree when
6974                  * in reality they don't so we have to do some extra checking.
6975                  */
6976                 if (dback->disk_bytenr != rec->start ||
6977                     dback->bytes != rec->nr || back->broken)
6978                         mismatch = 1;
6979
6980                 if (back->broken) {
6981                         entry->broken++;
6982                         broken_entries++;
6983                 }
6984
6985                 entry->count++;
6986         }
6987
6988         /* Yay all the backrefs agree, carry on good sir */
6989         if (nr_entries <= 1 && !mismatch)
6990                 goto out;
6991
6992         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6993                 "%Lu\n", rec->start);
6994
6995         /*
6996          * First we want to see if the backrefs can agree amongst themselves who
6997          * is right, so figure out which one of the entries has the highest
6998          * count.
6999          */
7000         best = find_most_right_entry(&entries);
7001
7002         /*
7003          * Ok so we may have an even split between what the backrefs think, so
7004          * this is where we use the extent ref to see what it thinks.
7005          */
7006         if (!best) {
7007                 entry = find_entry(&entries, rec->start, rec->nr);
7008                 if (!entry && (!broken_entries || !rec->found_rec)) {
7009                         fprintf(stderr, "Backrefs don't agree with each other "
7010                                 "and extent record doesn't agree with anybody,"
7011                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7012                                 rec->start, rec->nr);
7013                         ret = -EINVAL;
7014                         goto out;
7015                 } else if (!entry) {
7016                         /*
7017                          * Ok our backrefs were broken, we'll assume this is the
7018                          * correct value and add an entry for this range.
7019                          */
7020                         entry = malloc(sizeof(struct extent_entry));
7021                         if (!entry) {
7022                                 ret = -ENOMEM;
7023                                 goto out;
7024                         }
7025                         memset(entry, 0, sizeof(*entry));
7026                         entry->bytenr = rec->start;
7027                         entry->bytes = rec->nr;
7028                         list_add_tail(&entry->list, &entries);
7029                         nr_entries++;
7030                 }
7031                 entry->count++;
7032                 best = find_most_right_entry(&entries);
7033                 if (!best) {
7034                         fprintf(stderr, "Backrefs and extent record evenly "
7035                                 "split on who is right, this is going to "
7036                                 "require user input to fix bytenr %Lu bytes "
7037                                 "%Lu\n", rec->start, rec->nr);
7038                         ret = -EINVAL;
7039                         goto out;
7040                 }
7041         }
7042
7043         /*
7044          * I don't think this can happen currently as we'll abort() if we catch
7045          * this case higher up, but in case somebody removes that we still can't
7046          * deal with it properly here yet, so just bail out of that's the case.
7047          */
7048         if (best->bytenr != rec->start) {
7049                 fprintf(stderr, "Extent start and backref starts don't match, "
7050                         "please use btrfs-image on this file system and send "
7051                         "it to a btrfs developer so they can make fsck fix "
7052                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7053                         rec->start, rec->nr);
7054                 ret = -EINVAL;
7055                 goto out;
7056         }
7057
7058         /*
7059          * Ok great we all agreed on an extent record, let's go find the real
7060          * references and fix up the ones that don't match.
7061          */
7062         rbtree_postorder_for_each_entry_safe(back, tmp,
7063                                              &rec->backref_tree, node) {
7064                 if (back->full_backref || !back->is_data)
7065                         continue;
7066
7067                 dback = to_data_backref(back);
7068
7069                 /*
7070                  * Still ignoring backrefs that don't have a real ref attached
7071                  * to them.
7072                  */
7073                 if (dback->found_ref == 0)
7074                         continue;
7075
7076                 if (dback->bytes == best->bytes &&
7077                     dback->disk_bytenr == best->bytenr)
7078                         continue;
7079
7080                 ret = repair_ref(info, path, dback, best);
7081                 if (ret)
7082                         goto out;
7083         }
7084
7085         /*
7086          * Ok we messed with the actual refs, which means we need to drop our
7087          * entire cache and go back and rescan.  I know this is a huge pain and
7088          * adds a lot of extra work, but it's the only way to be safe.  Once all
7089          * the backrefs agree we may not need to do anything to the extent
7090          * record itself.
7091          */
7092         ret = -EAGAIN;
7093 out:
7094         while (!list_empty(&entries)) {
7095                 entry = list_entry(entries.next, struct extent_entry, list);
7096                 list_del_init(&entry->list);
7097                 free(entry);
7098         }
7099         return ret;
7100 }
7101
7102 static int process_duplicates(struct btrfs_root *root,
7103                               struct cache_tree *extent_cache,
7104                               struct extent_record *rec)
7105 {
7106         struct extent_record *good, *tmp;
7107         struct cache_extent *cache;
7108         int ret;
7109
7110         /*
7111          * If we found a extent record for this extent then return, or if we
7112          * have more than one duplicate we are likely going to need to delete
7113          * something.
7114          */
7115         if (rec->found_rec || rec->num_duplicates > 1)
7116                 return 0;
7117
7118         /* Shouldn't happen but just in case */
7119         BUG_ON(!rec->num_duplicates);
7120
7121         /*
7122          * So this happens if we end up with a backref that doesn't match the
7123          * actual extent entry.  So either the backref is bad or the extent
7124          * entry is bad.  Either way we want to have the extent_record actually
7125          * reflect what we found in the extent_tree, so we need to take the
7126          * duplicate out and use that as the extent_record since the only way we
7127          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7128          */
7129         remove_cache_extent(extent_cache, &rec->cache);
7130
7131         good = to_extent_record(rec->dups.next);
7132         list_del_init(&good->list);
7133         INIT_LIST_HEAD(&good->backrefs);
7134         INIT_LIST_HEAD(&good->dups);
7135         good->cache.start = good->start;
7136         good->cache.size = good->nr;
7137         good->content_checked = 0;
7138         good->owner_ref_checked = 0;
7139         good->num_duplicates = 0;
7140         good->refs = rec->refs;
7141         list_splice_init(&rec->backrefs, &good->backrefs);
7142         while (1) {
7143                 cache = lookup_cache_extent(extent_cache, good->start,
7144                                             good->nr);
7145                 if (!cache)
7146                         break;
7147                 tmp = container_of(cache, struct extent_record, cache);
7148
7149                 /*
7150                  * If we find another overlapping extent and it's found_rec is
7151                  * set then it's a duplicate and we need to try and delete
7152                  * something.
7153                  */
7154                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7155                         if (list_empty(&good->list))
7156                                 list_add_tail(&good->list,
7157                                               &duplicate_extents);
7158                         good->num_duplicates += tmp->num_duplicates + 1;
7159                         list_splice_init(&tmp->dups, &good->dups);
7160                         list_del_init(&tmp->list);
7161                         list_add_tail(&tmp->list, &good->dups);
7162                         remove_cache_extent(extent_cache, &tmp->cache);
7163                         continue;
7164                 }
7165
7166                 /*
7167                  * Ok we have another non extent item backed extent rec, so lets
7168                  * just add it to this extent and carry on like we did above.
7169                  */
7170                 good->refs += tmp->refs;
7171                 list_splice_init(&tmp->backrefs, &good->backrefs);
7172                 remove_cache_extent(extent_cache, &tmp->cache);
7173                 free(tmp);
7174         }
7175         ret = insert_cache_extent(extent_cache, &good->cache);
7176         BUG_ON(ret);
7177         free(rec);
7178         return good->num_duplicates ? 0 : 1;
7179 }
7180
7181 static int delete_duplicate_records(struct btrfs_root *root,
7182                                     struct extent_record *rec)
7183 {
7184         struct btrfs_trans_handle *trans;
7185         LIST_HEAD(delete_list);
7186         struct btrfs_path *path;
7187         struct extent_record *tmp, *good, *n;
7188         int nr_del = 0;
7189         int ret = 0, err;
7190         struct btrfs_key key;
7191
7192         path = btrfs_alloc_path();
7193         if (!path) {
7194                 ret = -ENOMEM;
7195                 goto out;
7196         }
7197
7198         good = rec;
7199         /* Find the record that covers all of the duplicates. */
7200         list_for_each_entry(tmp, &rec->dups, list) {
7201                 if (good->start < tmp->start)
7202                         continue;
7203                 if (good->nr > tmp->nr)
7204                         continue;
7205
7206                 if (tmp->start + tmp->nr < good->start + good->nr) {
7207                         fprintf(stderr, "Ok we have overlapping extents that "
7208                                 "aren't completely covered by each other, this "
7209                                 "is going to require more careful thought.  "
7210                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7211                                 tmp->start, tmp->nr, good->start, good->nr);
7212                         abort();
7213                 }
7214                 good = tmp;
7215         }
7216
7217         if (good != rec)
7218                 list_add_tail(&rec->list, &delete_list);
7219
7220         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7221                 if (tmp == good)
7222                         continue;
7223                 list_move_tail(&tmp->list, &delete_list);
7224         }
7225
7226         root = root->fs_info->extent_root;
7227         trans = btrfs_start_transaction(root, 1);
7228         if (IS_ERR(trans)) {
7229                 ret = PTR_ERR(trans);
7230                 goto out;
7231         }
7232
7233         list_for_each_entry(tmp, &delete_list, list) {
7234                 if (tmp->found_rec == 0)
7235                         continue;
7236                 key.objectid = tmp->start;
7237                 key.type = BTRFS_EXTENT_ITEM_KEY;
7238                 key.offset = tmp->nr;
7239
7240                 /* Shouldn't happen but just in case */
7241                 if (tmp->metadata) {
7242                         fprintf(stderr, "Well this shouldn't happen, extent "
7243                                 "record overlaps but is metadata? "
7244                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7245                         abort();
7246                 }
7247
7248                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7249                 if (ret) {
7250                         if (ret > 0)
7251                                 ret = -EINVAL;
7252                         break;
7253                 }
7254                 ret = btrfs_del_item(trans, root, path);
7255                 if (ret)
7256                         break;
7257                 btrfs_release_path(path);
7258                 nr_del++;
7259         }
7260         err = btrfs_commit_transaction(trans, root);
7261         if (err && !ret)
7262                 ret = err;
7263 out:
7264         while (!list_empty(&delete_list)) {
7265                 tmp = to_extent_record(delete_list.next);
7266                 list_del_init(&tmp->list);
7267                 if (tmp == rec)
7268                         continue;
7269                 free(tmp);
7270         }
7271
7272         while (!list_empty(&rec->dups)) {
7273                 tmp = to_extent_record(rec->dups.next);
7274                 list_del_init(&tmp->list);
7275                 free(tmp);
7276         }
7277
7278         btrfs_free_path(path);
7279
7280         if (!ret && !nr_del)
7281                 rec->num_duplicates = 0;
7282
7283         return ret ? ret : nr_del;
7284 }
7285
7286 static int find_possible_backrefs(struct btrfs_fs_info *info,
7287                                   struct btrfs_path *path,
7288                                   struct cache_tree *extent_cache,
7289                                   struct extent_record *rec)
7290 {
7291         struct btrfs_root *root;
7292         struct extent_backref *back, *tmp;
7293         struct data_backref *dback;
7294         struct cache_extent *cache;
7295         struct btrfs_file_extent_item *fi;
7296         struct btrfs_key key;
7297         u64 bytenr, bytes;
7298         int ret;
7299
7300         rbtree_postorder_for_each_entry_safe(back, tmp,
7301                                              &rec->backref_tree, node) {
7302                 /* Don't care about full backrefs (poor unloved backrefs) */
7303                 if (back->full_backref || !back->is_data)
7304                         continue;
7305
7306                 dback = to_data_backref(back);
7307
7308                 /* We found this one, we don't need to do a lookup */
7309                 if (dback->found_ref)
7310                         continue;
7311
7312                 key.objectid = dback->root;
7313                 key.type = BTRFS_ROOT_ITEM_KEY;
7314                 key.offset = (u64)-1;
7315
7316                 root = btrfs_read_fs_root(info, &key);
7317
7318                 /* No root, definitely a bad ref, skip */
7319                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7320                         continue;
7321                 /* Other err, exit */
7322                 if (IS_ERR(root))
7323                         return PTR_ERR(root);
7324
7325                 key.objectid = dback->owner;
7326                 key.type = BTRFS_EXTENT_DATA_KEY;
7327                 key.offset = dback->offset;
7328                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7329                 if (ret) {
7330                         btrfs_release_path(path);
7331                         if (ret < 0)
7332                                 return ret;
7333                         /* Didn't find it, we can carry on */
7334                         ret = 0;
7335                         continue;
7336                 }
7337
7338                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7339                                     struct btrfs_file_extent_item);
7340                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7341                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7342                 btrfs_release_path(path);
7343                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7344                 if (cache) {
7345                         struct extent_record *tmp;
7346                         tmp = container_of(cache, struct extent_record, cache);
7347
7348                         /*
7349                          * If we found an extent record for the bytenr for this
7350                          * particular backref then we can't add it to our
7351                          * current extent record.  We only want to add backrefs
7352                          * that don't have a corresponding extent item in the
7353                          * extent tree since they likely belong to this record
7354                          * and we need to fix it if it doesn't match bytenrs.
7355                          */
7356                         if  (tmp->found_rec)
7357                                 continue;
7358                 }
7359
7360                 dback->found_ref += 1;
7361                 dback->disk_bytenr = bytenr;
7362                 dback->bytes = bytes;
7363
7364                 /*
7365                  * Set this so the verify backref code knows not to trust the
7366                  * values in this backref.
7367                  */
7368                 back->broken = 1;
7369         }
7370
7371         return 0;
7372 }
7373
7374 /*
7375  * Record orphan data ref into corresponding root.
7376  *
7377  * Return 0 if the extent item contains data ref and recorded.
7378  * Return 1 if the extent item contains no useful data ref
7379  *   On that case, it may contains only shared_dataref or metadata backref
7380  *   or the file extent exists(this should be handled by the extent bytenr
7381  *   recovery routine)
7382  * Return <0 if something goes wrong.
7383  */
7384 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7385                                       struct extent_record *rec)
7386 {
7387         struct btrfs_key key;
7388         struct btrfs_root *dest_root;
7389         struct extent_backref *back, *tmp;
7390         struct data_backref *dback;
7391         struct orphan_data_extent *orphan;
7392         struct btrfs_path *path;
7393         int recorded_data_ref = 0;
7394         int ret = 0;
7395
7396         if (rec->metadata)
7397                 return 1;
7398         path = btrfs_alloc_path();
7399         if (!path)
7400                 return -ENOMEM;
7401         rbtree_postorder_for_each_entry_safe(back, tmp,
7402                                              &rec->backref_tree, node) {
7403                 if (back->full_backref || !back->is_data ||
7404                     !back->found_extent_tree)
7405                         continue;
7406                 dback = to_data_backref(back);
7407                 if (dback->found_ref)
7408                         continue;
7409                 key.objectid = dback->root;
7410                 key.type = BTRFS_ROOT_ITEM_KEY;
7411                 key.offset = (u64)-1;
7412
7413                 dest_root = btrfs_read_fs_root(fs_info, &key);
7414
7415                 /* For non-exist root we just skip it */
7416                 if (IS_ERR(dest_root) || !dest_root)
7417                         continue;
7418
7419                 key.objectid = dback->owner;
7420                 key.type = BTRFS_EXTENT_DATA_KEY;
7421                 key.offset = dback->offset;
7422
7423                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7424                 /*
7425                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7426                  * we need to record it for inode/file extent rebuild.
7427                  * For ret > 0, we record it only for file extent rebuild.
7428                  * For ret == 0, the file extent exists but only bytenr
7429                  * mismatch, let the original bytenr fix routine to handle,
7430                  * don't record it.
7431                  */
7432                 if (ret == 0)
7433                         continue;
7434                 ret = 0;
7435                 orphan = malloc(sizeof(*orphan));
7436                 if (!orphan) {
7437                         ret = -ENOMEM;
7438                         goto out;
7439                 }
7440                 INIT_LIST_HEAD(&orphan->list);
7441                 orphan->root = dback->root;
7442                 orphan->objectid = dback->owner;
7443                 orphan->offset = dback->offset;
7444                 orphan->disk_bytenr = rec->cache.start;
7445                 orphan->disk_len = rec->cache.size;
7446                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7447                 recorded_data_ref = 1;
7448         }
7449 out:
7450         btrfs_free_path(path);
7451         if (!ret)
7452                 return !recorded_data_ref;
7453         else
7454                 return ret;
7455 }
7456
7457 /*
7458  * when an incorrect extent item is found, this will delete
7459  * all of the existing entries for it and recreate them
7460  * based on what the tree scan found.
7461  */
7462 static int fixup_extent_refs(struct btrfs_fs_info *info,
7463                              struct cache_tree *extent_cache,
7464                              struct extent_record *rec)
7465 {
7466         struct btrfs_trans_handle *trans = NULL;
7467         int ret;
7468         struct btrfs_path *path;
7469         struct cache_extent *cache;
7470         struct extent_backref *back, *tmp;
7471         int allocated = 0;
7472         u64 flags = 0;
7473
7474         if (rec->flag_block_full_backref)
7475                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7476
7477         path = btrfs_alloc_path();
7478         if (!path)
7479                 return -ENOMEM;
7480
7481         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7482                 /*
7483                  * Sometimes the backrefs themselves are so broken they don't
7484                  * get attached to any meaningful rec, so first go back and
7485                  * check any of our backrefs that we couldn't find and throw
7486                  * them into the list if we find the backref so that
7487                  * verify_backrefs can figure out what to do.
7488                  */
7489                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7490                 if (ret < 0)
7491                         goto out;
7492         }
7493
7494         /* step one, make sure all of the backrefs agree */
7495         ret = verify_backrefs(info, path, rec);
7496         if (ret < 0)
7497                 goto out;
7498
7499         trans = btrfs_start_transaction(info->extent_root, 1);
7500         if (IS_ERR(trans)) {
7501                 ret = PTR_ERR(trans);
7502                 goto out;
7503         }
7504
7505         /* step two, delete all the existing records */
7506         ret = delete_extent_records(trans, info->extent_root, path,
7507                                     rec->start, rec->max_size);
7508
7509         if (ret < 0)
7510                 goto out;
7511
7512         /* was this block corrupt?  If so, don't add references to it */
7513         cache = lookup_cache_extent(info->corrupt_blocks,
7514                                     rec->start, rec->max_size);
7515         if (cache) {
7516                 ret = 0;
7517                 goto out;
7518         }
7519
7520         /* step three, recreate all the refs we did find */
7521         rbtree_postorder_for_each_entry_safe(back, tmp,
7522                                              &rec->backref_tree, node) {
7523                 /*
7524                  * if we didn't find any references, don't create a
7525                  * new extent record
7526                  */
7527                 if (!back->found_ref)
7528                         continue;
7529
7530                 rec->bad_full_backref = 0;
7531                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7532                 allocated = 1;
7533
7534                 if (ret)
7535                         goto out;
7536         }
7537 out:
7538         if (trans) {
7539                 int err = btrfs_commit_transaction(trans, info->extent_root);
7540                 if (!ret)
7541                         ret = err;
7542         }
7543
7544         btrfs_free_path(path);
7545         return ret;
7546 }
7547
7548 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7549                               struct extent_record *rec)
7550 {
7551         struct btrfs_trans_handle *trans;
7552         struct btrfs_root *root = fs_info->extent_root;
7553         struct btrfs_path *path;
7554         struct btrfs_extent_item *ei;
7555         struct btrfs_key key;
7556         u64 flags;
7557         int ret = 0;
7558
7559         key.objectid = rec->start;
7560         if (rec->metadata) {
7561                 key.type = BTRFS_METADATA_ITEM_KEY;
7562                 key.offset = rec->info_level;
7563         } else {
7564                 key.type = BTRFS_EXTENT_ITEM_KEY;
7565                 key.offset = rec->max_size;
7566         }
7567
7568         path = btrfs_alloc_path();
7569         if (!path)
7570                 return -ENOMEM;
7571
7572         trans = btrfs_start_transaction(root, 0);
7573         if (IS_ERR(trans)) {
7574                 btrfs_free_path(path);
7575                 return PTR_ERR(trans);
7576         }
7577
7578         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7579         if (ret < 0) {
7580                 btrfs_free_path(path);
7581                 btrfs_commit_transaction(trans, root);
7582                 return ret;
7583         } else if (ret) {
7584                 fprintf(stderr, "Didn't find extent for %llu\n",
7585                         (unsigned long long)rec->start);
7586                 btrfs_free_path(path);
7587                 btrfs_commit_transaction(trans, root);
7588                 return -ENOENT;
7589         }
7590
7591         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7592                             struct btrfs_extent_item);
7593         flags = btrfs_extent_flags(path->nodes[0], ei);
7594         if (rec->flag_block_full_backref) {
7595                 fprintf(stderr, "setting full backref on %llu\n",
7596                         (unsigned long long)key.objectid);
7597                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7598         } else {
7599                 fprintf(stderr, "clearing full backref on %llu\n",
7600                         (unsigned long long)key.objectid);
7601                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7602         }
7603         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7604         btrfs_mark_buffer_dirty(path->nodes[0]);
7605         btrfs_free_path(path);
7606         return btrfs_commit_transaction(trans, root);
7607 }
7608
7609 /* right now we only prune from the extent allocation tree */
7610 static int prune_one_block(struct btrfs_trans_handle *trans,
7611                            struct btrfs_fs_info *info,
7612                            struct btrfs_corrupt_block *corrupt)
7613 {
7614         int ret;
7615         struct btrfs_path path;
7616         struct extent_buffer *eb;
7617         u64 found;
7618         int slot;
7619         int nritems;
7620         int level = corrupt->level + 1;
7621
7622         btrfs_init_path(&path);
7623 again:
7624         /* we want to stop at the parent to our busted block */
7625         path.lowest_level = level;
7626
7627         ret = btrfs_search_slot(trans, info->extent_root,
7628                                 &corrupt->key, &path, -1, 1);
7629
7630         if (ret < 0)
7631                 goto out;
7632
7633         eb = path.nodes[level];
7634         if (!eb) {
7635                 ret = -ENOENT;
7636                 goto out;
7637         }
7638
7639         /*
7640          * hopefully the search gave us the block we want to prune,
7641          * lets try that first
7642          */
7643         slot = path.slots[level];
7644         found =  btrfs_node_blockptr(eb, slot);
7645         if (found == corrupt->cache.start)
7646                 goto del_ptr;
7647
7648         nritems = btrfs_header_nritems(eb);
7649
7650         /* the search failed, lets scan this node and hope we find it */
7651         for (slot = 0; slot < nritems; slot++) {
7652                 found =  btrfs_node_blockptr(eb, slot);
7653                 if (found == corrupt->cache.start)
7654                         goto del_ptr;
7655         }
7656         /*
7657          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7658          * to this block
7659          */
7660         if (eb == info->extent_root->node) {
7661                 ret = -ENOENT;
7662                 goto out;
7663         } else {
7664                 level++;
7665                 btrfs_release_path(&path);
7666                 goto again;
7667         }
7668
7669 del_ptr:
7670         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7671         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7672
7673 out:
7674         btrfs_release_path(&path);
7675         return ret;
7676 }
7677
7678 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7679 {
7680         struct btrfs_trans_handle *trans = NULL;
7681         struct cache_extent *cache;
7682         struct btrfs_corrupt_block *corrupt;
7683
7684         while (1) {
7685                 cache = search_cache_extent(info->corrupt_blocks, 0);
7686                 if (!cache)
7687                         break;
7688                 if (!trans) {
7689                         trans = btrfs_start_transaction(info->extent_root, 1);
7690                         if (IS_ERR(trans))
7691                                 return PTR_ERR(trans);
7692                 }
7693                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7694                 prune_one_block(trans, info, corrupt);
7695                 remove_cache_extent(info->corrupt_blocks, cache);
7696         }
7697         if (trans)
7698                 return btrfs_commit_transaction(trans, info->extent_root);
7699         return 0;
7700 }
7701
7702 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7703 {
7704         struct btrfs_block_group_cache *cache;
7705         u64 start, end;
7706         int ret;
7707
7708         while (1) {
7709                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7710                                             &start, &end, EXTENT_DIRTY);
7711                 if (ret)
7712                         break;
7713                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7714                                    GFP_NOFS);
7715         }
7716
7717         start = 0;
7718         while (1) {
7719                 cache = btrfs_lookup_first_block_group(fs_info, start);
7720                 if (!cache)
7721                         break;
7722                 if (cache->cached)
7723                         cache->cached = 0;
7724                 start = cache->key.objectid + cache->key.offset;
7725         }
7726 }
7727
7728 static int check_extent_refs(struct btrfs_root *root,
7729                              struct cache_tree *extent_cache)
7730 {
7731         struct extent_record *rec;
7732         struct cache_extent *cache;
7733         int err = 0;
7734         int ret = 0;
7735         int fixed = 0;
7736         int had_dups = 0;
7737         int recorded = 0;
7738
7739         if (repair) {
7740                 /*
7741                  * if we're doing a repair, we have to make sure
7742                  * we don't allocate from the problem extents.
7743                  * In the worst case, this will be all the
7744                  * extents in the FS
7745                  */
7746                 cache = search_cache_extent(extent_cache, 0);
7747                 while(cache) {
7748                         rec = container_of(cache, struct extent_record, cache);
7749                         set_extent_dirty(root->fs_info->excluded_extents,
7750                                          rec->start,
7751                                          rec->start + rec->max_size - 1,
7752                                          GFP_NOFS);
7753                         cache = next_cache_extent(cache);
7754                 }
7755
7756                 /* pin down all the corrupted blocks too */
7757                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7758                 while(cache) {
7759                         set_extent_dirty(root->fs_info->excluded_extents,
7760                                          cache->start,
7761                                          cache->start + cache->size - 1,
7762                                          GFP_NOFS);
7763                         cache = next_cache_extent(cache);
7764                 }
7765                 prune_corrupt_blocks(root->fs_info);
7766                 reset_cached_block_groups(root->fs_info);
7767         }
7768
7769         reset_cached_block_groups(root->fs_info);
7770
7771         /*
7772          * We need to delete any duplicate entries we find first otherwise we
7773          * could mess up the extent tree when we have backrefs that actually
7774          * belong to a different extent item and not the weird duplicate one.
7775          */
7776         while (repair && !list_empty(&duplicate_extents)) {
7777                 rec = to_extent_record(duplicate_extents.next);
7778                 list_del_init(&rec->list);
7779
7780                 /* Sometimes we can find a backref before we find an actual
7781                  * extent, so we need to process it a little bit to see if there
7782                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7783                  * if this is a backref screwup.  If we need to delete stuff
7784                  * process_duplicates() will return 0, otherwise it will return
7785                  * 1 and we
7786                  */
7787                 if (process_duplicates(root, extent_cache, rec))
7788                         continue;
7789                 ret = delete_duplicate_records(root, rec);
7790                 if (ret < 0)
7791                         return ret;
7792                 /*
7793                  * delete_duplicate_records will return the number of entries
7794                  * deleted, so if it's greater than 0 then we know we actually
7795                  * did something and we need to remove.
7796                  */
7797                 if (ret)
7798                         had_dups = 1;
7799         }
7800
7801         if (had_dups)
7802                 return -EAGAIN;
7803
7804         while(1) {
7805                 int cur_err = 0;
7806
7807                 fixed = 0;
7808                 recorded = 0;
7809                 cache = search_cache_extent(extent_cache, 0);
7810                 if (!cache)
7811                         break;
7812                 rec = container_of(cache, struct extent_record, cache);
7813                 if (rec->num_duplicates) {
7814                         fprintf(stderr, "extent item %llu has multiple extent "
7815                                 "items\n", (unsigned long long)rec->start);
7816                         err = 1;
7817                         cur_err = 1;
7818                 }
7819
7820                 if (rec->refs != rec->extent_item_refs) {
7821                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7822                                 (unsigned long long)rec->start,
7823                                 (unsigned long long)rec->nr);
7824                         fprintf(stderr, "extent item %llu, found %llu\n",
7825                                 (unsigned long long)rec->extent_item_refs,
7826                                 (unsigned long long)rec->refs);
7827                         ret = record_orphan_data_extents(root->fs_info, rec);
7828                         if (ret < 0)
7829                                 goto repair_abort;
7830                         if (ret == 0) {
7831                                 recorded = 1;
7832                         } else {
7833                                 /*
7834                                  * we can't use the extent to repair file
7835                                  * extent, let the fallback method handle it.
7836                                  */
7837                                 if (!fixed && repair) {
7838                                         ret = fixup_extent_refs(
7839                                                         root->fs_info,
7840                                                         extent_cache, rec);
7841                                         if (ret)
7842                                                 goto repair_abort;
7843                                         fixed = 1;
7844                                 }
7845                         }
7846                         err = 1;
7847                         cur_err = 1;
7848                 }
7849                 if (all_backpointers_checked(rec, 1)) {
7850                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7851                                 (unsigned long long)rec->start,
7852                                 (unsigned long long)rec->nr);
7853
7854                         if (!fixed && !recorded && repair) {
7855                                 ret = fixup_extent_refs(root->fs_info,
7856                                                         extent_cache, rec);
7857                                 if (ret)
7858                                         goto repair_abort;
7859                                 fixed = 1;
7860                         }
7861                         cur_err = 1;
7862                         err = 1;
7863                 }
7864                 if (!rec->owner_ref_checked) {
7865                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7866                                 (unsigned long long)rec->start,
7867                                 (unsigned long long)rec->nr);
7868                         if (!fixed && !recorded && repair) {
7869                                 ret = fixup_extent_refs(root->fs_info,
7870                                                         extent_cache, rec);
7871                                 if (ret)
7872                                         goto repair_abort;
7873                                 fixed = 1;
7874                         }
7875                         err = 1;
7876                         cur_err = 1;
7877                 }
7878                 if (rec->bad_full_backref) {
7879                         fprintf(stderr, "bad full backref, on [%llu]\n",
7880                                 (unsigned long long)rec->start);
7881                         if (repair) {
7882                                 ret = fixup_extent_flags(root->fs_info, rec);
7883                                 if (ret)
7884                                         goto repair_abort;
7885                                 fixed = 1;
7886                         }
7887                         err = 1;
7888                         cur_err = 1;
7889                 }
7890                 /*
7891                  * Although it's not a extent ref's problem, we reuse this
7892                  * routine for error reporting.
7893                  * No repair function yet.
7894                  */
7895                 if (rec->crossing_stripes) {
7896                         fprintf(stderr,
7897                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7898                                 rec->start, rec->start + rec->max_size);
7899                         err = 1;
7900                         cur_err = 1;
7901                 }
7902
7903                 if (rec->wrong_chunk_type) {
7904                         fprintf(stderr,
7905                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7906                                 rec->start, rec->start + rec->max_size);
7907                         err = 1;
7908                         cur_err = 1;
7909                 }
7910
7911                 remove_cache_extent(extent_cache, cache);
7912                 free_all_extent_backrefs(rec);
7913                 if (!init_extent_tree && repair && (!cur_err || fixed))
7914                         clear_extent_dirty(root->fs_info->excluded_extents,
7915                                            rec->start,
7916                                            rec->start + rec->max_size - 1,
7917                                            GFP_NOFS);
7918                 free(rec);
7919         }
7920 repair_abort:
7921         if (repair) {
7922                 if (ret && ret != -EAGAIN) {
7923                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7924                         exit(1);
7925                 } else if (!ret) {
7926                         struct btrfs_trans_handle *trans;
7927
7928                         root = root->fs_info->extent_root;
7929                         trans = btrfs_start_transaction(root, 1);
7930                         if (IS_ERR(trans)) {
7931                                 ret = PTR_ERR(trans);
7932                                 goto repair_abort;
7933                         }
7934
7935                         btrfs_fix_block_accounting(trans, root);
7936                         ret = btrfs_commit_transaction(trans, root);
7937                         if (ret)
7938                                 goto repair_abort;
7939                 }
7940                 if (err)
7941                         fprintf(stderr, "repaired damaged extent references\n");
7942                 return ret;
7943         }
7944         return err;
7945 }
7946
7947 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7948 {
7949         u64 stripe_size;
7950
7951         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7952                 stripe_size = length;
7953                 stripe_size /= num_stripes;
7954         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7955                 stripe_size = length * 2;
7956                 stripe_size /= num_stripes;
7957         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7958                 stripe_size = length;
7959                 stripe_size /= (num_stripes - 1);
7960         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7961                 stripe_size = length;
7962                 stripe_size /= (num_stripes - 2);
7963         } else {
7964                 stripe_size = length;
7965         }
7966         return stripe_size;
7967 }
7968
7969 /*
7970  * Check the chunk with its block group/dev list ref:
7971  * Return 0 if all refs seems valid.
7972  * Return 1 if part of refs seems valid, need later check for rebuild ref
7973  * like missing block group and needs to search extent tree to rebuild them.
7974  * Return -1 if essential refs are missing and unable to rebuild.
7975  */
7976 static int check_chunk_refs(struct chunk_record *chunk_rec,
7977                             struct block_group_tree *block_group_cache,
7978                             struct device_extent_tree *dev_extent_cache,
7979                             int silent)
7980 {
7981         struct cache_extent *block_group_item;
7982         struct block_group_record *block_group_rec;
7983         struct cache_extent *dev_extent_item;
7984         struct device_extent_record *dev_extent_rec;
7985         u64 devid;
7986         u64 offset;
7987         u64 length;
7988         int metadump_v2 = 0;
7989         int i;
7990         int ret = 0;
7991
7992         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7993                                                chunk_rec->offset,
7994                                                chunk_rec->length);
7995         if (block_group_item) {
7996                 block_group_rec = container_of(block_group_item,
7997                                                struct block_group_record,
7998                                                cache);
7999                 if (chunk_rec->length != block_group_rec->offset ||
8000                     chunk_rec->offset != block_group_rec->objectid ||
8001                     (!metadump_v2 &&
8002                      chunk_rec->type_flags != block_group_rec->flags)) {
8003                         if (!silent)
8004                                 fprintf(stderr,
8005                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8006                                         chunk_rec->objectid,
8007                                         chunk_rec->type,
8008                                         chunk_rec->offset,
8009                                         chunk_rec->length,
8010                                         chunk_rec->offset,
8011                                         chunk_rec->type_flags,
8012                                         block_group_rec->objectid,
8013                                         block_group_rec->type,
8014                                         block_group_rec->offset,
8015                                         block_group_rec->offset,
8016                                         block_group_rec->objectid,
8017                                         block_group_rec->flags);
8018                         ret = -1;
8019                 } else {
8020                         list_del_init(&block_group_rec->list);
8021                         chunk_rec->bg_rec = block_group_rec;
8022                 }
8023         } else {
8024                 if (!silent)
8025                         fprintf(stderr,
8026                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8027                                 chunk_rec->objectid,
8028                                 chunk_rec->type,
8029                                 chunk_rec->offset,
8030                                 chunk_rec->length,
8031                                 chunk_rec->offset,
8032                                 chunk_rec->type_flags);
8033                 ret = 1;
8034         }
8035
8036         if (metadump_v2)
8037                 return ret;
8038
8039         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8040                                     chunk_rec->num_stripes);
8041         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8042                 devid = chunk_rec->stripes[i].devid;
8043                 offset = chunk_rec->stripes[i].offset;
8044                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8045                                                        devid, offset, length);
8046                 if (dev_extent_item) {
8047                         dev_extent_rec = container_of(dev_extent_item,
8048                                                 struct device_extent_record,
8049                                                 cache);
8050                         if (dev_extent_rec->objectid != devid ||
8051                             dev_extent_rec->offset != offset ||
8052                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8053                             dev_extent_rec->length != length) {
8054                                 if (!silent)
8055                                         fprintf(stderr,
8056                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8057                                                 chunk_rec->objectid,
8058                                                 chunk_rec->type,
8059                                                 chunk_rec->offset,
8060                                                 chunk_rec->stripes[i].devid,
8061                                                 chunk_rec->stripes[i].offset,
8062                                                 dev_extent_rec->objectid,
8063                                                 dev_extent_rec->offset,
8064                                                 dev_extent_rec->length);
8065                                 ret = -1;
8066                         } else {
8067                                 list_move(&dev_extent_rec->chunk_list,
8068                                           &chunk_rec->dextents);
8069                         }
8070                 } else {
8071                         if (!silent)
8072                                 fprintf(stderr,
8073                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8074                                         chunk_rec->objectid,
8075                                         chunk_rec->type,
8076                                         chunk_rec->offset,
8077                                         chunk_rec->stripes[i].devid,
8078                                         chunk_rec->stripes[i].offset);
8079                         ret = -1;
8080                 }
8081         }
8082         return ret;
8083 }
8084
8085 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8086 int check_chunks(struct cache_tree *chunk_cache,
8087                  struct block_group_tree *block_group_cache,
8088                  struct device_extent_tree *dev_extent_cache,
8089                  struct list_head *good, struct list_head *bad,
8090                  struct list_head *rebuild, int silent)
8091 {
8092         struct cache_extent *chunk_item;
8093         struct chunk_record *chunk_rec;
8094         struct block_group_record *bg_rec;
8095         struct device_extent_record *dext_rec;
8096         int err;
8097         int ret = 0;
8098
8099         chunk_item = first_cache_extent(chunk_cache);
8100         while (chunk_item) {
8101                 chunk_rec = container_of(chunk_item, struct chunk_record,
8102                                          cache);
8103                 err = check_chunk_refs(chunk_rec, block_group_cache,
8104                                        dev_extent_cache, silent);
8105                 if (err < 0)
8106                         ret = err;
8107                 if (err == 0 && good)
8108                         list_add_tail(&chunk_rec->list, good);
8109                 if (err > 0 && rebuild)
8110                         list_add_tail(&chunk_rec->list, rebuild);
8111                 if (err < 0 && bad)
8112                         list_add_tail(&chunk_rec->list, bad);
8113                 chunk_item = next_cache_extent(chunk_item);
8114         }
8115
8116         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8117                 if (!silent)
8118                         fprintf(stderr,
8119                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8120                                 bg_rec->objectid,
8121                                 bg_rec->offset,
8122                                 bg_rec->flags);
8123                 if (!ret)
8124                         ret = 1;
8125         }
8126
8127         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8128                             chunk_list) {
8129                 if (!silent)
8130                         fprintf(stderr,
8131                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8132                                 dext_rec->objectid,
8133                                 dext_rec->offset,
8134                                 dext_rec->length);
8135                 if (!ret)
8136                         ret = 1;
8137         }
8138         return ret;
8139 }
8140
8141
8142 static int check_device_used(struct device_record *dev_rec,
8143                              struct device_extent_tree *dext_cache)
8144 {
8145         struct cache_extent *cache;
8146         struct device_extent_record *dev_extent_rec;
8147         u64 total_byte = 0;
8148
8149         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8150         while (cache) {
8151                 dev_extent_rec = container_of(cache,
8152                                               struct device_extent_record,
8153                                               cache);
8154                 if (dev_extent_rec->objectid != dev_rec->devid)
8155                         break;
8156
8157                 list_del_init(&dev_extent_rec->device_list);
8158                 total_byte += dev_extent_rec->length;
8159                 cache = next_cache_extent(cache);
8160         }
8161
8162         if (total_byte != dev_rec->byte_used) {
8163                 fprintf(stderr,
8164                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8165                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8166                         dev_rec->type, dev_rec->offset);
8167                 return -1;
8168         } else {
8169                 return 0;
8170         }
8171 }
8172
8173 /* check btrfs_dev_item -> btrfs_dev_extent */
8174 static int check_devices(struct rb_root *dev_cache,
8175                          struct device_extent_tree *dev_extent_cache)
8176 {
8177         struct rb_node *dev_node;
8178         struct device_record *dev_rec;
8179         struct device_extent_record *dext_rec;
8180         int err;
8181         int ret = 0;
8182
8183         dev_node = rb_first(dev_cache);
8184         while (dev_node) {
8185                 dev_rec = container_of(dev_node, struct device_record, node);
8186                 err = check_device_used(dev_rec, dev_extent_cache);
8187                 if (err)
8188                         ret = err;
8189
8190                 dev_node = rb_next(dev_node);
8191         }
8192         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8193                             device_list) {
8194                 fprintf(stderr,
8195                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8196                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8197                 if (!ret)
8198                         ret = 1;
8199         }
8200         return ret;
8201 }
8202
8203 static int add_root_item_to_list(struct list_head *head,
8204                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8205                                   u8 level, u8 drop_level,
8206                                   int level_size, struct btrfs_key *drop_key)
8207 {
8208
8209         struct root_item_record *ri_rec;
8210         ri_rec = malloc(sizeof(*ri_rec));
8211         if (!ri_rec)
8212                 return -ENOMEM;
8213         ri_rec->bytenr = bytenr;
8214         ri_rec->objectid = objectid;
8215         ri_rec->level = level;
8216         ri_rec->level_size = level_size;
8217         ri_rec->drop_level = drop_level;
8218         ri_rec->last_snapshot = last_snapshot;
8219         if (drop_key)
8220                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8221         list_add_tail(&ri_rec->list, head);
8222
8223         return 0;
8224 }
8225
8226 static void free_root_item_list(struct list_head *list)
8227 {
8228         struct root_item_record *ri_rec;
8229
8230         while (!list_empty(list)) {
8231                 ri_rec = list_first_entry(list, struct root_item_record,
8232                                           list);
8233                 list_del_init(&ri_rec->list);
8234                 free(ri_rec);
8235         }
8236 }
8237
8238 static int deal_root_from_list(struct list_head *list,
8239                                struct btrfs_root *root,
8240                                struct block_info *bits,
8241                                int bits_nr,
8242                                struct cache_tree *pending,
8243                                struct cache_tree *seen,
8244                                struct cache_tree *reada,
8245                                struct cache_tree *nodes,
8246                                struct cache_tree *extent_cache,
8247                                struct cache_tree *chunk_cache,
8248                                struct rb_root *dev_cache,
8249                                struct block_group_tree *block_group_cache,
8250                                struct device_extent_tree *dev_extent_cache)
8251 {
8252         int ret = 0;
8253         u64 last;
8254
8255         while (!list_empty(list)) {
8256                 struct root_item_record *rec;
8257                 struct extent_buffer *buf;
8258                 rec = list_entry(list->next,
8259                                  struct root_item_record, list);
8260                 last = 0;
8261                 buf = read_tree_block(root->fs_info->tree_root,
8262                                       rec->bytenr, rec->level_size, 0);
8263                 if (!extent_buffer_uptodate(buf)) {
8264                         free_extent_buffer(buf);
8265                         ret = -EIO;
8266                         break;
8267                 }
8268                 add_root_to_pending(buf, extent_cache, pending,
8269                                     seen, nodes, rec->objectid);
8270                 /*
8271                  * To rebuild extent tree, we need deal with snapshot
8272                  * one by one, otherwise we deal with node firstly which
8273                  * can maximize readahead.
8274                  */
8275                 while (1) {
8276                         ret = run_next_block(root, bits, bits_nr, &last,
8277                                              pending, seen, reada, nodes,
8278                                              extent_cache, chunk_cache,
8279                                              dev_cache, block_group_cache,
8280                                              dev_extent_cache, rec);
8281                         if (ret != 0)
8282                                 break;
8283                 }
8284                 free_extent_buffer(buf);
8285                 list_del(&rec->list);
8286                 free(rec);
8287                 if (ret < 0)
8288                         break;
8289         }
8290         while (ret >= 0) {
8291                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8292                                      reada, nodes, extent_cache, chunk_cache,
8293                                      dev_cache, block_group_cache,
8294                                      dev_extent_cache, NULL);
8295                 if (ret != 0) {
8296                         if (ret > 0)
8297                                 ret = 0;
8298                         break;
8299                 }
8300         }
8301         return ret;
8302 }
8303
8304 static int check_chunks_and_extents(struct btrfs_root *root)
8305 {
8306         struct rb_root dev_cache;
8307         struct cache_tree chunk_cache;
8308         struct block_group_tree block_group_cache;
8309         struct device_extent_tree dev_extent_cache;
8310         struct cache_tree extent_cache;
8311         struct cache_tree seen;
8312         struct cache_tree pending;
8313         struct cache_tree reada;
8314         struct cache_tree nodes;
8315         struct extent_io_tree excluded_extents;
8316         struct cache_tree corrupt_blocks;
8317         struct btrfs_path path;
8318         struct btrfs_key key;
8319         struct btrfs_key found_key;
8320         int ret, err = 0;
8321         struct block_info *bits;
8322         int bits_nr;
8323         struct extent_buffer *leaf;
8324         int slot;
8325         struct btrfs_root_item ri;
8326         struct list_head dropping_trees;
8327         struct list_head normal_trees;
8328         struct btrfs_root *root1;
8329         u64 objectid;
8330         u32 level_size;
8331         u8 level;
8332
8333         dev_cache = RB_ROOT;
8334         cache_tree_init(&chunk_cache);
8335         block_group_tree_init(&block_group_cache);
8336         device_extent_tree_init(&dev_extent_cache);
8337
8338         cache_tree_init(&extent_cache);
8339         cache_tree_init(&seen);
8340         cache_tree_init(&pending);
8341         cache_tree_init(&nodes);
8342         cache_tree_init(&reada);
8343         cache_tree_init(&corrupt_blocks);
8344         extent_io_tree_init(&excluded_extents);
8345         INIT_LIST_HEAD(&dropping_trees);
8346         INIT_LIST_HEAD(&normal_trees);
8347
8348         if (repair) {
8349                 root->fs_info->excluded_extents = &excluded_extents;
8350                 root->fs_info->fsck_extent_cache = &extent_cache;
8351                 root->fs_info->free_extent_hook = free_extent_hook;
8352                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8353         }
8354
8355         bits_nr = 1024;
8356         bits = malloc(bits_nr * sizeof(struct block_info));
8357         if (!bits) {
8358                 perror("malloc");
8359                 exit(1);
8360         }
8361
8362         if (ctx.progress_enabled) {
8363                 ctx.tp = TASK_EXTENTS;
8364                 task_start(ctx.info);
8365         }
8366
8367 again:
8368         root1 = root->fs_info->tree_root;
8369         level = btrfs_header_level(root1->node);
8370         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8371                                     root1->node->start, 0, level, 0,
8372                                     root1->nodesize, NULL);
8373         if (ret < 0)
8374                 goto out;
8375         root1 = root->fs_info->chunk_root;
8376         level = btrfs_header_level(root1->node);
8377         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8378                                     root1->node->start, 0, level, 0,
8379                                     root1->nodesize, NULL);
8380         if (ret < 0)
8381                 goto out;
8382         btrfs_init_path(&path);
8383         key.offset = 0;
8384         key.objectid = 0;
8385         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8386         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8387                                         &key, &path, 0, 0);
8388         if (ret < 0)
8389                 goto out;
8390         while(1) {
8391                 leaf = path.nodes[0];
8392                 slot = path.slots[0];
8393                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8394                         ret = btrfs_next_leaf(root, &path);
8395                         if (ret != 0)
8396                                 break;
8397                         leaf = path.nodes[0];
8398                         slot = path.slots[0];
8399                 }
8400                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8401                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8402                         unsigned long offset;
8403                         u64 last_snapshot;
8404
8405                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8406                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8407                         last_snapshot = btrfs_root_last_snapshot(&ri);
8408                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8409                                 level = btrfs_root_level(&ri);
8410                                 level_size = root->nodesize;
8411                                 ret = add_root_item_to_list(&normal_trees,
8412                                                 found_key.objectid,
8413                                                 btrfs_root_bytenr(&ri),
8414                                                 last_snapshot, level,
8415                                                 0, level_size, NULL);
8416                                 if (ret < 0)
8417                                         goto out;
8418                         } else {
8419                                 level = btrfs_root_level(&ri);
8420                                 level_size = root->nodesize;
8421                                 objectid = found_key.objectid;
8422                                 btrfs_disk_key_to_cpu(&found_key,
8423                                                       &ri.drop_progress);
8424                                 ret = add_root_item_to_list(&dropping_trees,
8425                                                 objectid,
8426                                                 btrfs_root_bytenr(&ri),
8427                                                 last_snapshot, level,
8428                                                 ri.drop_level,
8429                                                 level_size, &found_key);
8430                                 if (ret < 0)
8431                                         goto out;
8432                         }
8433                 }
8434                 path.slots[0]++;
8435         }
8436         btrfs_release_path(&path);
8437
8438         /*
8439          * check_block can return -EAGAIN if it fixes something, please keep
8440          * this in mind when dealing with return values from these functions, if
8441          * we get -EAGAIN we want to fall through and restart the loop.
8442          */
8443         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8444                                   &seen, &reada, &nodes, &extent_cache,
8445                                   &chunk_cache, &dev_cache, &block_group_cache,
8446                                   &dev_extent_cache);
8447         if (ret < 0) {
8448                 if (ret == -EAGAIN)
8449                         goto loop;
8450                 goto out;
8451         }
8452         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8453                                   &pending, &seen, &reada, &nodes,
8454                                   &extent_cache, &chunk_cache, &dev_cache,
8455                                   &block_group_cache, &dev_extent_cache);
8456         if (ret < 0) {
8457                 if (ret == -EAGAIN)
8458                         goto loop;
8459                 goto out;
8460         }
8461
8462         ret = check_chunks(&chunk_cache, &block_group_cache,
8463                            &dev_extent_cache, NULL, NULL, NULL, 0);
8464         if (ret) {
8465                 if (ret == -EAGAIN)
8466                         goto loop;
8467                 err = ret;
8468         }
8469
8470         ret = check_extent_refs(root, &extent_cache);
8471         if (ret < 0) {
8472                 if (ret == -EAGAIN)
8473                         goto loop;
8474                 goto out;
8475         }
8476
8477         ret = check_devices(&dev_cache, &dev_extent_cache);
8478         if (ret && err)
8479                 ret = err;
8480
8481 out:
8482         task_stop(ctx.info);
8483         if (repair) {
8484                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8485                 extent_io_tree_cleanup(&excluded_extents);
8486                 root->fs_info->fsck_extent_cache = NULL;
8487                 root->fs_info->free_extent_hook = NULL;
8488                 root->fs_info->corrupt_blocks = NULL;
8489                 root->fs_info->excluded_extents = NULL;
8490         }
8491         free(bits);
8492         free_chunk_cache_tree(&chunk_cache);
8493         free_device_cache_tree(&dev_cache);
8494         free_block_group_tree(&block_group_cache);
8495         free_device_extent_tree(&dev_extent_cache);
8496         free_extent_cache_tree(&seen);
8497         free_extent_cache_tree(&pending);
8498         free_extent_cache_tree(&reada);
8499         free_extent_cache_tree(&nodes);
8500         return ret;
8501 loop:
8502         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8503         free_extent_cache_tree(&seen);
8504         free_extent_cache_tree(&pending);
8505         free_extent_cache_tree(&reada);
8506         free_extent_cache_tree(&nodes);
8507         free_chunk_cache_tree(&chunk_cache);
8508         free_block_group_tree(&block_group_cache);
8509         free_device_cache_tree(&dev_cache);
8510         free_device_extent_tree(&dev_extent_cache);
8511         free_extent_record_cache(root->fs_info, &extent_cache);
8512         free_root_item_list(&normal_trees);
8513         free_root_item_list(&dropping_trees);
8514         extent_io_tree_cleanup(&excluded_extents);
8515         goto again;
8516 }
8517
8518 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8519                            struct btrfs_root *root, int overwrite)
8520 {
8521         struct extent_buffer *c;
8522         struct extent_buffer *old = root->node;
8523         int level;
8524         int ret;
8525         struct btrfs_disk_key disk_key = {0,0,0};
8526
8527         level = 0;
8528
8529         if (overwrite) {
8530                 c = old;
8531                 extent_buffer_get(c);
8532                 goto init;
8533         }
8534         c = btrfs_alloc_free_block(trans, root,
8535                                    root->nodesize,
8536                                    root->root_key.objectid,
8537                                    &disk_key, level, 0, 0);
8538         if (IS_ERR(c)) {
8539                 c = old;
8540                 extent_buffer_get(c);
8541                 overwrite = 1;
8542         }
8543 init:
8544         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8545         btrfs_set_header_level(c, level);
8546         btrfs_set_header_bytenr(c, c->start);
8547         btrfs_set_header_generation(c, trans->transid);
8548         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8549         btrfs_set_header_owner(c, root->root_key.objectid);
8550
8551         write_extent_buffer(c, root->fs_info->fsid,
8552                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8553
8554         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8555                             btrfs_header_chunk_tree_uuid(c),
8556                             BTRFS_UUID_SIZE);
8557
8558         btrfs_mark_buffer_dirty(c);
8559         /*
8560          * this case can happen in the following case:
8561          *
8562          * 1.overwrite previous root.
8563          *
8564          * 2.reinit reloc data root, this is because we skip pin
8565          * down reloc data tree before which means we can allocate
8566          * same block bytenr here.
8567          */
8568         if (old->start == c->start) {
8569                 btrfs_set_root_generation(&root->root_item,
8570                                           trans->transid);
8571                 root->root_item.level = btrfs_header_level(root->node);
8572                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8573                                         &root->root_key, &root->root_item);
8574                 if (ret) {
8575                         free_extent_buffer(c);
8576                         return ret;
8577                 }
8578         }
8579         free_extent_buffer(old);
8580         root->node = c;
8581         add_root_to_dirty_list(root);
8582         return 0;
8583 }
8584
8585 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8586                                 struct extent_buffer *eb, int tree_root)
8587 {
8588         struct extent_buffer *tmp;
8589         struct btrfs_root_item *ri;
8590         struct btrfs_key key;
8591         u64 bytenr;
8592         u32 nodesize;
8593         int level = btrfs_header_level(eb);
8594         int nritems;
8595         int ret;
8596         int i;
8597
8598         /*
8599          * If we have pinned this block before, don't pin it again.
8600          * This can not only avoid forever loop with broken filesystem
8601          * but also give us some speedups.
8602          */
8603         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8604                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8605                 return 0;
8606
8607         btrfs_pin_extent(fs_info, eb->start, eb->len);
8608
8609         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8610         nritems = btrfs_header_nritems(eb);
8611         for (i = 0; i < nritems; i++) {
8612                 if (level == 0) {
8613                         btrfs_item_key_to_cpu(eb, &key, i);
8614                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8615                                 continue;
8616                         /* Skip the extent root and reloc roots */
8617                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8618                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8619                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8620                                 continue;
8621                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8622                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8623
8624                         /*
8625                          * If at any point we start needing the real root we
8626                          * will have to build a stump root for the root we are
8627                          * in, but for now this doesn't actually use the root so
8628                          * just pass in extent_root.
8629                          */
8630                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8631                                               nodesize, 0);
8632                         if (!extent_buffer_uptodate(tmp)) {
8633                                 fprintf(stderr, "Error reading root block\n");
8634                                 return -EIO;
8635                         }
8636                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8637                         free_extent_buffer(tmp);
8638                         if (ret)
8639                                 return ret;
8640                 } else {
8641                         bytenr = btrfs_node_blockptr(eb, i);
8642
8643                         /* If we aren't the tree root don't read the block */
8644                         if (level == 1 && !tree_root) {
8645                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8646                                 continue;
8647                         }
8648
8649                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8650                                               nodesize, 0);
8651                         if (!extent_buffer_uptodate(tmp)) {
8652                                 fprintf(stderr, "Error reading tree block\n");
8653                                 return -EIO;
8654                         }
8655                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8656                         free_extent_buffer(tmp);
8657                         if (ret)
8658                                 return ret;
8659                 }
8660         }
8661
8662         return 0;
8663 }
8664
8665 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8666 {
8667         int ret;
8668
8669         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8670         if (ret)
8671                 return ret;
8672
8673         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8674 }
8675
8676 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8677 {
8678         struct btrfs_block_group_cache *cache;
8679         struct btrfs_path *path;
8680         struct extent_buffer *leaf;
8681         struct btrfs_chunk *chunk;
8682         struct btrfs_key key;
8683         int ret;
8684         u64 start;
8685
8686         path = btrfs_alloc_path();
8687         if (!path)
8688                 return -ENOMEM;
8689
8690         key.objectid = 0;
8691         key.type = BTRFS_CHUNK_ITEM_KEY;
8692         key.offset = 0;
8693
8694         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8695         if (ret < 0) {
8696                 btrfs_free_path(path);
8697                 return ret;
8698         }
8699
8700         /*
8701          * We do this in case the block groups were screwed up and had alloc
8702          * bits that aren't actually set on the chunks.  This happens with
8703          * restored images every time and could happen in real life I guess.
8704          */
8705         fs_info->avail_data_alloc_bits = 0;
8706         fs_info->avail_metadata_alloc_bits = 0;
8707         fs_info->avail_system_alloc_bits = 0;
8708
8709         /* First we need to create the in-memory block groups */
8710         while (1) {
8711                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8712                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8713                         if (ret < 0) {
8714                                 btrfs_free_path(path);
8715                                 return ret;
8716                         }
8717                         if (ret) {
8718                                 ret = 0;
8719                                 break;
8720                         }
8721                 }
8722                 leaf = path->nodes[0];
8723                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8724                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8725                         path->slots[0]++;
8726                         continue;
8727                 }
8728
8729                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8730                                        struct btrfs_chunk);
8731                 btrfs_add_block_group(fs_info, 0,
8732                                       btrfs_chunk_type(leaf, chunk),
8733                                       key.objectid, key.offset,
8734                                       btrfs_chunk_length(leaf, chunk));
8735                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8736                                  key.offset + btrfs_chunk_length(leaf, chunk),
8737                                  GFP_NOFS);
8738                 path->slots[0]++;
8739         }
8740         start = 0;
8741         while (1) {
8742                 cache = btrfs_lookup_first_block_group(fs_info, start);
8743                 if (!cache)
8744                         break;
8745                 cache->cached = 1;
8746                 start = cache->key.objectid + cache->key.offset;
8747         }
8748
8749         btrfs_free_path(path);
8750         return 0;
8751 }
8752
8753 static int reset_balance(struct btrfs_trans_handle *trans,
8754                          struct btrfs_fs_info *fs_info)
8755 {
8756         struct btrfs_root *root = fs_info->tree_root;
8757         struct btrfs_path *path;
8758         struct extent_buffer *leaf;
8759         struct btrfs_key key;
8760         int del_slot, del_nr = 0;
8761         int ret;
8762         int found = 0;
8763
8764         path = btrfs_alloc_path();
8765         if (!path)
8766                 return -ENOMEM;
8767
8768         key.objectid = BTRFS_BALANCE_OBJECTID;
8769         key.type = BTRFS_BALANCE_ITEM_KEY;
8770         key.offset = 0;
8771
8772         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8773         if (ret) {
8774                 if (ret > 0)
8775                         ret = 0;
8776                 if (!ret)
8777                         goto reinit_data_reloc;
8778                 else
8779                         goto out;
8780         }
8781
8782         ret = btrfs_del_item(trans, root, path);
8783         if (ret)
8784                 goto out;
8785         btrfs_release_path(path);
8786
8787         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8788         key.type = BTRFS_ROOT_ITEM_KEY;
8789         key.offset = 0;
8790
8791         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8792         if (ret < 0)
8793                 goto out;
8794         while (1) {
8795                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8796                         if (!found)
8797                                 break;
8798
8799                         if (del_nr) {
8800                                 ret = btrfs_del_items(trans, root, path,
8801                                                       del_slot, del_nr);
8802                                 del_nr = 0;
8803                                 if (ret)
8804                                         goto out;
8805                         }
8806                         key.offset++;
8807                         btrfs_release_path(path);
8808
8809                         found = 0;
8810                         ret = btrfs_search_slot(trans, root, &key, path,
8811                                                 -1, 1);
8812                         if (ret < 0)
8813                                 goto out;
8814                         continue;
8815                 }
8816                 found = 1;
8817                 leaf = path->nodes[0];
8818                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8819                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8820                         break;
8821                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8822                         path->slots[0]++;
8823                         continue;
8824                 }
8825                 if (!del_nr) {
8826                         del_slot = path->slots[0];
8827                         del_nr = 1;
8828                 } else {
8829                         del_nr++;
8830                 }
8831                 path->slots[0]++;
8832         }
8833
8834         if (del_nr) {
8835                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8836                 if (ret)
8837                         goto out;
8838         }
8839         btrfs_release_path(path);
8840
8841 reinit_data_reloc:
8842         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8843         key.type = BTRFS_ROOT_ITEM_KEY;
8844         key.offset = (u64)-1;
8845         root = btrfs_read_fs_root(fs_info, &key);
8846         if (IS_ERR(root)) {
8847                 fprintf(stderr, "Error reading data reloc tree\n");
8848                 ret = PTR_ERR(root);
8849                 goto out;
8850         }
8851         record_root_in_trans(trans, root);
8852         ret = btrfs_fsck_reinit_root(trans, root, 0);
8853         if (ret)
8854                 goto out;
8855         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8856 out:
8857         btrfs_free_path(path);
8858         return ret;
8859 }
8860
8861 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8862                               struct btrfs_fs_info *fs_info)
8863 {
8864         u64 start = 0;
8865         int ret;
8866
8867         /*
8868          * The only reason we don't do this is because right now we're just
8869          * walking the trees we find and pinning down their bytes, we don't look
8870          * at any of the leaves.  In order to do mixed groups we'd have to check
8871          * the leaves of any fs roots and pin down the bytes for any file
8872          * extents we find.  Not hard but why do it if we don't have to?
8873          */
8874         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8875                 fprintf(stderr, "We don't support re-initing the extent tree "
8876                         "for mixed block groups yet, please notify a btrfs "
8877                         "developer you want to do this so they can add this "
8878                         "functionality.\n");
8879                 return -EINVAL;
8880         }
8881
8882         /*
8883          * first we need to walk all of the trees except the extent tree and pin
8884          * down the bytes that are in use so we don't overwrite any existing
8885          * metadata.
8886          */
8887         ret = pin_metadata_blocks(fs_info);
8888         if (ret) {
8889                 fprintf(stderr, "error pinning down used bytes\n");
8890                 return ret;
8891         }
8892
8893         /*
8894          * Need to drop all the block groups since we're going to recreate all
8895          * of them again.
8896          */
8897         btrfs_free_block_groups(fs_info);
8898         ret = reset_block_groups(fs_info);
8899         if (ret) {
8900                 fprintf(stderr, "error resetting the block groups\n");
8901                 return ret;
8902         }
8903
8904         /* Ok we can allocate now, reinit the extent root */
8905         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8906         if (ret) {
8907                 fprintf(stderr, "extent root initialization failed\n");
8908                 /*
8909                  * When the transaction code is updated we should end the
8910                  * transaction, but for now progs only knows about commit so
8911                  * just return an error.
8912                  */
8913                 return ret;
8914         }
8915
8916         /*
8917          * Now we have all the in-memory block groups setup so we can make
8918          * allocations properly, and the metadata we care about is safe since we
8919          * pinned all of it above.
8920          */
8921         while (1) {
8922                 struct btrfs_block_group_cache *cache;
8923
8924                 cache = btrfs_lookup_first_block_group(fs_info, start);
8925                 if (!cache)
8926                         break;
8927                 start = cache->key.objectid + cache->key.offset;
8928                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8929                                         &cache->key, &cache->item,
8930                                         sizeof(cache->item));
8931                 if (ret) {
8932                         fprintf(stderr, "Error adding block group\n");
8933                         return ret;
8934                 }
8935                 btrfs_extent_post_op(trans, fs_info->extent_root);
8936         }
8937
8938         ret = reset_balance(trans, fs_info);
8939         if (ret)
8940                 fprintf(stderr, "error resetting the pending balance\n");
8941
8942         return ret;
8943 }
8944
8945 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8946 {
8947         struct btrfs_path *path;
8948         struct btrfs_trans_handle *trans;
8949         struct btrfs_key key;
8950         int ret;
8951
8952         printf("Recowing metadata block %llu\n", eb->start);
8953         key.objectid = btrfs_header_owner(eb);
8954         key.type = BTRFS_ROOT_ITEM_KEY;
8955         key.offset = (u64)-1;
8956
8957         root = btrfs_read_fs_root(root->fs_info, &key);
8958         if (IS_ERR(root)) {
8959                 fprintf(stderr, "Couldn't find owner root %llu\n",
8960                         key.objectid);
8961                 return PTR_ERR(root);
8962         }
8963
8964         path = btrfs_alloc_path();
8965         if (!path)
8966                 return -ENOMEM;
8967
8968         trans = btrfs_start_transaction(root, 1);
8969         if (IS_ERR(trans)) {
8970                 btrfs_free_path(path);
8971                 return PTR_ERR(trans);
8972         }
8973
8974         path->lowest_level = btrfs_header_level(eb);
8975         if (path->lowest_level)
8976                 btrfs_node_key_to_cpu(eb, &key, 0);
8977         else
8978                 btrfs_item_key_to_cpu(eb, &key, 0);
8979
8980         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8981         btrfs_commit_transaction(trans, root);
8982         btrfs_free_path(path);
8983         return ret;
8984 }
8985
8986 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8987 {
8988         struct btrfs_path *path;
8989         struct btrfs_trans_handle *trans;
8990         struct btrfs_key key;
8991         int ret;
8992
8993         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8994                bad->key.type, bad->key.offset);
8995         key.objectid = bad->root_id;
8996         key.type = BTRFS_ROOT_ITEM_KEY;
8997         key.offset = (u64)-1;
8998
8999         root = btrfs_read_fs_root(root->fs_info, &key);
9000         if (IS_ERR(root)) {
9001                 fprintf(stderr, "Couldn't find owner root %llu\n",
9002                         key.objectid);
9003                 return PTR_ERR(root);
9004         }
9005
9006         path = btrfs_alloc_path();
9007         if (!path)
9008                 return -ENOMEM;
9009
9010         trans = btrfs_start_transaction(root, 1);
9011         if (IS_ERR(trans)) {
9012                 btrfs_free_path(path);
9013                 return PTR_ERR(trans);
9014         }
9015
9016         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9017         if (ret) {
9018                 if (ret > 0)
9019                         ret = 0;
9020                 goto out;
9021         }
9022         ret = btrfs_del_item(trans, root, path);
9023 out:
9024         btrfs_commit_transaction(trans, root);
9025         btrfs_free_path(path);
9026         return ret;
9027 }
9028
9029 static int zero_log_tree(struct btrfs_root *root)
9030 {
9031         struct btrfs_trans_handle *trans;
9032         int ret;
9033
9034         trans = btrfs_start_transaction(root, 1);
9035         if (IS_ERR(trans)) {
9036                 ret = PTR_ERR(trans);
9037                 return ret;
9038         }
9039         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9040         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9041         ret = btrfs_commit_transaction(trans, root);
9042         return ret;
9043 }
9044
9045 static int populate_csum(struct btrfs_trans_handle *trans,
9046                          struct btrfs_root *csum_root, char *buf, u64 start,
9047                          u64 len)
9048 {
9049         u64 offset = 0;
9050         u64 sectorsize;
9051         int ret = 0;
9052
9053         while (offset < len) {
9054                 sectorsize = csum_root->sectorsize;
9055                 ret = read_extent_data(csum_root, buf, start + offset,
9056                                        &sectorsize, 0);
9057                 if (ret)
9058                         break;
9059                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9060                                             start + offset, buf, sectorsize);
9061                 if (ret)
9062                         break;
9063                 offset += sectorsize;
9064         }
9065         return ret;
9066 }
9067
9068 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9069                                       struct btrfs_root *csum_root,
9070                                       struct btrfs_root *cur_root)
9071 {
9072         struct btrfs_path *path;
9073         struct btrfs_key key;
9074         struct extent_buffer *node;
9075         struct btrfs_file_extent_item *fi;
9076         char *buf = NULL;
9077         u64 start = 0;
9078         u64 len = 0;
9079         int slot = 0;
9080         int ret = 0;
9081
9082         path = btrfs_alloc_path();
9083         if (!path)
9084                 return -ENOMEM;
9085         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9086         if (!buf) {
9087                 ret = -ENOMEM;
9088                 goto out;
9089         }
9090
9091         key.objectid = 0;
9092         key.offset = 0;
9093         key.type = 0;
9094
9095         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9096         if (ret < 0)
9097                 goto out;
9098         /* Iterate all regular file extents and fill its csum */
9099         while (1) {
9100                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9101
9102                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9103                         goto next;
9104                 node = path->nodes[0];
9105                 slot = path->slots[0];
9106                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9107                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9108                         goto next;
9109                 start = btrfs_file_extent_disk_bytenr(node, fi);
9110                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9111
9112                 ret = populate_csum(trans, csum_root, buf, start, len);
9113                 if (ret == -EEXIST)
9114                         ret = 0;
9115                 if (ret < 0)
9116                         goto out;
9117 next:
9118                 /*
9119                  * TODO: if next leaf is corrupted, jump to nearest next valid
9120                  * leaf.
9121                  */
9122                 ret = btrfs_next_item(cur_root, path);
9123                 if (ret < 0)
9124                         goto out;
9125                 if (ret > 0) {
9126                         ret = 0;
9127                         goto out;
9128                 }
9129         }
9130
9131 out:
9132         btrfs_free_path(path);
9133         free(buf);
9134         return ret;
9135 }
9136
9137 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9138                                   struct btrfs_root *csum_root)
9139 {
9140         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9141         struct btrfs_path *path;
9142         struct btrfs_root *tree_root = fs_info->tree_root;
9143         struct btrfs_root *cur_root;
9144         struct extent_buffer *node;
9145         struct btrfs_key key;
9146         int slot = 0;
9147         int ret = 0;
9148
9149         path = btrfs_alloc_path();
9150         if (!path)
9151                 return -ENOMEM;
9152
9153         key.objectid = BTRFS_FS_TREE_OBJECTID;
9154         key.offset = 0;
9155         key.type = BTRFS_ROOT_ITEM_KEY;
9156
9157         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9158         if (ret < 0)
9159                 goto out;
9160         if (ret > 0) {
9161                 ret = -ENOENT;
9162                 goto out;
9163         }
9164
9165         while (1) {
9166                 node = path->nodes[0];
9167                 slot = path->slots[0];
9168                 btrfs_item_key_to_cpu(node, &key, slot);
9169                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9170                         goto out;
9171                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9172                         goto next;
9173                 if (!is_fstree(key.objectid))
9174                         goto next;
9175                 key.offset = (u64)-1;
9176
9177                 cur_root = btrfs_read_fs_root(fs_info, &key);
9178                 if (IS_ERR(cur_root) || !cur_root) {
9179                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9180                                 key.objectid);
9181                         goto out;
9182                 }
9183                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9184                                 cur_root);
9185                 if (ret < 0)
9186                         goto out;
9187 next:
9188                 ret = btrfs_next_item(tree_root, path);
9189                 if (ret > 0) {
9190                         ret = 0;
9191                         goto out;
9192                 }
9193                 if (ret < 0)
9194                         goto out;
9195         }
9196
9197 out:
9198         btrfs_free_path(path);
9199         return ret;
9200 }
9201
9202 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9203                                       struct btrfs_root *csum_root)
9204 {
9205         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9206         struct btrfs_path *path;
9207         struct btrfs_extent_item *ei;
9208         struct extent_buffer *leaf;
9209         char *buf;
9210         struct btrfs_key key;
9211         int ret;
9212
9213         path = btrfs_alloc_path();
9214         if (!path)
9215                 return -ENOMEM;
9216
9217         key.objectid = 0;
9218         key.type = BTRFS_EXTENT_ITEM_KEY;
9219         key.offset = 0;
9220
9221         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9222         if (ret < 0) {
9223                 btrfs_free_path(path);
9224                 return ret;
9225         }
9226
9227         buf = malloc(csum_root->sectorsize);
9228         if (!buf) {
9229                 btrfs_free_path(path);
9230                 return -ENOMEM;
9231         }
9232
9233         while (1) {
9234                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9235                         ret = btrfs_next_leaf(extent_root, path);
9236                         if (ret < 0)
9237                                 break;
9238                         if (ret) {
9239                                 ret = 0;
9240                                 break;
9241                         }
9242                 }
9243                 leaf = path->nodes[0];
9244
9245                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9246                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9247                         path->slots[0]++;
9248                         continue;
9249                 }
9250
9251                 ei = btrfs_item_ptr(leaf, path->slots[0],
9252                                     struct btrfs_extent_item);
9253                 if (!(btrfs_extent_flags(leaf, ei) &
9254                       BTRFS_EXTENT_FLAG_DATA)) {
9255                         path->slots[0]++;
9256                         continue;
9257                 }
9258
9259                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9260                                     key.offset);
9261                 if (ret)
9262                         break;
9263                 path->slots[0]++;
9264         }
9265
9266         btrfs_free_path(path);
9267         free(buf);
9268         return ret;
9269 }
9270
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * Re-initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source of data extents.  When
 * @search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
9287
9288 static void free_roots_info_cache(void)
9289 {
9290         if (!roots_info_cache)
9291                 return;
9292
9293         while (!cache_tree_empty(roots_info_cache)) {
9294                 struct cache_extent *entry;
9295                 struct root_item_info *rii;
9296
9297                 entry = first_cache_extent(roots_info_cache);
9298                 if (!entry)
9299                         break;
9300                 remove_cache_extent(roots_info_cache, entry);
9301                 rii = container_of(entry, struct root_item_info, cache_extent);
9302                 free(rii);
9303         }
9304
9305         free(roots_info_cache);
9306         roots_info_cache = NULL;
9307 }
9308
9309 static int build_roots_info_cache(struct btrfs_fs_info *info)
9310 {
9311         int ret = 0;
9312         struct btrfs_key key;
9313         struct extent_buffer *leaf;
9314         struct btrfs_path *path;
9315
9316         if (!roots_info_cache) {
9317                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9318                 if (!roots_info_cache)
9319                         return -ENOMEM;
9320                 cache_tree_init(roots_info_cache);
9321         }
9322
9323         path = btrfs_alloc_path();
9324         if (!path)
9325                 return -ENOMEM;
9326
9327         key.objectid = 0;
9328         key.type = BTRFS_EXTENT_ITEM_KEY;
9329         key.offset = 0;
9330
9331         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9332         if (ret < 0)
9333                 goto out;
9334         leaf = path->nodes[0];
9335
9336         while (1) {
9337                 struct btrfs_key found_key;
9338                 struct btrfs_extent_item *ei;
9339                 struct btrfs_extent_inline_ref *iref;
9340                 int slot = path->slots[0];
9341                 int type;
9342                 u64 flags;
9343                 u64 root_id;
9344                 u8 level;
9345                 struct cache_extent *entry;
9346                 struct root_item_info *rii;
9347
9348                 if (slot >= btrfs_header_nritems(leaf)) {
9349                         ret = btrfs_next_leaf(info->extent_root, path);
9350                         if (ret < 0) {
9351                                 break;
9352                         } else if (ret) {
9353                                 ret = 0;
9354                                 break;
9355                         }
9356                         leaf = path->nodes[0];
9357                         slot = path->slots[0];
9358                 }
9359
9360                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9361
9362                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9363                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9364                         goto next;
9365
9366                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9367                 flags = btrfs_extent_flags(leaf, ei);
9368
9369                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9370                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9371                         goto next;
9372
9373                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9374                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9375                         level = found_key.offset;
9376                 } else {
9377                         struct btrfs_tree_block_info *binfo;
9378
9379                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9380                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9381                         level = btrfs_tree_block_level(leaf, binfo);
9382                 }
9383
9384                 /*
9385                  * For a root extent, it must be of the following type and the
9386                  * first (and only one) iref in the item.
9387                  */
9388                 type = btrfs_extent_inline_ref_type(leaf, iref);
9389                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9390                         goto next;
9391
9392                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9393                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9394                 if (!entry) {
9395                         rii = malloc(sizeof(struct root_item_info));
9396                         if (!rii) {
9397                                 ret = -ENOMEM;
9398                                 goto out;
9399                         }
9400                         rii->cache_extent.start = root_id;
9401                         rii->cache_extent.size = 1;
9402                         rii->level = (u8)-1;
9403                         entry = &rii->cache_extent;
9404                         ret = insert_cache_extent(roots_info_cache, entry);
9405                         ASSERT(ret == 0);
9406                 } else {
9407                         rii = container_of(entry, struct root_item_info,
9408                                            cache_extent);
9409                 }
9410
9411                 ASSERT(rii->cache_extent.start == root_id);
9412                 ASSERT(rii->cache_extent.size == 1);
9413
9414                 if (level > rii->level || rii->level == (u8)-1) {
9415                         rii->level = level;
9416                         rii->bytenr = found_key.objectid;
9417                         rii->gen = btrfs_extent_generation(leaf, ei);
9418                         rii->node_count = 1;
9419                 } else if (level == rii->level) {
9420                         rii->node_count++;
9421                 }
9422 next:
9423                 path->slots[0]++;
9424         }
9425
9426 out:
9427         btrfs_free_path(path);
9428
9429         return ret;
9430 }
9431
9432 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9433                                   struct btrfs_path *path,
9434                                   const struct btrfs_key *root_key,
9435                                   const int read_only_mode)
9436 {
9437         const u64 root_id = root_key->objectid;
9438         struct cache_extent *entry;
9439         struct root_item_info *rii;
9440         struct btrfs_root_item ri;
9441         unsigned long offset;
9442
9443         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9444         if (!entry) {
9445                 fprintf(stderr,
9446                         "Error: could not find extent items for root %llu\n",
9447                         root_key->objectid);
9448                 return -ENOENT;
9449         }
9450
9451         rii = container_of(entry, struct root_item_info, cache_extent);
9452         ASSERT(rii->cache_extent.start == root_id);
9453         ASSERT(rii->cache_extent.size == 1);
9454
9455         if (rii->node_count != 1) {
9456                 fprintf(stderr,
9457                         "Error: could not find btree root extent for root %llu\n",
9458                         root_id);
9459                 return -ENOENT;
9460         }
9461
9462         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9463         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9464
9465         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9466             btrfs_root_level(&ri) != rii->level ||
9467             btrfs_root_generation(&ri) != rii->gen) {
9468
9469                 /*
9470                  * If we're in repair mode but our caller told us to not update
9471                  * the root item, i.e. just check if it needs to be updated, don't
9472                  * print this message, since the caller will call us again shortly
9473                  * for the same root item without read only mode (the caller will
9474                  * open a transaction first).
9475                  */
9476                 if (!(read_only_mode && repair))
9477                         fprintf(stderr,
9478                                 "%sroot item for root %llu,"
9479                                 " current bytenr %llu, current gen %llu, current level %u,"
9480                                 " new bytenr %llu, new gen %llu, new level %u\n",
9481                                 (read_only_mode ? "" : "fixing "),
9482                                 root_id,
9483                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9484                                 btrfs_root_level(&ri),
9485                                 rii->bytenr, rii->gen, rii->level);
9486
9487                 if (btrfs_root_generation(&ri) > rii->gen) {
9488                         fprintf(stderr,
9489                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9490                                 root_id, btrfs_root_generation(&ri), rii->gen);
9491                         return -EINVAL;
9492                 }
9493
9494                 if (!read_only_mode) {
9495                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9496                         btrfs_set_root_level(&ri, rii->level);
9497                         btrfs_set_root_generation(&ri, rii->gen);
9498                         write_extent_buffer(path->nodes[0], &ri,
9499                                             offset, sizeof(ri));
9500                 }
9501
9502                 return 1;
9503         }
9504
9505         return 0;
9506 }
9507
9508 /*
9509  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9510  * caused read-only snapshots to be corrupted if they were created at a moment
9511  * when the source subvolume/snapshot had orphan items. The issue was that the
9512  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9513  * node instead of the post orphan cleanup root node.
9514  * So this function, and its callees, just detects and fixes those cases. Even
9515  * though the regression was for read-only snapshots, this function applies to
9516  * any snapshot/subvolume root.
9517  * This must be run before any other repair code - not doing it so, makes other
9518  * repair code delete or modify backrefs in the extent tree for example, which
9519  * will result in an inconsistent fs after repairing the root items.
9520  */
9521 static int repair_root_items(struct btrfs_fs_info *info)
9522 {
9523         struct btrfs_path *path = NULL;
9524         struct btrfs_key key;
9525         struct extent_buffer *leaf;
9526         struct btrfs_trans_handle *trans = NULL;
9527         int ret = 0;
9528         int bad_roots = 0;
9529         int need_trans = 0;
9530
9531         ret = build_roots_info_cache(info);
9532         if (ret)
9533                 goto out;
9534
9535         path = btrfs_alloc_path();
9536         if (!path) {
9537                 ret = -ENOMEM;
9538                 goto out;
9539         }
9540
9541         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9542         key.type = BTRFS_ROOT_ITEM_KEY;
9543         key.offset = 0;
9544
9545 again:
9546         /*
9547          * Avoid opening and committing transactions if a leaf doesn't have
9548          * any root items that need to be fixed, so that we avoid rotating
9549          * backup roots unnecessarily.
9550          */
9551         if (need_trans) {
9552                 trans = btrfs_start_transaction(info->tree_root, 1);
9553                 if (IS_ERR(trans)) {
9554                         ret = PTR_ERR(trans);
9555                         goto out;
9556                 }
9557         }
9558
9559         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9560                                 0, trans ? 1 : 0);
9561         if (ret < 0)
9562                 goto out;
9563         leaf = path->nodes[0];
9564
9565         while (1) {
9566                 struct btrfs_key found_key;
9567
9568                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9569                         int no_more_keys = find_next_key(path, &key);
9570
9571                         btrfs_release_path(path);
9572                         if (trans) {
9573                                 ret = btrfs_commit_transaction(trans,
9574                                                                info->tree_root);
9575                                 trans = NULL;
9576                                 if (ret < 0)
9577                                         goto out;
9578                         }
9579                         need_trans = 0;
9580                         if (no_more_keys)
9581                                 break;
9582                         goto again;
9583                 }
9584
9585                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9586
9587                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9588                         goto next;
9589                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9590                         goto next;
9591
9592                 ret = maybe_repair_root_item(info, path, &found_key,
9593                                              trans ? 0 : 1);
9594                 if (ret < 0)
9595                         goto out;
9596                 if (ret) {
9597                         if (!trans && repair) {
9598                                 need_trans = 1;
9599                                 key = found_key;
9600                                 btrfs_release_path(path);
9601                                 goto again;
9602                         }
9603                         bad_roots++;
9604                 }
9605 next:
9606                 path->slots[0]++;
9607         }
9608         ret = 0;
9609 out:
9610         free_roots_info_cache();
9611         btrfs_free_path(path);
9612         if (trans)
9613                 btrfs_commit_transaction(trans, info->tree_root);
9614         if (ret < 0)
9615                 return ret;
9616
9617         return bad_roots;
9618 }
9619
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * Layout of the NULL-terminated table: synopsis line, one-line summary,
 * longer description, an empty string, then one entry per option line.
 *
 * NOTE(review): cmd_check() calls getopt_long() with the short-option string
 * "as:br:p", which contains neither 'Q' nor 'E'.  The short forms -Q and -E
 * advertised below therefore look unreachable (only --qgroup-report and
 * --subvol-extents would match) - confirm and extend the option string.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
9643
9644 int cmd_check(int argc, char **argv)
9645 {
9646         struct cache_tree root_cache;
9647         struct btrfs_root *root;
9648         struct btrfs_fs_info *info;
9649         u64 bytenr = 0;
9650         u64 subvolid = 0;
9651         u64 tree_root_bytenr = 0;
9652         u64 chunk_root_bytenr = 0;
9653         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9654         int ret;
9655         u64 num;
9656         int init_csum_tree = 0;
9657         int readonly = 0;
9658         int qgroup_report = 0;
9659         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9660
9661         while(1) {
9662                 int c;
9663                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9664                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9665                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9666                 static const struct option long_options[] = {
9667                         { "super", required_argument, NULL, 's' },
9668                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9669                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9670                         { "init-csum-tree", no_argument, NULL,
9671                                 GETOPT_VAL_INIT_CSUM },
9672                         { "init-extent-tree", no_argument, NULL,
9673                                 GETOPT_VAL_INIT_EXTENT },
9674                         { "check-data-csum", no_argument, NULL,
9675                                 GETOPT_VAL_CHECK_CSUM },
9676                         { "backup", no_argument, NULL, 'b' },
9677                         { "subvol-extents", required_argument, NULL, 'E' },
9678                         { "qgroup-report", no_argument, NULL, 'Q' },
9679                         { "tree-root", required_argument, NULL, 'r' },
9680                         { "chunk-root", required_argument, NULL,
9681                                 GETOPT_VAL_CHUNK_TREE },
9682                         { "progress", no_argument, NULL, 'p' },
9683                         { NULL, 0, NULL, 0}
9684                 };
9685
9686                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9687                 if (c < 0)
9688                         break;
9689                 switch(c) {
9690                         case 'a': /* ignored */ break;
9691                         case 'b':
9692                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9693                                 break;
9694                         case 's':
9695                                 num = arg_strtou64(optarg);
9696                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9697                                         fprintf(stderr,
9698                                                 "ERROR: super mirror should be less than: %d\n",
9699                                                 BTRFS_SUPER_MIRROR_MAX);
9700                                         exit(1);
9701                                 }
9702                                 bytenr = btrfs_sb_offset(((int)num));
9703                                 printf("using SB copy %llu, bytenr %llu\n", num,
9704                                        (unsigned long long)bytenr);
9705                                 break;
9706                         case 'Q':
9707                                 qgroup_report = 1;
9708                                 break;
9709                         case 'E':
9710                                 subvolid = arg_strtou64(optarg);
9711                                 break;
9712                         case 'r':
9713                                 tree_root_bytenr = arg_strtou64(optarg);
9714                                 break;
9715                         case GETOPT_VAL_CHUNK_TREE:
9716                                 chunk_root_bytenr = arg_strtou64(optarg);
9717                                 break;
9718                         case 'p':
9719                                 ctx.progress_enabled = true;
9720                                 break;
9721                         case '?':
9722                         case 'h':
9723                                 usage(cmd_check_usage);
9724                         case GETOPT_VAL_REPAIR:
9725                                 printf("enabling repair mode\n");
9726                                 repair = 1;
9727                                 ctree_flags |= OPEN_CTREE_WRITES;
9728                                 break;
9729                         case GETOPT_VAL_READONLY:
9730                                 readonly = 1;
9731                                 break;
9732                         case GETOPT_VAL_INIT_CSUM:
9733                                 printf("Creating a new CRC tree\n");
9734                                 init_csum_tree = 1;
9735                                 repair = 1;
9736                                 ctree_flags |= OPEN_CTREE_WRITES;
9737                                 break;
9738                         case GETOPT_VAL_INIT_EXTENT:
9739                                 init_extent_tree = 1;
9740                                 ctree_flags |= (OPEN_CTREE_WRITES |
9741                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9742                                 repair = 1;
9743                                 break;
9744                         case GETOPT_VAL_CHECK_CSUM:
9745                                 check_data_csum = 1;
9746                                 break;
9747                 }
9748         }
9749
9750         if (check_argc_exact(argc - optind, 1))
9751                 usage(cmd_check_usage);
9752
9753         if (ctx.progress_enabled) {
9754                 ctx.tp = TASK_NOTHING;
9755                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9756         }
9757
9758         /* This check is the only reason for --readonly to exist */
9759         if (readonly && repair) {
9760                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9761                 exit(1);
9762         }
9763
9764         radix_tree_init();
9765         cache_tree_init(&root_cache);
9766
9767         if((ret = check_mounted(argv[optind])) < 0) {
9768                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9769                 goto err_out;
9770         } else if(ret) {
9771                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9772                 ret = -EBUSY;
9773                 goto err_out;
9774         }
9775
9776         /* only allow partial opening under repair mode */
9777         if (repair)
9778                 ctree_flags |= OPEN_CTREE_PARTIAL;
9779
9780         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9781                                   chunk_root_bytenr, ctree_flags);
9782         if (!info) {
9783                 fprintf(stderr, "Couldn't open file system\n");
9784                 ret = -EIO;
9785                 goto err_out;
9786         }
9787
9788         global_info = info;
9789         root = info->fs_root;
9790
9791         /*
9792          * repair mode will force us to commit transaction which
9793          * will make us fail to load log tree when mounting.
9794          */
9795         if (repair && btrfs_super_log_root(info->super_copy)) {
9796                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9797                 if (!ret) {
9798                         ret = 1;
9799                         goto close_out;
9800                 }
9801                 ret = zero_log_tree(root);
9802                 if (ret) {
9803                         fprintf(stderr, "fail to zero log tree\n");
9804                         goto close_out;
9805                 }
9806         }
9807
9808         uuid_unparse(info->super_copy->fsid, uuidbuf);
9809         if (qgroup_report) {
9810                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9811                        uuidbuf);
9812                 ret = qgroup_verify_all(info);
9813                 if (ret == 0)
9814                         ret = report_qgroups(1);
9815                 goto close_out;
9816         }
9817         if (subvolid) {
9818                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9819                        subvolid, argv[optind], uuidbuf);
9820                 ret = print_extent_state(info, subvolid);
9821                 goto close_out;
9822         }
9823         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9824
9825         if (!extent_buffer_uptodate(info->tree_root->node) ||
9826             !extent_buffer_uptodate(info->dev_root->node) ||
9827             !extent_buffer_uptodate(info->chunk_root->node)) {
9828                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9829                 ret = -EIO;
9830                 goto close_out;
9831         }
9832
9833         if (init_extent_tree || init_csum_tree) {
9834                 struct btrfs_trans_handle *trans;
9835
9836                 trans = btrfs_start_transaction(info->extent_root, 0);
9837                 if (IS_ERR(trans)) {
9838                         fprintf(stderr, "Error starting transaction\n");
9839                         ret = PTR_ERR(trans);
9840                         goto close_out;
9841                 }
9842
9843                 if (init_extent_tree) {
9844                         printf("Creating a new extent tree\n");
9845                         ret = reinit_extent_tree(trans, info);
9846                         if (ret)
9847                                 goto close_out;
9848                 }
9849
9850                 if (init_csum_tree) {
9851                         fprintf(stderr, "Reinit crc root\n");
9852                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9853                         if (ret) {
9854                                 fprintf(stderr, "crc root initialization failed\n");
9855                                 ret = -EIO;
9856                                 goto close_out;
9857                         }
9858
9859                         ret = fill_csum_tree(trans, info->csum_root,
9860                                              init_extent_tree);
9861                         if (ret) {
9862                                 fprintf(stderr, "crc refilling failed\n");
9863                                 return -EIO;
9864                         }
9865                 }
9866                 /*
9867                  * Ok now we commit and run the normal fsck, which will add
9868                  * extent entries for all of the items it finds.
9869                  */
9870                 ret = btrfs_commit_transaction(trans, info->extent_root);
9871                 if (ret)
9872                         goto close_out;
9873         }
9874         if (!extent_buffer_uptodate(info->extent_root->node)) {
9875                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9876                 ret = -EIO;
9877                 goto close_out;
9878         }
9879         if (!extent_buffer_uptodate(info->csum_root->node)) {
9880                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9881                 ret = -EIO;
9882                 goto close_out;
9883         }
9884
9885         if (!ctx.progress_enabled)
9886                 fprintf(stderr, "checking extents\n");
9887         ret = check_chunks_and_extents(root);
9888         if (ret)
9889                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9890
9891         ret = repair_root_items(info);
9892         if (ret < 0)
9893                 goto close_out;
9894         if (repair) {
9895                 fprintf(stderr, "Fixed %d roots.\n", ret);
9896                 ret = 0;
9897         } else if (ret > 0) {
9898                 fprintf(stderr,
9899                        "Found %d roots with an outdated root item.\n",
9900                        ret);
9901                 fprintf(stderr,
9902                         "Please run a filesystem check with the option --repair to fix them.\n");
9903                 ret = 1;
9904                 goto close_out;
9905         }
9906
9907         if (!ctx.progress_enabled) {
9908                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9909                         fprintf(stderr, "checking free space tree\n");
9910                 else
9911                         fprintf(stderr, "checking free space cache\n");
9912         }
9913         ret = check_space_cache(root);
9914         if (ret)
9915                 goto out;
9916
9917         /*
9918          * We used to have to have these hole extents in between our real
9919          * extents so if we don't have this flag set we need to make sure there
9920          * are no gaps in the file extents for inodes, otherwise we can just
9921          * ignore it when this happens.
9922          */
9923         no_holes = btrfs_fs_incompat(root->fs_info,
9924                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9925         if (!ctx.progress_enabled)
9926                 fprintf(stderr, "checking fs roots\n");
9927         ret = check_fs_roots(root, &root_cache);
9928         if (ret)
9929                 goto out;
9930
9931         fprintf(stderr, "checking csums\n");
9932         ret = check_csums(root);
9933         if (ret)
9934                 goto out;
9935
9936         fprintf(stderr, "checking root refs\n");
9937         ret = check_root_refs(root, &root_cache);
9938         if (ret)
9939                 goto out;
9940
9941         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9942                 struct extent_buffer *eb;
9943
9944                 eb = list_first_entry(&root->fs_info->recow_ebs,
9945                                       struct extent_buffer, recow);
9946                 list_del_init(&eb->recow);
9947                 ret = recow_extent_buffer(root, eb);
9948                 if (ret)
9949                         break;
9950         }
9951
9952         while (!list_empty(&delete_items)) {
9953                 struct bad_item *bad;
9954
9955                 bad = list_first_entry(&delete_items, struct bad_item, list);
9956                 list_del_init(&bad->list);
9957                 if (repair)
9958                         ret = delete_bad_item(root, bad);
9959                 free(bad);
9960         }
9961
9962         if (info->quota_enabled) {
9963                 int err;
9964                 fprintf(stderr, "checking quota groups\n");
9965                 err = qgroup_verify_all(info);
9966                 if (err)
9967                         goto out;
9968         }
9969
9970         if (!list_empty(&root->fs_info->recow_ebs)) {
9971                 fprintf(stderr, "Transid errors in file system\n");
9972                 ret = 1;
9973         }
9974 out:
9975         /* Don't override original ret */
9976         if (ret)
9977                 report_qgroups(0);
9978         else
9979                 ret = report_qgroups(0);
9980         if (found_old_backref) { /*
9981                  * there was a disk format change when mixed
9982                  * backref was in testing tree. The old format
9983                  * existed about one week.
9984                  */
9985                 printf("\n * Found old mixed backref format. "
9986                        "The old format is not supported! *"
9987                        "\n * Please mount the FS in readonly mode, "
9988                        "backup data and re-format the FS. *\n\n");
9989                 ret = 1;
9990         }
9991         printf("found %llu bytes used err is %d\n",
9992                (unsigned long long)bytes_used, ret);
9993         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9994         printf("total tree bytes: %llu\n",
9995                (unsigned long long)total_btree_bytes);
9996         printf("total fs tree bytes: %llu\n",
9997                (unsigned long long)total_fs_tree_bytes);
9998         printf("total extent tree bytes: %llu\n",
9999                (unsigned long long)total_extent_tree_bytes);
10000         printf("btree space waste bytes: %llu\n",
10001                (unsigned long long)btree_space_waste);
10002         printf("file data blocks allocated: %llu\n referenced %llu\n",
10003                 (unsigned long long)data_bytes_allocated,
10004                 (unsigned long long)data_bytes_referenced);
10005
10006         free_qgroup_counts();
10007         free_root_recs_tree(&root_cache);
10008 close_out:
10009         close_ctree(root);
10010 err_out:
10011         if (ctx.progress_enabled)
10012                 task_deinit(ctx.info);
10013
10014         return ret;
10015 }