32fc20c4cc69c66c1458db93b2e112a7687ea964
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase shown by the progress indicator.  print_status_check() uses the
 * value as an index into its message table, which has one string for every
 * entry except TASK_NOTHING; that is why TASK_NOTHING must stay last.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/*
 * Context passed (as void *) to the progress callbacks.
 * print_status_check() reads tp to pick the message and hands info to the
 * task_period_*() helpers.
 */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted - confirm */
        int progress_enabled;
        /* phase currently being reported */
        enum task_position tp;

        /* handle used with task_period_start()/task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope state of the checker; everything starts out zero/NULL/empty.
 * NOTE(review): judging by the names these are byte counters, mode flags and
 * work lists filled in while checking - confirm against the users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* progress-indicator context, see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Common header of a back reference, embedded as the first member ("node")
 * of both struct data_backref and struct tree_backref.
 */
struct extent_backref {
        /* rb-tree linkage; see rb_node_to_extent_backref() */
        struct rb_node node;
        /* 1: container is a data_backref, 0: a tree_backref (see compare_extent_backref()) */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* compared by compare_extent_backref(); a full data backref is keyed by parent only */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
/*
 * Back reference to a data extent.  compare_data_backref() defines the sort
 * key: parent/root first, then (unless it is a full backref) owner, offset
 * and bytes, and finally disk_bytenr and found_ref when both sides have a
 * non-zero found_ref.
 */
struct data_backref {
        /* common header; must be named "node" for to_data_backref() */
        struct extent_backref node;
        /* parent and root share storage; only one of the two is meaningful */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* a count, unlike the one-bit node.found_ref flag of the header */
        u32 found_ref;
};
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb-tree node.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
175
/*
 * Back reference to a tree block.  compare_tree_backref() orders these by
 * parent/root only.
 */
struct tree_backref {
        /* common header; must be named "node" for to_tree_backref() */
        struct extent_backref node;
        /* parent and root share storage; only one of the two is meaningful */
        union {
                u64 parent;
                u64 root;
        };
};
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a two-bit field, so the value 2 fits next to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
232
/*
 * Record describing one extent.
 * NOTE(review): none of the users are visible next to this definition; the
 * member notes below state only what the declarations themselves show.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* NOTE(review): presumably holds extent_backref nodes ordered by compare_extent_backref() - confirm */
        struct rb_root backref_tree;
        /* linkage used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
/*
 * One name referring to an inode, linked on inode_record::backrefs.
 * The name bytes are stored inline after the structure: clone_inode_rec()
 * copies sizeof(struct) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        /* linkage on inode_record::backrefs */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        unsigned int filetype:8;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        unsigned int ref_type;
        u64 dir;
        u64 index;
        /* length of name[], not counting the extra byte copied by clone_inode_rec() */
        u16 namelen;
        char name[0];
};
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
/*
 * List-linked record of a root item.
 * NOTE(review): presumably a snapshot of selected root item fields - the code
 * that fills it in is not next to this definition; confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
295
/*
 * Reference error bits.  print_ref_error() prints one message per set bit,
 * in the order listed here.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
309
/*
 * One hole [start, start + len) in an inode's file extents, kept in the
 * rb-tree rooted at inode_record::holes and maintained by
 * add_file_extent_hole()/del_file_extent_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
315
/*
 * Everything collected about one inode.  Records are created by
 * get_inode_rec() and may be shared: a record with refs > 1 is cloned by
 * get_inode_rec() before it is handed out for modification.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        /* inode number; also the key of the record in the inode cache */
        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* get_inode_rec() presets this to 1 for BTRFS_FREE_INO_OBJECTID */
        u32 found_link;
        u64 found_size;
        /* get_inode_rec() initializes this to (u64)-1 */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* number of users sharing this record */
        u32 refs;
};
343
/*
 * Inode error bits stored in inode_record::errors.  print_inode_error()
 * prints one message per set bit, in the order listed here.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
359
/*
 * One named reference to a root, list-linked (see to_root_backref()).
 * The name is declared as a trailing zero-length array.
 * NOTE(review): presumably allocated with namelen extra bytes like
 * inode_backref - confirm at the allocation site.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
/*
 * Per-root record that can be stored in a cache tree through its embedded
 * cache_extent.
 * NOTE(review): backrefs presumably lists struct root_backref - confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
387
/*
 * Cache-tree entry carrying an opaque pointer.  get_inode_rec() uses it to
 * map an inode number (cache.start, with cache.size 1) to its inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
392
/*
 * Cache-tree entry that owns two nested cache trees plus a current inode
 * record and a reference count.
 * NOTE(review): presumably tracks a tree block shared between roots during
 * the fs tree walk (see walk_control::shared) - confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
400
/* A block described by its start and size.  NOTE(review): units are presumably bytes - confirm. */
struct block_info {
        u64 start;
        u32 size;
};
405
/*
 * State of a tree walk: a cache tree named "shared" and one shared_node
 * slot per tree level (BTRFS_MAX_LEVEL of them).
 * NOTE(review): active_node/root_level presumably index nodes[] - confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
412
/*
 * List-linked (key, root id) pair.
 * NOTE(review): presumably queued on the delete_items list - confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
418
/*
 * List-linked (bytenr, bytes) candidate with a count and a broken flag.
 * NOTE(review): the code using this is not next to the definition; the
 * meaning of count/broken should be confirmed there.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
426
/*
 * Information about a root's top tree block, storable in a cache tree
 * through the embedded cache_extent.
 * NOTE(review): presumably what roots_info_cache holds - confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
436
/*
 * Error bits for the low memory mode check.
 *
 * No caller cares about them yet; they are only used internally for error
 * classification.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
448
449 static void *print_status_check(void *p)
450 {
451         struct task_ctx *priv = p;
452         const char work_indicator[] = { '.', 'o', 'O', 'o' };
453         uint32_t count = 0;
454         static char *task_position_string[] = {
455                 "checking extents",
456                 "checking free space cache",
457                 "checking fs roots",
458         };
459
460         task_period_start(priv->info, 1000 /* 1s */);
461
462         if (priv->tp == TASK_NOTHING)
463                 return NULL;
464
465         while (1) {
466                 printf("%s [%c]\r", task_position_string[priv->tp],
467                                 work_indicator[count % 4]);
468                 count++;
469                 fflush(stdout);
470                 task_period_wait(priv->info);
471         }
472         return NULL;
473 }
474
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        const int status = 0;

        (void)p;
        printf("\n");
        fflush(stdout);

        return status;
}
482
483 /* Compatible function to allow reuse of old codes */
484 static u64 first_extent_gap(struct rb_root *holes)
485 {
486         struct file_extent_hole *hole;
487
488         if (RB_EMPTY_ROOT(holes))
489                 return (u64)-1;
490
491         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
492         return hole->start;
493 }
494
495 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
496 {
497         struct file_extent_hole *hole1;
498         struct file_extent_hole *hole2;
499
500         hole1 = rb_entry(node1, struct file_extent_hole, node);
501         hole2 = rb_entry(node2, struct file_extent_hole, node);
502
503         if (hole1->start > hole2->start)
504                 return -1;
505         if (hole1->start < hole2->start)
506                 return 1;
507         /* Now hole1->start == hole2->start */
508         if (hole1->len >= hole2->len)
509                 /*
510                  * Hole 1 will be merge center
511                  * Same hole will be merged later
512                  */
513                 return -1;
514         /* Hole 2 will be merge center */
515         return 1;
516 }
517
518 /*
519  * Add a hole to the record
520  *
521  * This will do hole merge for copy_file_extent_holes(),
522  * which will ensure there won't be continuous holes.
523  */
524 static int add_file_extent_hole(struct rb_root *holes,
525                                 u64 start, u64 len)
526 {
527         struct file_extent_hole *hole;
528         struct file_extent_hole *prev = NULL;
529         struct file_extent_hole *next = NULL;
530
531         hole = malloc(sizeof(*hole));
532         if (!hole)
533                 return -ENOMEM;
534         hole->start = start;
535         hole->len = len;
536         /* Since compare will not return 0, no -EEXIST will happen */
537         rb_insert(holes, &hole->node, compare_hole);
538
539         /* simple merge with previous hole */
540         if (rb_prev(&hole->node))
541                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
542                                 node);
543         if (prev && prev->start + prev->len >= hole->start) {
544                 hole->len = hole->start + hole->len - prev->start;
545                 hole->start = prev->start;
546                 rb_erase(&prev->node, holes);
547                 free(prev);
548                 prev = NULL;
549         }
550
551         /* iterate merge with next holes */
552         while (1) {
553                 if (!rb_next(&hole->node))
554                         break;
555                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
556                                         node);
557                 if (hole->start + hole->len >= next->start) {
558                         if (hole->start + hole->len <= next->start + next->len)
559                                 hole->len = next->start + next->len -
560                                             hole->start;
561                         rb_erase(&next->node, holes);
562                         free(next);
563                         next = NULL;
564                 } else
565                         break;
566         }
567         return 0;
568 }
569
570 static int compare_hole_range(struct rb_node *node, void *data)
571 {
572         struct file_extent_hole *hole;
573         u64 start;
574
575         hole = (struct file_extent_hole *)data;
576         start = hole->start;
577
578         hole = rb_entry(node, struct file_extent_hole, node);
579         if (start < hole->start)
580                 return -1;
581         if (start >= hole->start && start < hole->start + hole->len)
582                 return 0;
583         return 1;
584 }
585
586 /*
587  * Delete a hole in the record
588  *
589  * This will do the hole split and is much restrict than add.
590  */
591 static int del_file_extent_hole(struct rb_root *holes,
592                                 u64 start, u64 len)
593 {
594         struct file_extent_hole *hole;
595         struct file_extent_hole tmp;
596         u64 prev_start = 0;
597         u64 prev_len = 0;
598         u64 next_start = 0;
599         u64 next_len = 0;
600         struct rb_node *node;
601         int have_prev = 0;
602         int have_next = 0;
603         int ret = 0;
604
605         tmp.start = start;
606         tmp.len = len;
607         node = rb_search(holes, &tmp, compare_hole_range, NULL);
608         if (!node)
609                 return -EEXIST;
610         hole = rb_entry(node, struct file_extent_hole, node);
611         if (start + len > hole->start + hole->len)
612                 return -EEXIST;
613
614         /*
615          * Now there will be no overlap, delete the hole and re-add the
616          * split(s) if they exists.
617          */
618         if (start > hole->start) {
619                 prev_start = hole->start;
620                 prev_len = start - hole->start;
621                 have_prev = 1;
622         }
623         if (hole->start + hole->len > start + len) {
624                 next_start = start + len;
625                 next_len = hole->start + hole->len - start - len;
626                 have_next = 1;
627         }
628         rb_erase(node, holes);
629         free(hole);
630         if (have_prev) {
631                 ret = add_file_extent_hole(holes, prev_start, prev_len);
632                 if (ret < 0)
633                         return ret;
634         }
635         if (have_next) {
636                 ret = add_file_extent_hole(holes, next_start, next_len);
637                 if (ret < 0)
638                         return ret;
639         }
640         return 0;
641 }
642
643 static int copy_file_extent_holes(struct rb_root *dst,
644                                   struct rb_root *src)
645 {
646         struct file_extent_hole *hole;
647         struct rb_node *node;
648         int ret = 0;
649
650         node = rb_first(src);
651         while (node) {
652                 hole = rb_entry(node, struct file_extent_hole, node);
653                 ret = add_file_extent_hole(dst, hole->start, hole->len);
654                 if (ret)
655                         break;
656                 node = rb_next(node);
657         }
658         return ret;
659 }
660
661 static void free_file_extent_holes(struct rb_root *holes)
662 {
663         struct rb_node *node;
664         struct file_extent_hole *hole;
665
666         node = rb_first(holes);
667         while (node) {
668                 hole = rb_entry(node, struct file_extent_hole, node);
669                 rb_erase(node, holes);
670                 free(hole);
671                 node = rb_first(holes);
672         }
673 }
674
675 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
676
677 static void record_root_in_trans(struct btrfs_trans_handle *trans,
678                                  struct btrfs_root *root)
679 {
680         if (root->last_trans != trans->transid) {
681                 root->track_dirty = 1;
682                 root->last_trans = trans->transid;
683                 root->commit_root = root->node;
684                 extent_buffer_get(root->node);
685         }
686 }
687
688 static u8 imode_to_type(u32 imode)
689 {
690 #define S_SHIFT 12
691         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
692                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
693                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
694                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
695                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
696                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
697                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
698                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
699         };
700
701         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
702 #undef S_SHIFT
703 }
704
705 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
706 {
707         struct device_record *rec1;
708         struct device_record *rec2;
709
710         rec1 = rb_entry(node1, struct device_record, node);
711         rec2 = rb_entry(node2, struct device_record, node);
712         if (rec1->devid > rec2->devid)
713                 return -1;
714         else if (rec1->devid < rec2->devid)
715                 return 1;
716         else
717                 return 0;
718 }
719
720 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
721 {
722         struct inode_record *rec;
723         struct inode_backref *backref;
724         struct inode_backref *orig;
725         struct inode_backref *tmp;
726         struct orphan_data_extent *src_orphan;
727         struct orphan_data_extent *dst_orphan;
728         size_t size;
729         int ret;
730
731         rec = malloc(sizeof(*rec));
732         if (!rec)
733                 return ERR_PTR(-ENOMEM);
734         memcpy(rec, orig_rec, sizeof(*rec));
735         rec->refs = 1;
736         INIT_LIST_HEAD(&rec->backrefs);
737         INIT_LIST_HEAD(&rec->orphan_extents);
738         rec->holes = RB_ROOT;
739
740         list_for_each_entry(orig, &orig_rec->backrefs, list) {
741                 size = sizeof(*orig) + orig->namelen + 1;
742                 backref = malloc(size);
743                 if (!backref) {
744                         ret = -ENOMEM;
745                         goto cleanup;
746                 }
747                 memcpy(backref, orig, size);
748                 list_add_tail(&backref->list, &rec->backrefs);
749         }
750         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
751                 dst_orphan = malloc(sizeof(*dst_orphan));
752                 if (!dst_orphan) {
753                         ret = -ENOMEM;
754                         goto cleanup;
755                 }
756                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
757                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
758         }
759         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
760         BUG_ON(ret < 0);
761
762         return rec;
763
764 cleanup:
765         if (!list_empty(&rec->backrefs))
766                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
767                         list_del(&orig->list);
768                         free(orig);
769                 }
770
771         if (!list_empty(&rec->orphan_extents))
772                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
773                         list_del(&orig->list);
774                         free(orig);
775                 }
776
777         free(rec);
778
779         return ERR_PTR(ret);
780 }
781
782 static void print_orphan_data_extents(struct list_head *orphan_extents,
783                                       u64 objectid)
784 {
785         struct orphan_data_extent *orphan;
786
787         if (list_empty(orphan_extents))
788                 return;
789         printf("The following data extent is lost in tree %llu:\n",
790                objectid);
791         list_for_each_entry(orphan, orphan_extents, list) {
792                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
793                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
794                        orphan->disk_len);
795         }
796 }
797
798 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
799 {
800         u64 root_objectid = root->root_key.objectid;
801         int errors = rec->errors;
802
803         if (!errors)
804                 return;
805         /* reloc root errors, we print its corresponding fs root objectid*/
806         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
807                 root_objectid = root->root_key.offset;
808                 fprintf(stderr, "reloc");
809         }
810         fprintf(stderr, "root %llu inode %llu errors %x",
811                 (unsigned long long) root_objectid,
812                 (unsigned long long) rec->ino, rec->errors);
813
814         if (errors & I_ERR_NO_INODE_ITEM)
815                 fprintf(stderr, ", no inode item");
816         if (errors & I_ERR_NO_ORPHAN_ITEM)
817                 fprintf(stderr, ", no orphan item");
818         if (errors & I_ERR_DUP_INODE_ITEM)
819                 fprintf(stderr, ", dup inode item");
820         if (errors & I_ERR_DUP_DIR_INDEX)
821                 fprintf(stderr, ", dup dir index");
822         if (errors & I_ERR_ODD_DIR_ITEM)
823                 fprintf(stderr, ", odd dir item");
824         if (errors & I_ERR_ODD_FILE_EXTENT)
825                 fprintf(stderr, ", odd file extent");
826         if (errors & I_ERR_BAD_FILE_EXTENT)
827                 fprintf(stderr, ", bad file extent");
828         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
829                 fprintf(stderr, ", file extent overlap");
830         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
831                 fprintf(stderr, ", file extent discount");
832         if (errors & I_ERR_DIR_ISIZE_WRONG)
833                 fprintf(stderr, ", dir isize wrong");
834         if (errors & I_ERR_FILE_NBYTES_WRONG)
835                 fprintf(stderr, ", nbytes wrong");
836         if (errors & I_ERR_ODD_CSUM_ITEM)
837                 fprintf(stderr, ", odd csum item");
838         if (errors & I_ERR_SOME_CSUM_MISSING)
839                 fprintf(stderr, ", some csum missing");
840         if (errors & I_ERR_LINK_COUNT_WRONG)
841                 fprintf(stderr, ", link count wrong");
842         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
843                 fprintf(stderr, ", orphan file extent");
844         fprintf(stderr, "\n");
845         /* Print the orphan extents if needed */
846         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
847                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
848
849         /* Print the holes if needed */
850         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
851                 struct file_extent_hole *hole;
852                 struct rb_node *node;
853                 int found = 0;
854
855                 node = rb_first(&rec->holes);
856                 fprintf(stderr, "Found file extent holes:\n");
857                 while (node) {
858                         found = 1;
859                         hole = rb_entry(node, struct file_extent_hole, node);
860                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
861                                 hole->start, hole->len);
862                         node = rb_next(node);
863                 }
864                 if (!found)
865                         fprintf(stderr, "\tstart: 0, len: %llu\n",
866                                 round_up(rec->isize, root->sectorsize));
867         }
868 }
869
870 static void print_ref_error(int errors)
871 {
872         if (errors & REF_ERR_NO_DIR_ITEM)
873                 fprintf(stderr, ", no dir item");
874         if (errors & REF_ERR_NO_DIR_INDEX)
875                 fprintf(stderr, ", no dir index");
876         if (errors & REF_ERR_NO_INODE_REF)
877                 fprintf(stderr, ", no inode ref");
878         if (errors & REF_ERR_DUP_DIR_ITEM)
879                 fprintf(stderr, ", dup dir item");
880         if (errors & REF_ERR_DUP_DIR_INDEX)
881                 fprintf(stderr, ", dup dir index");
882         if (errors & REF_ERR_DUP_INODE_REF)
883                 fprintf(stderr, ", dup inode ref");
884         if (errors & REF_ERR_INDEX_UNMATCH)
885                 fprintf(stderr, ", index mismatch");
886         if (errors & REF_ERR_FILETYPE_UNMATCH)
887                 fprintf(stderr, ", filetype mismatch");
888         if (errors & REF_ERR_NAME_TOO_LONG)
889                 fprintf(stderr, ", name too long");
890         if (errors & REF_ERR_NO_ROOT_REF)
891                 fprintf(stderr, ", no root ref");
892         if (errors & REF_ERR_NO_ROOT_BACKREF)
893                 fprintf(stderr, ", no root backref");
894         if (errors & REF_ERR_DUP_ROOT_REF)
895                 fprintf(stderr, ", dup root ref");
896         if (errors & REF_ERR_DUP_ROOT_BACKREF)
897                 fprintf(stderr, ", dup root backref");
898         fprintf(stderr, "\n");
899 }
900
901 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
902                                           u64 ino, int mod)
903 {
904         struct ptr_node *node;
905         struct cache_extent *cache;
906         struct inode_record *rec = NULL;
907         int ret;
908
909         cache = lookup_cache_extent(inode_cache, ino, 1);
910         if (cache) {
911                 node = container_of(cache, struct ptr_node, cache);
912                 rec = node->data;
913                 if (mod && rec->refs > 1) {
914                         node->data = clone_inode_rec(rec);
915                         if (IS_ERR(node->data))
916                                 return node->data;
917                         rec->refs--;
918                         rec = node->data;
919                 }
920         } else if (mod) {
921                 rec = calloc(1, sizeof(*rec));
922                 if (!rec)
923                         return ERR_PTR(-ENOMEM);
924                 rec->ino = ino;
925                 rec->extent_start = (u64)-1;
926                 rec->refs = 1;
927                 INIT_LIST_HEAD(&rec->backrefs);
928                 INIT_LIST_HEAD(&rec->orphan_extents);
929                 rec->holes = RB_ROOT;
930
931                 node = malloc(sizeof(*node));
932                 if (!node) {
933                         free(rec);
934                         return ERR_PTR(-ENOMEM);
935                 }
936                 node->cache.start = ino;
937                 node->cache.size = 1;
938                 node->data = rec;
939
940                 if (ino == BTRFS_FREE_INO_OBJECTID)
941                         rec->found_link = 1;
942
943                 ret = insert_cache_extent(inode_cache, &node->cache);
944                 if (ret)
945                         return ERR_PTR(-EEXIST);
946         }
947         return rec;
948 }
949
950 static void free_orphan_data_extents(struct list_head *orphan_extents)
951 {
952         struct orphan_data_extent *orphan;
953
954         while (!list_empty(orphan_extents)) {
955                 orphan = list_entry(orphan_extents->next,
956                                     struct orphan_data_extent, list);
957                 list_del(&orphan->list);
958                 free(orphan);
959         }
960 }
961
962 static void free_inode_rec(struct inode_record *rec)
963 {
964         struct inode_backref *backref;
965
966         if (--rec->refs > 0)
967                 return;
968
969         while (!list_empty(&rec->backrefs)) {
970                 backref = to_inode_backref(rec->backrefs.next);
971                 list_del(&backref->list);
972                 free(backref);
973         }
974         free_orphan_data_extents(&rec->orphan_extents);
975         free_file_extent_holes(&rec->holes);
976         free(rec);
977 }
978
979 static int can_free_inode_rec(struct inode_record *rec)
980 {
981         if (!rec->errors && rec->checked && rec->found_inode_item &&
982             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
983                 return 1;
984         return 0;
985 }
986
987 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
988                                  struct inode_record *rec)
989 {
990         struct cache_extent *cache;
991         struct inode_backref *tmp, *backref;
992         struct ptr_node *node;
993         unsigned char filetype;
994
995         if (!rec->found_inode_item)
996                 return;
997
998         filetype = imode_to_type(rec->imode);
999         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1000                 if (backref->found_dir_item && backref->found_dir_index) {
1001                         if (backref->filetype != filetype)
1002                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1003                         if (!backref->errors && backref->found_inode_ref &&
1004                             rec->nlink == rec->found_link) {
1005                                 list_del(&backref->list);
1006                                 free(backref);
1007                         }
1008                 }
1009         }
1010
1011         if (!rec->checked || rec->merging)
1012                 return;
1013
1014         if (S_ISDIR(rec->imode)) {
1015                 if (rec->found_size != rec->isize)
1016                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1017                 if (rec->found_file_extent)
1018                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1019         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1020                 if (rec->found_dir_item)
1021                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1022                 if (rec->found_size != rec->nbytes)
1023                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1024                 if (rec->nlink > 0 && !no_holes &&
1025                     (rec->extent_end < rec->isize ||
1026                      first_extent_gap(&rec->holes) < rec->isize))
1027                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1028         }
1029
1030         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1031                 if (rec->found_csum_item && rec->nodatasum)
1032                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1033                 if (rec->some_csum_missing && !rec->nodatasum)
1034                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1035         }
1036
1037         BUG_ON(rec->refs != 1);
1038         if (can_free_inode_rec(rec)) {
1039                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1040                 node = container_of(cache, struct ptr_node, cache);
1041                 BUG_ON(node->data != rec);
1042                 remove_cache_extent(inode_cache, &node->cache);
1043                 free(node);
1044                 free_inode_rec(rec);
1045         }
1046 }
1047
1048 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1049 {
1050         struct btrfs_path path;
1051         struct btrfs_key key;
1052         int ret;
1053
1054         key.objectid = BTRFS_ORPHAN_OBJECTID;
1055         key.type = BTRFS_ORPHAN_ITEM_KEY;
1056         key.offset = ino;
1057
1058         btrfs_init_path(&path);
1059         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1060         btrfs_release_path(&path);
1061         if (ret > 0)
1062                 ret = -ENOENT;
1063         return ret;
1064 }
1065
1066 static int process_inode_item(struct extent_buffer *eb,
1067                               int slot, struct btrfs_key *key,
1068                               struct shared_node *active_node)
1069 {
1070         struct inode_record *rec;
1071         struct btrfs_inode_item *item;
1072
1073         rec = active_node->current;
1074         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1075         if (rec->found_inode_item) {
1076                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1077                 return 1;
1078         }
1079         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1080         rec->nlink = btrfs_inode_nlink(eb, item);
1081         rec->isize = btrfs_inode_size(eb, item);
1082         rec->nbytes = btrfs_inode_nbytes(eb, item);
1083         rec->imode = btrfs_inode_mode(eb, item);
1084         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1085                 rec->nodatasum = 1;
1086         rec->found_inode_item = 1;
1087         if (rec->nlink == 0)
1088                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1089         maybe_free_inode_rec(&active_node->inode_cache, rec);
1090         return 0;
1091 }
1092
1093 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1094                                                 const char *name,
1095                                                 int namelen, u64 dir)
1096 {
1097         struct inode_backref *backref;
1098
1099         list_for_each_entry(backref, &rec->backrefs, list) {
1100                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1101                         break;
1102                 if (backref->dir != dir || backref->namelen != namelen)
1103                         continue;
1104                 if (memcmp(name, backref->name, namelen))
1105                         continue;
1106                 return backref;
1107         }
1108
1109         backref = malloc(sizeof(*backref) + namelen + 1);
1110         if (!backref)
1111                 return NULL;
1112         memset(backref, 0, sizeof(*backref));
1113         backref->dir = dir;
1114         backref->namelen = namelen;
1115         memcpy(backref->name, name, namelen);
1116         backref->name[namelen] = '\0';
1117         list_add_tail(&backref->list, &rec->backrefs);
1118         return backref;
1119 }
1120
1121 static int add_inode_backref(struct cache_tree *inode_cache,
1122                              u64 ino, u64 dir, u64 index,
1123                              const char *name, int namelen,
1124                              int filetype, int itemtype, int errors)
1125 {
1126         struct inode_record *rec;
1127         struct inode_backref *backref;
1128
1129         rec = get_inode_rec(inode_cache, ino, 1);
1130         BUG_ON(IS_ERR(rec));
1131         backref = get_inode_backref(rec, name, namelen, dir);
1132         BUG_ON(!backref);
1133         if (errors)
1134                 backref->errors |= errors;
1135         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1136                 if (backref->found_dir_index)
1137                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1138                 if (backref->found_inode_ref && backref->index != index)
1139                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1140                 if (backref->found_dir_item && backref->filetype != filetype)
1141                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1142
1143                 backref->index = index;
1144                 backref->filetype = filetype;
1145                 backref->found_dir_index = 1;
1146         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1147                 rec->found_link++;
1148                 if (backref->found_dir_item)
1149                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1150                 if (backref->found_dir_index && backref->filetype != filetype)
1151                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1152
1153                 backref->filetype = filetype;
1154                 backref->found_dir_item = 1;
1155         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1156                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1157                 if (backref->found_inode_ref)
1158                         backref->errors |= REF_ERR_DUP_INODE_REF;
1159                 if (backref->found_dir_index && backref->index != index)
1160                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1161                 else
1162                         backref->index = index;
1163
1164                 backref->ref_type = itemtype;
1165                 backref->found_inode_ref = 1;
1166         } else {
1167                 BUG_ON(1);
1168         }
1169
1170         maybe_free_inode_rec(inode_cache, rec);
1171         return 0;
1172 }
1173
1174 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1175                             struct cache_tree *dst_cache)
1176 {
1177         struct inode_backref *backref;
1178         u32 dir_count = 0;
1179         int ret = 0;
1180
1181         dst->merging = 1;
1182         list_for_each_entry(backref, &src->backrefs, list) {
1183                 if (backref->found_dir_index) {
1184                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1185                                         backref->index, backref->name,
1186                                         backref->namelen, backref->filetype,
1187                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1188                 }
1189                 if (backref->found_dir_item) {
1190                         dir_count++;
1191                         add_inode_backref(dst_cache, dst->ino,
1192                                         backref->dir, 0, backref->name,
1193                                         backref->namelen, backref->filetype,
1194                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1195                 }
1196                 if (backref->found_inode_ref) {
1197                         add_inode_backref(dst_cache, dst->ino,
1198                                         backref->dir, backref->index,
1199                                         backref->name, backref->namelen, 0,
1200                                         backref->ref_type, backref->errors);
1201                 }
1202         }
1203
1204         if (src->found_dir_item)
1205                 dst->found_dir_item = 1;
1206         if (src->found_file_extent)
1207                 dst->found_file_extent = 1;
1208         if (src->found_csum_item)
1209                 dst->found_csum_item = 1;
1210         if (src->some_csum_missing)
1211                 dst->some_csum_missing = 1;
1212         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1213                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1214                 if (ret < 0)
1215                         return ret;
1216         }
1217
1218         BUG_ON(src->found_link < dir_count);
1219         dst->found_link += src->found_link - dir_count;
1220         dst->found_size += src->found_size;
1221         if (src->extent_start != (u64)-1) {
1222                 if (dst->extent_start == (u64)-1) {
1223                         dst->extent_start = src->extent_start;
1224                         dst->extent_end = src->extent_end;
1225                 } else {
1226                         if (dst->extent_end > src->extent_start)
1227                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1228                         else if (dst->extent_end < src->extent_start) {
1229                                 ret = add_file_extent_hole(&dst->holes,
1230                                         dst->extent_end,
1231                                         src->extent_start - dst->extent_end);
1232                         }
1233                         if (dst->extent_end < src->extent_end)
1234                                 dst->extent_end = src->extent_end;
1235                 }
1236         }
1237
1238         dst->errors |= src->errors;
1239         if (src->found_inode_item) {
1240                 if (!dst->found_inode_item) {
1241                         dst->nlink = src->nlink;
1242                         dst->isize = src->isize;
1243                         dst->nbytes = src->nbytes;
1244                         dst->imode = src->imode;
1245                         dst->nodatasum = src->nodatasum;
1246                         dst->found_inode_item = 1;
1247                 } else {
1248                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1249                 }
1250         }
1251         dst->merging = 0;
1252
1253         return 0;
1254 }
1255
1256 static int splice_shared_node(struct shared_node *src_node,
1257                               struct shared_node *dst_node)
1258 {
1259         struct cache_extent *cache;
1260         struct ptr_node *node, *ins;
1261         struct cache_tree *src, *dst;
1262         struct inode_record *rec, *conflict;
1263         u64 current_ino = 0;
1264         int splice = 0;
1265         int ret;
1266
1267         if (--src_node->refs == 0)
1268                 splice = 1;
1269         if (src_node->current)
1270                 current_ino = src_node->current->ino;
1271
1272         src = &src_node->root_cache;
1273         dst = &dst_node->root_cache;
1274 again:
1275         cache = search_cache_extent(src, 0);
1276         while (cache) {
1277                 node = container_of(cache, struct ptr_node, cache);
1278                 rec = node->data;
1279                 cache = next_cache_extent(cache);
1280
1281                 if (splice) {
1282                         remove_cache_extent(src, &node->cache);
1283                         ins = node;
1284                 } else {
1285                         ins = malloc(sizeof(*ins));
1286                         BUG_ON(!ins);
1287                         ins->cache.start = node->cache.start;
1288                         ins->cache.size = node->cache.size;
1289                         ins->data = rec;
1290                         rec->refs++;
1291                 }
1292                 ret = insert_cache_extent(dst, &ins->cache);
1293                 if (ret == -EEXIST) {
1294                         conflict = get_inode_rec(dst, rec->ino, 1);
1295                         BUG_ON(IS_ERR(conflict));
1296                         merge_inode_recs(rec, conflict, dst);
1297                         if (rec->checked) {
1298                                 conflict->checked = 1;
1299                                 if (dst_node->current == conflict)
1300                                         dst_node->current = NULL;
1301                         }
1302                         maybe_free_inode_rec(dst, conflict);
1303                         free_inode_rec(rec);
1304                         free(ins);
1305                 } else {
1306                         BUG_ON(ret);
1307                 }
1308         }
1309
1310         if (src == &src_node->root_cache) {
1311                 src = &src_node->inode_cache;
1312                 dst = &dst_node->inode_cache;
1313                 goto again;
1314         }
1315
1316         if (current_ino > 0 && (!dst_node->current ||
1317             current_ino > dst_node->current->ino)) {
1318                 if (dst_node->current) {
1319                         dst_node->current->checked = 1;
1320                         maybe_free_inode_rec(dst, dst_node->current);
1321                 }
1322                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1323                 BUG_ON(IS_ERR(dst_node->current));
1324         }
1325         return 0;
1326 }
1327
1328 static void free_inode_ptr(struct cache_extent *cache)
1329 {
1330         struct ptr_node *node;
1331         struct inode_record *rec;
1332
1333         node = container_of(cache, struct ptr_node, cache);
1334         rec = node->data;
1335         free_inode_rec(rec);
1336         free(node);
1337 }
1338
1339 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1340
1341 static struct shared_node *find_shared_node(struct cache_tree *shared,
1342                                             u64 bytenr)
1343 {
1344         struct cache_extent *cache;
1345         struct shared_node *node;
1346
1347         cache = lookup_cache_extent(shared, bytenr, 1);
1348         if (cache) {
1349                 node = container_of(cache, struct shared_node, cache);
1350                 return node;
1351         }
1352         return NULL;
1353 }
1354
1355 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1356 {
1357         int ret;
1358         struct shared_node *node;
1359
1360         node = calloc(1, sizeof(*node));
1361         if (!node)
1362                 return -ENOMEM;
1363         node->cache.start = bytenr;
1364         node->cache.size = 1;
1365         cache_tree_init(&node->root_cache);
1366         cache_tree_init(&node->inode_cache);
1367         node->refs = refs;
1368
1369         ret = insert_cache_extent(shared, &node->cache);
1370
1371         return ret;
1372 }
1373
1374 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1375                              struct walk_control *wc, int level)
1376 {
1377         struct shared_node *node;
1378         struct shared_node *dest;
1379         int ret;
1380
1381         if (level == wc->active_node)
1382                 return 0;
1383
1384         BUG_ON(wc->active_node <= level);
1385         node = find_shared_node(&wc->shared, bytenr);
1386         if (!node) {
1387                 ret = add_shared_node(&wc->shared, bytenr, refs);
1388                 BUG_ON(ret);
1389                 node = find_shared_node(&wc->shared, bytenr);
1390                 wc->nodes[level] = node;
1391                 wc->active_node = level;
1392                 return 0;
1393         }
1394
1395         if (wc->root_level == wc->active_node &&
1396             btrfs_root_refs(&root->root_item) == 0) {
1397                 if (--node->refs == 0) {
1398                         free_inode_recs_tree(&node->root_cache);
1399                         free_inode_recs_tree(&node->inode_cache);
1400                         remove_cache_extent(&wc->shared, &node->cache);
1401                         free(node);
1402                 }
1403                 return 1;
1404         }
1405
1406         dest = wc->nodes[wc->active_node];
1407         splice_shared_node(node, dest);
1408         if (node->refs == 0) {
1409                 remove_cache_extent(&wc->shared, &node->cache);
1410                 free(node);
1411         }
1412         return 1;
1413 }
1414
1415 static int leave_shared_node(struct btrfs_root *root,
1416                              struct walk_control *wc, int level)
1417 {
1418         struct shared_node *node;
1419         struct shared_node *dest;
1420         int i;
1421
1422         if (level == wc->root_level)
1423                 return 0;
1424
1425         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1426                 if (wc->nodes[i])
1427                         break;
1428         }
1429         BUG_ON(i >= BTRFS_MAX_LEVEL);
1430
1431         node = wc->nodes[wc->active_node];
1432         wc->nodes[wc->active_node] = NULL;
1433         wc->active_node = i;
1434
1435         dest = wc->nodes[wc->active_node];
1436         if (wc->active_node < wc->root_level ||
1437             btrfs_root_refs(&root->root_item) > 0) {
1438                 BUG_ON(node->refs <= 1);
1439                 splice_shared_node(node, dest);
1440         } else {
1441                 BUG_ON(node->refs < 2);
1442                 node->refs--;
1443         }
1444         return 0;
1445 }
1446
1447 /*
1448  * Returns:
1449  * < 0 - on error
1450  * 1   - if the root with id child_root_id is a child of root parent_root_id
1451  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1452  *       has other root(s) as parent(s)
1453  * 2   - if the root child_root_id doesn't have any parent roots
1454  */
1455 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1456                          u64 child_root_id)
1457 {
1458         struct btrfs_path path;
1459         struct btrfs_key key;
1460         struct extent_buffer *leaf;
1461         int has_parent = 0;
1462         int ret;
1463
1464         btrfs_init_path(&path);
1465
1466         key.objectid = parent_root_id;
1467         key.type = BTRFS_ROOT_REF_KEY;
1468         key.offset = child_root_id;
1469         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1470                                 0, 0);
1471         if (ret < 0)
1472                 return ret;
1473         btrfs_release_path(&path);
1474         if (!ret)
1475                 return 1;
1476
1477         key.objectid = child_root_id;
1478         key.type = BTRFS_ROOT_BACKREF_KEY;
1479         key.offset = 0;
1480         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1481                                 0, 0);
1482         if (ret < 0)
1483                 goto out;
1484
1485         while (1) {
1486                 leaf = path.nodes[0];
1487                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1488                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1489                         if (ret)
1490                                 break;
1491                         leaf = path.nodes[0];
1492                 }
1493
1494                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1495                 if (key.objectid != child_root_id ||
1496                     key.type != BTRFS_ROOT_BACKREF_KEY)
1497                         break;
1498
1499                 has_parent = 1;
1500
1501                 if (key.offset == parent_root_id) {
1502                         btrfs_release_path(&path);
1503                         return 1;
1504                 }
1505
1506                 path.slots[0]++;
1507         }
1508 out:
1509         btrfs_release_path(&path);
1510         if (ret < 0)
1511                 return ret;
1512         return has_parent ? 0 : 2;
1513 }
1514
1515 static int process_dir_item(struct btrfs_root *root,
1516                             struct extent_buffer *eb,
1517                             int slot, struct btrfs_key *key,
1518                             struct shared_node *active_node)
1519 {
1520         u32 total;
1521         u32 cur = 0;
1522         u32 len;
1523         u32 name_len;
1524         u32 data_len;
1525         int error;
1526         int nritems = 0;
1527         int filetype;
1528         struct btrfs_dir_item *di;
1529         struct inode_record *rec;
1530         struct cache_tree *root_cache;
1531         struct cache_tree *inode_cache;
1532         struct btrfs_key location;
1533         char namebuf[BTRFS_NAME_LEN];
1534
1535         root_cache = &active_node->root_cache;
1536         inode_cache = &active_node->inode_cache;
1537         rec = active_node->current;
1538         rec->found_dir_item = 1;
1539
1540         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1541         total = btrfs_item_size_nr(eb, slot);
1542         while (cur < total) {
1543                 nritems++;
1544                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1545                 name_len = btrfs_dir_name_len(eb, di);
1546                 data_len = btrfs_dir_data_len(eb, di);
1547                 filetype = btrfs_dir_type(eb, di);
1548
1549                 rec->found_size += name_len;
1550                 if (name_len <= BTRFS_NAME_LEN) {
1551                         len = name_len;
1552                         error = 0;
1553                 } else {
1554                         len = BTRFS_NAME_LEN;
1555                         error = REF_ERR_NAME_TOO_LONG;
1556                 }
1557                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1558
1559                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1560                         add_inode_backref(inode_cache, location.objectid,
1561                                           key->objectid, key->offset, namebuf,
1562                                           len, filetype, key->type, error);
1563                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1564                         add_inode_backref(root_cache, location.objectid,
1565                                           key->objectid, key->offset,
1566                                           namebuf, len, filetype,
1567                                           key->type, error);
1568                 } else {
1569                         fprintf(stderr, "invalid location in dir item %u\n",
1570                                 location.type);
1571                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1572                                           key->objectid, key->offset, namebuf,
1573                                           len, filetype, key->type, error);
1574                 }
1575
1576                 len = sizeof(*di) + name_len + data_len;
1577                 di = (struct btrfs_dir_item *)((char *)di + len);
1578                 cur += len;
1579         }
1580         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1581                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1582
1583         return 0;
1584 }
1585
1586 static int process_inode_ref(struct extent_buffer *eb,
1587                              int slot, struct btrfs_key *key,
1588                              struct shared_node *active_node)
1589 {
1590         u32 total;
1591         u32 cur = 0;
1592         u32 len;
1593         u32 name_len;
1594         u64 index;
1595         int error;
1596         struct cache_tree *inode_cache;
1597         struct btrfs_inode_ref *ref;
1598         char namebuf[BTRFS_NAME_LEN];
1599
1600         inode_cache = &active_node->inode_cache;
1601
1602         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1603         total = btrfs_item_size_nr(eb, slot);
1604         while (cur < total) {
1605                 name_len = btrfs_inode_ref_name_len(eb, ref);
1606                 index = btrfs_inode_ref_index(eb, ref);
1607                 if (name_len <= BTRFS_NAME_LEN) {
1608                         len = name_len;
1609                         error = 0;
1610                 } else {
1611                         len = BTRFS_NAME_LEN;
1612                         error = REF_ERR_NAME_TOO_LONG;
1613                 }
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1615                 add_inode_backref(inode_cache, key->objectid, key->offset,
1616                                   index, namebuf, len, 0, key->type, error);
1617
1618                 len = sizeof(*ref) + name_len;
1619                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1620                 cur += len;
1621         }
1622         return 0;
1623 }
1624
1625 static int process_inode_extref(struct extent_buffer *eb,
1626                                 int slot, struct btrfs_key *key,
1627                                 struct shared_node *active_node)
1628 {
1629         u32 total;
1630         u32 cur = 0;
1631         u32 len;
1632         u32 name_len;
1633         u64 index;
1634         u64 parent;
1635         int error;
1636         struct cache_tree *inode_cache;
1637         struct btrfs_inode_extref *extref;
1638         char namebuf[BTRFS_NAME_LEN];
1639
1640         inode_cache = &active_node->inode_cache;
1641
1642         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1643         total = btrfs_item_size_nr(eb, slot);
1644         while (cur < total) {
1645                 name_len = btrfs_inode_extref_name_len(eb, extref);
1646                 index = btrfs_inode_extref_index(eb, extref);
1647                 parent = btrfs_inode_extref_parent(eb, extref);
1648                 if (name_len <= BTRFS_NAME_LEN) {
1649                         len = name_len;
1650                         error = 0;
1651                 } else {
1652                         len = BTRFS_NAME_LEN;
1653                         error = REF_ERR_NAME_TOO_LONG;
1654                 }
1655                 read_extent_buffer(eb, namebuf,
1656                                    (unsigned long)(extref + 1), len);
1657                 add_inode_backref(inode_cache, key->objectid, parent,
1658                                   index, namebuf, len, 0, key->type, error);
1659
1660                 len = sizeof(*extref) + name_len;
1661                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1662                 cur += len;
1663         }
1664         return 0;
1665
1666 }
1667
1668 static int count_csum_range(struct btrfs_root *root, u64 start,
1669                             u64 len, u64 *found)
1670 {
1671         struct btrfs_key key;
1672         struct btrfs_path path;
1673         struct extent_buffer *leaf;
1674         int ret;
1675         size_t size;
1676         *found = 0;
1677         u64 csum_end;
1678         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1679
1680         btrfs_init_path(&path);
1681
1682         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1683         key.offset = start;
1684         key.type = BTRFS_EXTENT_CSUM_KEY;
1685
1686         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1687                                 &key, &path, 0, 0);
1688         if (ret < 0)
1689                 goto out;
1690         if (ret > 0 && path.slots[0] > 0) {
1691                 leaf = path.nodes[0];
1692                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1693                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1694                     key.type == BTRFS_EXTENT_CSUM_KEY)
1695                         path.slots[0]--;
1696         }
1697
1698         while (len > 0) {
1699                 leaf = path.nodes[0];
1700                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1701                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1702                         if (ret > 0)
1703                                 break;
1704                         else if (ret < 0)
1705                                 goto out;
1706                         leaf = path.nodes[0];
1707                 }
1708
1709                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1710                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1711                     key.type != BTRFS_EXTENT_CSUM_KEY)
1712                         break;
1713
1714                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1715                 if (key.offset >= start + len)
1716                         break;
1717
1718                 if (key.offset > start)
1719                         start = key.offset;
1720
1721                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1722                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1723                 if (csum_end > start) {
1724                         size = min(csum_end - start, len);
1725                         len -= size;
1726                         start += size;
1727                         *found += size;
1728                 }
1729
1730                 path.slots[0]++;
1731         }
1732 out:
1733         btrfs_release_path(&path);
1734         if (ret < 0)
1735                 return ret;
1736         return 0;
1737 }
1738
1739 static int process_file_extent(struct btrfs_root *root,
1740                                 struct extent_buffer *eb,
1741                                 int slot, struct btrfs_key *key,
1742                                 struct shared_node *active_node)
1743 {
1744         struct inode_record *rec;
1745         struct btrfs_file_extent_item *fi;
1746         u64 num_bytes = 0;
1747         u64 disk_bytenr = 0;
1748         u64 extent_offset = 0;
1749         u64 mask = root->sectorsize - 1;
1750         int extent_type;
1751         int ret;
1752
1753         rec = active_node->current;
1754         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1755         rec->found_file_extent = 1;
1756
1757         if (rec->extent_start == (u64)-1) {
1758                 rec->extent_start = key->offset;
1759                 rec->extent_end = key->offset;
1760         }
1761
1762         if (rec->extent_end > key->offset)
1763                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1764         else if (rec->extent_end < key->offset) {
1765                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1766                                            key->offset - rec->extent_end);
1767                 if (ret < 0)
1768                         return ret;
1769         }
1770
1771         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1772         extent_type = btrfs_file_extent_type(eb, fi);
1773
1774         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1775                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1776                 if (num_bytes == 0)
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 rec->found_size += num_bytes;
1779                 num_bytes = (num_bytes + mask) & ~mask;
1780         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1781                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1782                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1783                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1784                 extent_offset = btrfs_file_extent_offset(eb, fi);
1785                 if (num_bytes == 0 || (num_bytes & mask))
1786                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787                 if (num_bytes + extent_offset >
1788                     btrfs_file_extent_ram_bytes(eb, fi))
1789                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1791                     (btrfs_file_extent_compression(eb, fi) ||
1792                      btrfs_file_extent_encryption(eb, fi) ||
1793                      btrfs_file_extent_other_encoding(eb, fi)))
1794                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1795                 if (disk_bytenr > 0)
1796                         rec->found_size += num_bytes;
1797         } else {
1798                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1799         }
1800         rec->extent_end = key->offset + num_bytes;
1801
1802         /*
1803          * The data reloc tree will copy full extents into its inode and then
1804          * copy the corresponding csums.  Because the extent it copied could be
1805          * a preallocated extent that hasn't been written to yet there may be no
1806          * csums to copy, ergo we won't have csums for our file extent.  This is
1807          * ok so just don't bother checking csums if the inode belongs to the
1808          * data reloc tree.
1809          */
1810         if (disk_bytenr > 0 &&
1811             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1812                 u64 found;
1813                 if (btrfs_file_extent_compression(eb, fi))
1814                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1815                 else
1816                         disk_bytenr += extent_offset;
1817
1818                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1819                 if (ret < 0)
1820                         return ret;
1821                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1822                         if (found > 0)
1823                                 rec->found_csum_item = 1;
1824                         if (found < num_bytes)
1825                                 rec->some_csum_missing = 1;
1826                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1827                         if (found > 0)
1828                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1829                 }
1830         }
1831         return 0;
1832 }
1833
1834 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1835                             struct walk_control *wc)
1836 {
1837         struct btrfs_key key;
1838         u32 nritems;
1839         int i;
1840         int ret = 0;
1841         struct cache_tree *inode_cache;
1842         struct shared_node *active_node;
1843
1844         if (wc->root_level == wc->active_node &&
1845             btrfs_root_refs(&root->root_item) == 0)
1846                 return 0;
1847
1848         active_node = wc->nodes[wc->active_node];
1849         inode_cache = &active_node->inode_cache;
1850         nritems = btrfs_header_nritems(eb);
1851         for (i = 0; i < nritems; i++) {
1852                 btrfs_item_key_to_cpu(eb, &key, i);
1853
1854                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1855                         continue;
1856                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1857                         continue;
1858
1859                 if (active_node->current == NULL ||
1860                     active_node->current->ino < key.objectid) {
1861                         if (active_node->current) {
1862                                 active_node->current->checked = 1;
1863                                 maybe_free_inode_rec(inode_cache,
1864                                                      active_node->current);
1865                         }
1866                         active_node->current = get_inode_rec(inode_cache,
1867                                                              key.objectid, 1);
1868                         BUG_ON(IS_ERR(active_node->current));
1869                 }
1870                 switch (key.type) {
1871                 case BTRFS_DIR_ITEM_KEY:
1872                 case BTRFS_DIR_INDEX_KEY:
1873                         ret = process_dir_item(root, eb, i, &key, active_node);
1874                         break;
1875                 case BTRFS_INODE_REF_KEY:
1876                         ret = process_inode_ref(eb, i, &key, active_node);
1877                         break;
1878                 case BTRFS_INODE_EXTREF_KEY:
1879                         ret = process_inode_extref(eb, i, &key, active_node);
1880                         break;
1881                 case BTRFS_INODE_ITEM_KEY:
1882                         ret = process_inode_item(eb, i, &key, active_node);
1883                         break;
1884                 case BTRFS_EXTENT_DATA_KEY:
1885                         ret = process_file_extent(root, eb, i, &key,
1886                                                   active_node);
1887                         break;
1888                 default:
1889                         break;
1890                 };
1891         }
1892         return ret;
1893 }
1894
1895 static void reada_walk_down(struct btrfs_root *root,
1896                             struct extent_buffer *node, int slot)
1897 {
1898         u64 bytenr;
1899         u64 ptr_gen;
1900         u32 nritems;
1901         u32 blocksize;
1902         int i;
1903         int level;
1904
1905         level = btrfs_header_level(node);
1906         if (level != 1)
1907                 return;
1908
1909         nritems = btrfs_header_nritems(node);
1910         blocksize = root->nodesize;
1911         for (i = slot; i < nritems; i++) {
1912                 bytenr = btrfs_node_blockptr(node, i);
1913                 ptr_gen = btrfs_node_ptr_generation(node, i);
1914                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1915         }
1916 }
1917
1918 /*
1919  * Check the child node/leaf by the following condition:
1920  * 1. the first item key of the node/leaf should be the same with the one
1921  *    in parent.
1922  * 2. block in parent node should match the child node/leaf.
1923  * 3. generation of parent node and child's header should be consistent.
1924  *
1925  * Or the child node/leaf pointed by the key in parent is not valid.
1926  *
1927  * We hope to check leaf owner too, but since subvol may share leaves,
1928  * which makes leaf owner check not so strong, key check should be
1929  * sufficient enough for that case.
1930  */
1931 static int check_child_node(struct btrfs_root *root,
1932                             struct extent_buffer *parent, int slot,
1933                             struct extent_buffer *child)
1934 {
1935         struct btrfs_key parent_key;
1936         struct btrfs_key child_key;
1937         int ret = 0;
1938
1939         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1940         if (btrfs_header_level(child) == 0)
1941                 btrfs_item_key_to_cpu(child, &child_key, 0);
1942         else
1943                 btrfs_node_key_to_cpu(child, &child_key, 0);
1944
1945         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1946                 ret = -EINVAL;
1947                 fprintf(stderr,
1948                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1949                         parent_key.objectid, parent_key.type, parent_key.offset,
1950                         child_key.objectid, child_key.type, child_key.offset);
1951         }
1952         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1953                 ret = -EINVAL;
1954                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1955                         btrfs_node_blockptr(parent, slot),
1956                         btrfs_header_bytenr(child));
1957         }
1958         if (btrfs_node_ptr_generation(parent, slot) !=
1959             btrfs_header_generation(child)) {
1960                 ret = -EINVAL;
1961                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1962                         btrfs_header_generation(child),
1963                         btrfs_node_ptr_generation(parent, slot));
1964         }
1965         return ret;
1966 }
1967
1968 struct node_refs {
1969         u64 bytenr[BTRFS_MAX_LEVEL];
1970         u64 refs[BTRFS_MAX_LEVEL];
1971 };
1972
1973 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1974                           struct walk_control *wc, int *level,
1975                           struct node_refs *nrefs)
1976 {
1977         enum btrfs_tree_block_status status;
1978         u64 bytenr;
1979         u64 ptr_gen;
1980         struct extent_buffer *next;
1981         struct extent_buffer *cur;
1982         u32 blocksize;
1983         int ret, err = 0;
1984         u64 refs;
1985
1986         WARN_ON(*level < 0);
1987         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1988
1989         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1990                 refs = nrefs->refs[*level];
1991                 ret = 0;
1992         } else {
1993                 ret = btrfs_lookup_extent_info(NULL, root,
1994                                        path->nodes[*level]->start,
1995                                        *level, 1, &refs, NULL);
1996                 if (ret < 0) {
1997                         err = ret;
1998                         goto out;
1999                 }
2000                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2001                 nrefs->refs[*level] = refs;
2002         }
2003
2004         if (refs > 1) {
2005                 ret = enter_shared_node(root, path->nodes[*level]->start,
2006                                         refs, wc, *level);
2007                 if (ret > 0) {
2008                         err = ret;
2009                         goto out;
2010                 }
2011         }
2012
2013         while (*level >= 0) {
2014                 WARN_ON(*level < 0);
2015                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2016                 cur = path->nodes[*level];
2017
2018                 if (btrfs_header_level(cur) != *level)
2019                         WARN_ON(1);
2020
2021                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2022                         break;
2023                 if (*level == 0) {
2024                         ret = process_one_leaf(root, cur, wc);
2025                         if (ret < 0)
2026                                 err = ret;
2027                         break;
2028                 }
2029                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2030                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2031                 blocksize = root->nodesize;
2032
2033                 if (bytenr == nrefs->bytenr[*level - 1]) {
2034                         refs = nrefs->refs[*level - 1];
2035                 } else {
2036                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2037                                         *level - 1, 1, &refs, NULL);
2038                         if (ret < 0) {
2039                                 refs = 0;
2040                         } else {
2041                                 nrefs->bytenr[*level - 1] = bytenr;
2042                                 nrefs->refs[*level - 1] = refs;
2043                         }
2044                 }
2045
2046                 if (refs > 1) {
2047                         ret = enter_shared_node(root, bytenr, refs,
2048                                                 wc, *level - 1);
2049                         if (ret > 0) {
2050                                 path->slots[*level]++;
2051                                 continue;
2052                         }
2053                 }
2054
2055                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2056                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2057                         free_extent_buffer(next);
2058                         reada_walk_down(root, cur, path->slots[*level]);
2059                         next = read_tree_block(root, bytenr, blocksize,
2060                                                ptr_gen);
2061                         if (!extent_buffer_uptodate(next)) {
2062                                 struct btrfs_key node_key;
2063
2064                                 btrfs_node_key_to_cpu(path->nodes[*level],
2065                                                       &node_key,
2066                                                       path->slots[*level]);
2067                                 btrfs_add_corrupt_extent_record(root->fs_info,
2068                                                 &node_key,
2069                                                 path->nodes[*level]->start,
2070                                                 root->nodesize, *level);
2071                                 err = -EIO;
2072                                 goto out;
2073                         }
2074                 }
2075
2076                 ret = check_child_node(root, cur, path->slots[*level], next);
2077                 if (ret) {
2078                         err = ret;
2079                         goto out;
2080                 }
2081
2082                 if (btrfs_is_leaf(next))
2083                         status = btrfs_check_leaf(root, NULL, next);
2084                 else
2085                         status = btrfs_check_node(root, NULL, next);
2086                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2087                         free_extent_buffer(next);
2088                         err = -EIO;
2089                         goto out;
2090                 }
2091
2092                 *level = *level - 1;
2093                 free_extent_buffer(path->nodes[*level]);
2094                 path->nodes[*level] = next;
2095                 path->slots[*level] = 0;
2096         }
2097 out:
2098         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2099         return err;
2100 }
2101
2102 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2103                         struct walk_control *wc, int *level)
2104 {
2105         int i;
2106         struct extent_buffer *leaf;
2107
2108         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2109                 leaf = path->nodes[i];
2110                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2111                         path->slots[i]++;
2112                         *level = i;
2113                         return 0;
2114                 } else {
2115                         free_extent_buffer(path->nodes[*level]);
2116                         path->nodes[*level] = NULL;
2117                         BUG_ON(*level > wc->active_node);
2118                         if (*level == wc->active_node)
2119                                 leave_shared_node(root, wc, *level);
2120                         *level = i + 1;
2121                 }
2122         }
2123         return 1;
2124 }
2125
2126 static int check_root_dir(struct inode_record *rec)
2127 {
2128         struct inode_backref *backref;
2129         int ret = -1;
2130
2131         if (!rec->found_inode_item || rec->errors)
2132                 goto out;
2133         if (rec->nlink != 1 || rec->found_link != 0)
2134                 goto out;
2135         if (list_empty(&rec->backrefs))
2136                 goto out;
2137         backref = to_inode_backref(rec->backrefs.next);
2138         if (!backref->found_inode_ref)
2139                 goto out;
2140         if (backref->index != 0 || backref->namelen != 2 ||
2141             memcmp(backref->name, "..", 2))
2142                 goto out;
2143         if (backref->found_dir_index || backref->found_dir_item)
2144                 goto out;
2145         ret = 0;
2146 out:
2147         return ret;
2148 }
2149
2150 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2151                               struct btrfs_root *root, struct btrfs_path *path,
2152                               struct inode_record *rec)
2153 {
2154         struct btrfs_inode_item *ei;
2155         struct btrfs_key key;
2156         int ret;
2157
2158         key.objectid = rec->ino;
2159         key.type = BTRFS_INODE_ITEM_KEY;
2160         key.offset = (u64)-1;
2161
2162         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2163         if (ret < 0)
2164                 goto out;
2165         if (ret) {
2166                 if (!path->slots[0]) {
2167                         ret = -ENOENT;
2168                         goto out;
2169                 }
2170                 path->slots[0]--;
2171                 ret = 0;
2172         }
2173         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2174         if (key.objectid != rec->ino) {
2175                 ret = -ENOENT;
2176                 goto out;
2177         }
2178
2179         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2180                             struct btrfs_inode_item);
2181         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2182         btrfs_mark_buffer_dirty(path->nodes[0]);
2183         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2184         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2185                root->root_key.objectid);
2186 out:
2187         btrfs_release_path(path);
2188         return ret;
2189 }
2190
2191 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2192                                     struct btrfs_root *root,
2193                                     struct btrfs_path *path,
2194                                     struct inode_record *rec)
2195 {
2196         int ret;
2197
2198         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2199         btrfs_release_path(path);
2200         if (!ret)
2201                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2202         return ret;
2203 }
2204
2205 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2206                                struct btrfs_root *root,
2207                                struct btrfs_path *path,
2208                                struct inode_record *rec)
2209 {
2210         struct btrfs_inode_item *ei;
2211         struct btrfs_key key;
2212         int ret = 0;
2213
2214         key.objectid = rec->ino;
2215         key.type = BTRFS_INODE_ITEM_KEY;
2216         key.offset = 0;
2217
2218         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2219         if (ret) {
2220                 if (ret > 0)
2221                         ret = -ENOENT;
2222                 goto out;
2223         }
2224
2225         /* Since ret == 0, no need to check anything */
2226         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2227                             struct btrfs_inode_item);
2228         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2229         btrfs_mark_buffer_dirty(path->nodes[0]);
2230         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2231         printf("reset nbytes for ino %llu root %llu\n",
2232                rec->ino, root->root_key.objectid);
2233 out:
2234         btrfs_release_path(path);
2235         return ret;
2236 }
2237
2238 static int add_missing_dir_index(struct btrfs_root *root,
2239                                  struct cache_tree *inode_cache,
2240                                  struct inode_record *rec,
2241                                  struct inode_backref *backref)
2242 {
2243         struct btrfs_path *path;
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *dir_item;
2246         struct extent_buffer *leaf;
2247         struct btrfs_key key;
2248         struct btrfs_disk_key disk_key;
2249         struct inode_record *dir_rec;
2250         unsigned long name_ptr;
2251         u32 data_size = sizeof(*dir_item) + backref->namelen;
2252         int ret;
2253
2254         path = btrfs_alloc_path();
2255         if (!path)
2256                 return -ENOMEM;
2257
2258         trans = btrfs_start_transaction(root, 1);
2259         if (IS_ERR(trans)) {
2260                 btrfs_free_path(path);
2261                 return PTR_ERR(trans);
2262         }
2263
2264         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2265                 (unsigned long long)rec->ino);
2266         key.objectid = backref->dir;
2267         key.type = BTRFS_DIR_INDEX_KEY;
2268         key.offset = backref->index;
2269
2270         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2271         BUG_ON(ret);
2272
2273         leaf = path->nodes[0];
2274         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2275
2276         disk_key.objectid = cpu_to_le64(rec->ino);
2277         disk_key.type = BTRFS_INODE_ITEM_KEY;
2278         disk_key.offset = 0;
2279
2280         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2281         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2282         btrfs_set_dir_data_len(leaf, dir_item, 0);
2283         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2284         name_ptr = (unsigned long)(dir_item + 1);
2285         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2286         btrfs_mark_buffer_dirty(leaf);
2287         btrfs_free_path(path);
2288         btrfs_commit_transaction(trans, root);
2289
2290         backref->found_dir_index = 1;
2291         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2292         BUG_ON(IS_ERR(dir_rec));
2293         if (!dir_rec)
2294                 return 0;
2295         dir_rec->found_size += backref->namelen;
2296         if (dir_rec->found_size == dir_rec->isize &&
2297             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2298                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2299         if (dir_rec->found_size != dir_rec->isize)
2300                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2301
2302         return 0;
2303 }
2304
2305 static int delete_dir_index(struct btrfs_root *root,
2306                             struct cache_tree *inode_cache,
2307                             struct inode_record *rec,
2308                             struct inode_backref *backref)
2309 {
2310         struct btrfs_trans_handle *trans;
2311         struct btrfs_dir_item *di;
2312         struct btrfs_path *path;
2313         int ret = 0;
2314
2315         path = btrfs_alloc_path();
2316         if (!path)
2317                 return -ENOMEM;
2318
2319         trans = btrfs_start_transaction(root, 1);
2320         if (IS_ERR(trans)) {
2321                 btrfs_free_path(path);
2322                 return PTR_ERR(trans);
2323         }
2324
2325
2326         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2327                 (unsigned long long)backref->dir,
2328                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2329                 (unsigned long long)root->objectid);
2330
2331         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2332                                     backref->name, backref->namelen,
2333                                     backref->index, -1);
2334         if (IS_ERR(di)) {
2335                 ret = PTR_ERR(di);
2336                 btrfs_free_path(path);
2337                 btrfs_commit_transaction(trans, root);
2338                 if (ret == -ENOENT)
2339                         return 0;
2340                 return ret;
2341         }
2342
2343         if (!di)
2344                 ret = btrfs_del_item(trans, root, path);
2345         else
2346                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2347         BUG_ON(ret);
2348         btrfs_free_path(path);
2349         btrfs_commit_transaction(trans, root);
2350         return ret;
2351 }
2352
2353 static int create_inode_item(struct btrfs_root *root,
2354                              struct inode_record *rec,
2355                              struct inode_backref *backref, int root_dir)
2356 {
2357         struct btrfs_trans_handle *trans;
2358         struct btrfs_inode_item inode_item;
2359         time_t now = time(NULL);
2360         int ret;
2361
2362         trans = btrfs_start_transaction(root, 1);
2363         if (IS_ERR(trans)) {
2364                 ret = PTR_ERR(trans);
2365                 return ret;
2366         }
2367
2368         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2369                 "be incomplete, please check permissions and content after "
2370                 "the fsck completes.\n", (unsigned long long)root->objectid,
2371                 (unsigned long long)rec->ino);
2372
2373         memset(&inode_item, 0, sizeof(inode_item));
2374         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2375         if (root_dir)
2376                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2377         else
2378                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2379         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2380         if (rec->found_dir_item) {
2381                 if (rec->found_file_extent)
2382                         fprintf(stderr, "root %llu inode %llu has both a dir "
2383                                 "item and extents, unsure if it is a dir or a "
2384                                 "regular file so setting it as a directory\n",
2385                                 (unsigned long long)root->objectid,
2386                                 (unsigned long long)rec->ino);
2387                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2388                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2389         } else if (!rec->found_dir_item) {
2390                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2391                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2392         }
2393         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2394         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2395         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2396         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2397         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2398         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2399         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2400         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2401
2402         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2403         BUG_ON(ret);
2404         btrfs_commit_transaction(trans, root);
2405         return 0;
2406 }
2407
2408 static int repair_inode_backrefs(struct btrfs_root *root,
2409                                  struct inode_record *rec,
2410                                  struct cache_tree *inode_cache,
2411                                  int delete)
2412 {
2413         struct inode_backref *tmp, *backref;
2414         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2415         int ret = 0;
2416         int repaired = 0;
2417
2418         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2419                 if (!delete && rec->ino == root_dirid) {
2420                         if (!rec->found_inode_item) {
2421                                 ret = create_inode_item(root, rec, backref, 1);
2422                                 if (ret)
2423                                         break;
2424                                 repaired++;
2425                         }
2426                 }
2427
2428                 /* Index 0 for root dir's are special, don't mess with it */
2429                 if (rec->ino == root_dirid && backref->index == 0)
2430                         continue;
2431
2432                 if (delete &&
2433                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2434                      (backref->found_dir_index && backref->found_inode_ref &&
2435                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2436                         ret = delete_dir_index(root, inode_cache, rec, backref);
2437                         if (ret)
2438                                 break;
2439                         repaired++;
2440                         list_del(&backref->list);
2441                         free(backref);
2442                 }
2443
2444                 if (!delete && !backref->found_dir_index &&
2445                     backref->found_dir_item && backref->found_inode_ref) {
2446                         ret = add_missing_dir_index(root, inode_cache, rec,
2447                                                     backref);
2448                         if (ret)
2449                                 break;
2450                         repaired++;
2451                         if (backref->found_dir_item &&
2452                             backref->found_dir_index &&
2453                             backref->found_dir_index) {
2454                                 if (!backref->errors &&
2455                                     backref->found_inode_ref) {
2456                                         list_del(&backref->list);
2457                                         free(backref);
2458                                 }
2459                         }
2460                 }
2461
2462                 if (!delete && (!backref->found_dir_index &&
2463                                 !backref->found_dir_item &&
2464                                 backref->found_inode_ref)) {
2465                         struct btrfs_trans_handle *trans;
2466                         struct btrfs_key location;
2467
2468                         ret = check_dir_conflict(root, backref->name,
2469                                                  backref->namelen,
2470                                                  backref->dir,
2471                                                  backref->index);
2472                         if (ret) {
2473                                 /*
2474                                  * let nlink fixing routine to handle it,
2475                                  * which can do it better.
2476                                  */
2477                                 ret = 0;
2478                                 break;
2479                         }
2480                         location.objectid = rec->ino;
2481                         location.type = BTRFS_INODE_ITEM_KEY;
2482                         location.offset = 0;
2483
2484                         trans = btrfs_start_transaction(root, 1);
2485                         if (IS_ERR(trans)) {
2486                                 ret = PTR_ERR(trans);
2487                                 break;
2488                         }
2489                         fprintf(stderr, "adding missing dir index/item pair "
2490                                 "for inode %llu\n",
2491                                 (unsigned long long)rec->ino);
2492                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2493                                                     backref->namelen,
2494                                                     backref->dir, &location,
2495                                                     imode_to_type(rec->imode),
2496                                                     backref->index);
2497                         BUG_ON(ret);
2498                         btrfs_commit_transaction(trans, root);
2499                         repaired++;
2500                 }
2501
2502                 if (!delete && (backref->found_inode_ref &&
2503                                 backref->found_dir_index &&
2504                                 backref->found_dir_item &&
2505                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2506                                 !rec->found_inode_item)) {
2507                         ret = create_inode_item(root, rec, backref, 0);
2508                         if (ret)
2509                                 break;
2510                         repaired++;
2511                 }
2512
2513         }
2514         return ret ? ret : repaired;
2515 }
2516
2517 /*
2518  * To determine the file type for nlink/inode_item repair
2519  *
2520  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2521  * Return -ENOENT if file type is not found.
2522  */
2523 static int find_file_type(struct inode_record *rec, u8 *type)
2524 {
2525         struct inode_backref *backref;
2526
2527         /* For inode item recovered case */
2528         if (rec->found_inode_item) {
2529                 *type = imode_to_type(rec->imode);
2530                 return 0;
2531         }
2532
2533         list_for_each_entry(backref, &rec->backrefs, list) {
2534                 if (backref->found_dir_index || backref->found_dir_item) {
2535                         *type = backref->filetype;
2536                         return 0;
2537                 }
2538         }
2539         return -ENOENT;
2540 }
2541
2542 /*
2543  * To determine the file name for nlink repair
2544  *
2545  * Return 0 if file name is found, set name and namelen.
2546  * Return -ENOENT if file name is not found.
2547  */
2548 static int find_file_name(struct inode_record *rec,
2549                           char *name, int *namelen)
2550 {
2551         struct inode_backref *backref;
2552
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 if (backref->found_dir_index || backref->found_dir_item ||
2555                     backref->found_inode_ref) {
2556                         memcpy(name, backref->name, backref->namelen);
2557                         *namelen = backref->namelen;
2558                         return 0;
2559                 }
2560         }
2561         return -ENOENT;
2562 }
2563
2564 /* Reset the nlink of the inode to the correct one */
2565 static int reset_nlink(struct btrfs_trans_handle *trans,
2566                        struct btrfs_root *root,
2567                        struct btrfs_path *path,
2568                        struct inode_record *rec)
2569 {
2570         struct inode_backref *backref;
2571         struct inode_backref *tmp;
2572         struct btrfs_key key;
2573         struct btrfs_inode_item *inode_item;
2574         int ret = 0;
2575
2576         /* We don't believe this either, reset it and iterate backref */
2577         rec->found_link = 0;
2578
2579         /* Remove all backref including the valid ones */
2580         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2581                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2582                                    backref->index, backref->name,
2583                                    backref->namelen, 0);
2584                 if (ret < 0)
2585                         goto out;
2586
2587                 /* remove invalid backref, so it won't be added back */
2588                 if (!(backref->found_dir_index &&
2589                       backref->found_dir_item &&
2590                       backref->found_inode_ref)) {
2591                         list_del(&backref->list);
2592                         free(backref);
2593                 } else {
2594                         rec->found_link++;
2595                 }
2596         }
2597
2598         /* Set nlink to 0 */
2599         key.objectid = rec->ino;
2600         key.type = BTRFS_INODE_ITEM_KEY;
2601         key.offset = 0;
2602         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2603         if (ret < 0)
2604                 goto out;
2605         if (ret > 0) {
2606                 ret = -ENOENT;
2607                 goto out;
2608         }
2609         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2610                                     struct btrfs_inode_item);
2611         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2612         btrfs_mark_buffer_dirty(path->nodes[0]);
2613         btrfs_release_path(path);
2614
2615         /*
2616          * Add back valid inode_ref/dir_item/dir_index,
2617          * add_link() will handle the nlink inc, so new nlink must be correct
2618          */
2619         list_for_each_entry(backref, &rec->backrefs, list) {
2620                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2621                                      backref->name, backref->namelen,
2622                                      backref->filetype, &backref->index, 1);
2623                 if (ret < 0)
2624                         goto out;
2625         }
2626 out:
2627         btrfs_release_path(path);
2628         return ret;
2629 }
2630
2631 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2632                                struct btrfs_root *root,
2633                                struct btrfs_path *path,
2634                                struct inode_record *rec)
2635 {
2636         char *dir_name = "lost+found";
2637         char namebuf[BTRFS_NAME_LEN] = {0};
2638         u64 lost_found_ino;
2639         u32 mode = 0700;
2640         u8 type = 0;
2641         int namelen = 0;
2642         int name_recovered = 0;
2643         int type_recovered = 0;
2644         int ret = 0;
2645
2646         /*
2647          * Get file name and type first before these invalid inode ref
2648          * are deleted by remove_all_invalid_backref()
2649          */
2650         name_recovered = !find_file_name(rec, namebuf, &namelen);
2651         type_recovered = !find_file_type(rec, &type);
2652
2653         if (!name_recovered) {
2654                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2655                        rec->ino, rec->ino);
2656                 namelen = count_digits(rec->ino);
2657                 sprintf(namebuf, "%llu", rec->ino);
2658                 name_recovered = 1;
2659         }
2660         if (!type_recovered) {
2661                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2662                        rec->ino);
2663                 type = BTRFS_FT_REG_FILE;
2664                 type_recovered = 1;
2665         }
2666
2667         ret = reset_nlink(trans, root, path, rec);
2668         if (ret < 0) {
2669                 fprintf(stderr,
2670                         "Failed to reset nlink for inode %llu: %s\n",
2671                         rec->ino, strerror(-ret));
2672                 goto out;
2673         }
2674
2675         if (rec->found_link == 0) {
2676                 lost_found_ino = root->highest_inode;
2677                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2678                         ret = -EOVERFLOW;
2679                         goto out;
2680                 }
2681                 lost_found_ino++;
2682                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2683                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2684                                   mode);
2685                 if (ret < 0) {
2686                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2687                                 dir_name, strerror(-ret));
2688                         goto out;
2689                 }
2690                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2691                                      namebuf, namelen, type, NULL, 1);
2692                 /*
2693                  * Add ".INO" suffix several times to handle case where
2694                  * "FILENAME.INO" is already taken by another file.
2695                  */
2696                 while (ret == -EEXIST) {
2697                         /*
2698                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2699                          */
2700                         if (namelen + count_digits(rec->ino) + 1 >
2701                             BTRFS_NAME_LEN) {
2702                                 ret = -EFBIG;
2703                                 goto out;
2704                         }
2705                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2706                                  ".%llu", rec->ino);
2707                         namelen += count_digits(rec->ino) + 1;
2708                         ret = btrfs_add_link(trans, root, rec->ino,
2709                                              lost_found_ino, namebuf,
2710                                              namelen, type, NULL, 1);
2711                 }
2712                 if (ret < 0) {
2713                         fprintf(stderr,
2714                                 "Failed to link the inode %llu to %s dir: %s\n",
2715                                 rec->ino, dir_name, strerror(-ret));
2716                         goto out;
2717                 }
2718                 /*
2719                  * Just increase the found_link, don't actually add the
2720                  * backref. This will make things easier and this inode
2721                  * record will be freed after the repair is done.
2722                  * So fsck will not report problem about this inode.
2723                  */
2724                 rec->found_link++;
2725                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2726                        namelen, namebuf, dir_name);
2727         }
2728         printf("Fixed the nlink of inode %llu\n", rec->ino);
2729 out:
2730         /*
2731          * Clear the flag anyway, or we will loop forever for the same inode
2732          * as it will not be removed from the bad inode list and the dead loop
2733          * happens.
2734          */
2735         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2736         btrfs_release_path(path);
2737         return ret;
2738 }
2739
2740 /*
2741  * Check if there is any normal(reg or prealloc) file extent for given
2742  * ino.
2743  * This is used to determine the file type when neither its dir_index/item or
2744  * inode_item exists.
2745  *
2746  * This will *NOT* report error, if any error happens, just consider it does
2747  * not have any normal file extent.
2748  */
2749 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2750 {
2751         struct btrfs_path *path;
2752         struct btrfs_key key;
2753         struct btrfs_key found_key;
2754         struct btrfs_file_extent_item *fi;
2755         u8 type;
2756         int ret = 0;
2757
2758         path = btrfs_alloc_path();
2759         if (!path)
2760                 goto out;
2761         key.objectid = ino;
2762         key.type = BTRFS_EXTENT_DATA_KEY;
2763         key.offset = 0;
2764
2765         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2766         if (ret < 0) {
2767                 ret = 0;
2768                 goto out;
2769         }
2770         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2771                 ret = btrfs_next_leaf(root, path);
2772                 if (ret) {
2773                         ret = 0;
2774                         goto out;
2775                 }
2776         }
2777         while (1) {
2778                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2779                                       path->slots[0]);
2780                 if (found_key.objectid != ino ||
2781                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2782                         break;
2783                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2784                                     struct btrfs_file_extent_item);
2785                 type = btrfs_file_extent_type(path->nodes[0], fi);
2786                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2787                         ret = 1;
2788                         goto out;
2789                 }
2790         }
2791 out:
2792         btrfs_free_path(path);
2793         return ret;
2794 }
2795
2796 static u32 btrfs_type_to_imode(u8 type)
2797 {
2798         static u32 imode_by_btrfs_type[] = {
2799                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2800                 [BTRFS_FT_DIR]          = S_IFDIR,
2801                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2802                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2803                 [BTRFS_FT_FIFO]         = S_IFIFO,
2804                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2805                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2806         };
2807
2808         return imode_by_btrfs_type[(type)];
2809 }
2810
2811 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2812                                 struct btrfs_root *root,
2813                                 struct btrfs_path *path,
2814                                 struct inode_record *rec)
2815 {
2816         u8 filetype;
2817         u32 mode = 0700;
2818         int type_recovered = 0;
2819         int ret = 0;
2820
2821         printf("Trying to rebuild inode:%llu\n", rec->ino);
2822
2823         type_recovered = !find_file_type(rec, &filetype);
2824
2825         /*
2826          * Try to determine inode type if type not found.
2827          *
2828          * For found regular file extent, it must be FILE.
2829          * For found dir_item/index, it must be DIR.
2830          *
2831          * For undetermined one, use FILE as fallback.
2832          *
2833          * TODO:
2834          * 1. If found backref(inode_index/item is already handled) to it,
2835          *    it must be DIR.
2836          *    Need new inode-inode ref structure to allow search for that.
2837          */
2838         if (!type_recovered) {
2839                 if (rec->found_file_extent &&
2840                     find_normal_file_extent(root, rec->ino)) {
2841                         type_recovered = 1;
2842                         filetype = BTRFS_FT_REG_FILE;
2843                 } else if (rec->found_dir_item) {
2844                         type_recovered = 1;
2845                         filetype = BTRFS_FT_DIR;
2846                 } else if (!list_empty(&rec->orphan_extents)) {
2847                         type_recovered = 1;
2848                         filetype = BTRFS_FT_REG_FILE;
2849                 } else{
2850                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2851                                rec->ino);
2852                         type_recovered = 1;
2853                         filetype = BTRFS_FT_REG_FILE;
2854                 }
2855         }
2856
2857         ret = btrfs_new_inode(trans, root, rec->ino,
2858                               mode | btrfs_type_to_imode(filetype));
2859         if (ret < 0)
2860                 goto out;
2861
2862         /*
2863          * Here inode rebuild is done, we only rebuild the inode item,
2864          * don't repair the nlink(like move to lost+found).
2865          * That is the job of nlink repair.
2866          *
2867          * We just fill the record and return
2868          */
2869         rec->found_dir_item = 1;
2870         rec->imode = mode | btrfs_type_to_imode(filetype);
2871         rec->nlink = 0;
2872         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2873         /* Ensure the inode_nlinks repair function will be called */
2874         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2875 out:
2876         return ret;
2877 }
2878
2879 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2880                                       struct btrfs_root *root,
2881                                       struct btrfs_path *path,
2882                                       struct inode_record *rec)
2883 {
2884         struct orphan_data_extent *orphan;
2885         struct orphan_data_extent *tmp;
2886         int ret = 0;
2887
2888         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2889                 /*
2890                  * Check for conflicting file extents
2891                  *
2892                  * Here we don't know whether the extents is compressed or not,
2893                  * so we can only assume it not compressed nor data offset,
2894                  * and use its disk_len as extent length.
2895                  */
2896                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2897                                        orphan->offset, orphan->disk_len, 0);
2898                 btrfs_release_path(path);
2899                 if (ret < 0)
2900                         goto out;
2901                 if (!ret) {
2902                         fprintf(stderr,
2903                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2904                                 orphan->disk_bytenr, orphan->disk_len);
2905                         ret = btrfs_free_extent(trans,
2906                                         root->fs_info->extent_root,
2907                                         orphan->disk_bytenr, orphan->disk_len,
2908                                         0, root->objectid, orphan->objectid,
2909                                         orphan->offset);
2910                         if (ret < 0)
2911                                 goto out;
2912                 }
2913                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2914                                 orphan->offset, orphan->disk_bytenr,
2915                                 orphan->disk_len, orphan->disk_len);
2916                 if (ret < 0)
2917                         goto out;
2918
2919                 /* Update file size info */
2920                 rec->found_size += orphan->disk_len;
2921                 if (rec->found_size == rec->nbytes)
2922                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2923
2924                 /* Update the file extent hole info too */
2925                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2926                                            orphan->disk_len);
2927                 if (ret < 0)
2928                         goto out;
2929                 if (RB_EMPTY_ROOT(&rec->holes))
2930                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2931
2932                 list_del(&orphan->list);
2933                 free(orphan);
2934         }
2935         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2936 out:
2937         return ret;
2938 }
2939
2940 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2941                                         struct btrfs_root *root,
2942                                         struct btrfs_path *path,
2943                                         struct inode_record *rec)
2944 {
2945         struct rb_node *node;
2946         struct file_extent_hole *hole;
2947         int found = 0;
2948         int ret = 0;
2949
2950         node = rb_first(&rec->holes);
2951
2952         while (node) {
2953                 found = 1;
2954                 hole = rb_entry(node, struct file_extent_hole, node);
2955                 ret = btrfs_punch_hole(trans, root, rec->ino,
2956                                        hole->start, hole->len);
2957                 if (ret < 0)
2958                         goto out;
2959                 ret = del_file_extent_hole(&rec->holes, hole->start,
2960                                            hole->len);
2961                 if (ret < 0)
2962                         goto out;
2963                 if (RB_EMPTY_ROOT(&rec->holes))
2964                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2965                 node = rb_first(&rec->holes);
2966         }
2967         /* special case for a file losing all its file extent */
2968         if (!found) {
2969                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2970                                        round_up(rec->isize, root->sectorsize));
2971                 if (ret < 0)
2972                         goto out;
2973         }
2974         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2975                rec->ino, root->objectid);
2976 out:
2977         return ret;
2978 }
2979
2980 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2981 {
2982         struct btrfs_trans_handle *trans;
2983         struct btrfs_path *path;
2984         int ret = 0;
2985
2986         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2987                              I_ERR_NO_ORPHAN_ITEM |
2988                              I_ERR_LINK_COUNT_WRONG |
2989                              I_ERR_NO_INODE_ITEM |
2990                              I_ERR_FILE_EXTENT_ORPHAN |
2991                              I_ERR_FILE_EXTENT_DISCOUNT|
2992                              I_ERR_FILE_NBYTES_WRONG)))
2993                 return rec->errors;
2994
2995         path = btrfs_alloc_path();
2996         if (!path)
2997                 return -ENOMEM;
2998
2999         /*
3000          * For nlink repair, it may create a dir and add link, so
3001          * 2 for parent(256)'s dir_index and dir_item
3002          * 2 for lost+found dir's inode_item and inode_ref
3003          * 1 for the new inode_ref of the file
3004          * 2 for lost+found dir's dir_index and dir_item for the file
3005          */
3006         trans = btrfs_start_transaction(root, 7);
3007         if (IS_ERR(trans)) {
3008                 btrfs_free_path(path);
3009                 return PTR_ERR(trans);
3010         }
3011
3012         if (rec->errors & I_ERR_NO_INODE_ITEM)
3013                 ret = repair_inode_no_item(trans, root, path, rec);
3014         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3015                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3016         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3017                 ret = repair_inode_discount_extent(trans, root, path, rec);
3018         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3019                 ret = repair_inode_isize(trans, root, path, rec);
3020         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3021                 ret = repair_inode_orphan_item(trans, root, path, rec);
3022         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3023                 ret = repair_inode_nlinks(trans, root, path, rec);
3024         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3025                 ret = repair_inode_nbytes(trans, root, path, rec);
3026         btrfs_commit_transaction(trans, root);
3027         btrfs_free_path(path);
3028         return ret;
3029 }
3030
3031 static int check_inode_recs(struct btrfs_root *root,
3032                             struct cache_tree *inode_cache)
3033 {
3034         struct cache_extent *cache;
3035         struct ptr_node *node;
3036         struct inode_record *rec;
3037         struct inode_backref *backref;
3038         int stage = 0;
3039         int ret = 0;
3040         int err = 0;
3041         u64 error = 0;
3042         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3043
3044         if (btrfs_root_refs(&root->root_item) == 0) {
3045                 if (!cache_tree_empty(inode_cache))
3046                         fprintf(stderr, "warning line %d\n", __LINE__);
3047                 return 0;
3048         }
3049
3050         /*
3051          * We need to record the highest inode number for later 'lost+found'
3052          * dir creation.
3053          * We must select an ino not used/referred by any existing inode, or
3054          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3055          * this may cause 'lost+found' dir has wrong nlinks.
3056          */
3057         cache = last_cache_extent(inode_cache);
3058         if (cache) {
3059                 node = container_of(cache, struct ptr_node, cache);
3060                 rec = node->data;
3061                 if (rec->ino > root->highest_inode)
3062                         root->highest_inode = rec->ino;
3063         }
3064
3065         /*
3066          * We need to repair backrefs first because we could change some of the
3067          * errors in the inode recs.
3068          *
3069          * We also need to go through and delete invalid backrefs first and then
3070          * add the correct ones second.  We do this because we may get EEXIST
3071          * when adding back the correct index because we hadn't yet deleted the
3072          * invalid index.
3073          *
3074          * For example, if we were missing a dir index then the directories
3075          * isize would be wrong, so if we fixed the isize to what we thought it
3076          * would be and then fixed the backref we'd still have a invalid fs, so
3077          * we need to add back the dir index and then check to see if the isize
3078          * is still wrong.
3079          */
3080         while (stage < 3) {
3081                 stage++;
3082                 if (stage == 3 && !err)
3083                         break;
3084
3085                 cache = search_cache_extent(inode_cache, 0);
3086                 while (repair && cache) {
3087                         node = container_of(cache, struct ptr_node, cache);
3088                         rec = node->data;
3089                         cache = next_cache_extent(cache);
3090
3091                         /* Need to free everything up and rescan */
3092                         if (stage == 3) {
3093                                 remove_cache_extent(inode_cache, &node->cache);
3094                                 free(node);
3095                                 free_inode_rec(rec);
3096                                 continue;
3097                         }
3098
3099                         if (list_empty(&rec->backrefs))
3100                                 continue;
3101
3102                         ret = repair_inode_backrefs(root, rec, inode_cache,
3103                                                     stage == 1);
3104                         if (ret < 0) {
3105                                 err = ret;
3106                                 stage = 2;
3107                                 break;
3108                         } if (ret > 0) {
3109                                 err = -EAGAIN;
3110                         }
3111                 }
3112         }
3113         if (err)
3114                 return err;
3115
3116         rec = get_inode_rec(inode_cache, root_dirid, 0);
3117         BUG_ON(IS_ERR(rec));
3118         if (rec) {
3119                 ret = check_root_dir(rec);
3120                 if (ret) {
3121                         fprintf(stderr, "root %llu root dir %llu error\n",
3122                                 (unsigned long long)root->root_key.objectid,
3123                                 (unsigned long long)root_dirid);
3124                         print_inode_error(root, rec);
3125                         error++;
3126                 }
3127         } else {
3128                 if (repair) {
3129                         struct btrfs_trans_handle *trans;
3130
3131                         trans = btrfs_start_transaction(root, 1);
3132                         if (IS_ERR(trans)) {
3133                                 err = PTR_ERR(trans);
3134                                 return err;
3135                         }
3136
3137                         fprintf(stderr,
3138                                 "root %llu missing its root dir, recreating\n",
3139                                 (unsigned long long)root->objectid);
3140
3141                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3142                         BUG_ON(ret);
3143
3144                         btrfs_commit_transaction(trans, root);
3145                         return -EAGAIN;
3146                 }
3147
3148                 fprintf(stderr, "root %llu root dir %llu not found\n",
3149                         (unsigned long long)root->root_key.objectid,
3150                         (unsigned long long)root_dirid);
3151         }
3152
3153         while (1) {
3154                 cache = search_cache_extent(inode_cache, 0);
3155                 if (!cache)
3156                         break;
3157                 node = container_of(cache, struct ptr_node, cache);
3158                 rec = node->data;
3159                 remove_cache_extent(inode_cache, &node->cache);
3160                 free(node);
3161                 if (rec->ino == root_dirid ||
3162                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3163                         free_inode_rec(rec);
3164                         continue;
3165                 }
3166
3167                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3168                         ret = check_orphan_item(root, rec->ino);
3169                         if (ret == 0)
3170                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3171                         if (can_free_inode_rec(rec)) {
3172                                 free_inode_rec(rec);
3173                                 continue;
3174                         }
3175                 }
3176
3177                 if (!rec->found_inode_item)
3178                         rec->errors |= I_ERR_NO_INODE_ITEM;
3179                 if (rec->found_link != rec->nlink)
3180                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3181                 if (repair) {
3182                         ret = try_repair_inode(root, rec);
3183                         if (ret == 0 && can_free_inode_rec(rec)) {
3184                                 free_inode_rec(rec);
3185                                 continue;
3186                         }
3187                         ret = 0;
3188                 }
3189
3190                 if (!(repair && ret == 0))
3191                         error++;
3192                 print_inode_error(root, rec);
3193                 list_for_each_entry(backref, &rec->backrefs, list) {
3194                         if (!backref->found_dir_item)
3195                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3196                         if (!backref->found_dir_index)
3197                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3198                         if (!backref->found_inode_ref)
3199                                 backref->errors |= REF_ERR_NO_INODE_REF;
3200                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3201                                 " namelen %u name %s filetype %d errors %x",
3202                                 (unsigned long long)backref->dir,
3203                                 (unsigned long long)backref->index,
3204                                 backref->namelen, backref->name,
3205                                 backref->filetype, backref->errors);
3206                         print_ref_error(backref->errors);
3207                 }
3208                 free_inode_rec(rec);
3209         }
3210         return (error > 0) ? -1 : 0;
3211 }
3212
3213 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3214                                         u64 objectid)
3215 {
3216         struct cache_extent *cache;
3217         struct root_record *rec = NULL;
3218         int ret;
3219
3220         cache = lookup_cache_extent(root_cache, objectid, 1);
3221         if (cache) {
3222                 rec = container_of(cache, struct root_record, cache);
3223         } else {
3224                 rec = calloc(1, sizeof(*rec));
3225                 if (!rec)
3226                         return ERR_PTR(-ENOMEM);
3227                 rec->objectid = objectid;
3228                 INIT_LIST_HEAD(&rec->backrefs);
3229                 rec->cache.start = objectid;
3230                 rec->cache.size = 1;
3231
3232                 ret = insert_cache_extent(root_cache, &rec->cache);
3233                 if (ret)
3234                         return ERR_PTR(-EEXIST);
3235         }
3236         return rec;
3237 }
3238
3239 static struct root_backref *get_root_backref(struct root_record *rec,
3240                                              u64 ref_root, u64 dir, u64 index,
3241                                              const char *name, int namelen)
3242 {
3243         struct root_backref *backref;
3244
3245         list_for_each_entry(backref, &rec->backrefs, list) {
3246                 if (backref->ref_root != ref_root || backref->dir != dir ||
3247                     backref->namelen != namelen)
3248                         continue;
3249                 if (memcmp(name, backref->name, namelen))
3250                         continue;
3251                 return backref;
3252         }
3253
3254         backref = calloc(1, sizeof(*backref) + namelen + 1);
3255         if (!backref)
3256                 return NULL;
3257         backref->ref_root = ref_root;
3258         backref->dir = dir;
3259         backref->index = index;
3260         backref->namelen = namelen;
3261         memcpy(backref->name, name, namelen);
3262         backref->name[namelen] = '\0';
3263         list_add_tail(&backref->list, &rec->backrefs);
3264         return backref;
3265 }
3266
3267 static void free_root_record(struct cache_extent *cache)
3268 {
3269         struct root_record *rec;
3270         struct root_backref *backref;
3271
3272         rec = container_of(cache, struct root_record, cache);
3273         while (!list_empty(&rec->backrefs)) {
3274                 backref = to_root_backref(rec->backrefs.next);
3275                 list_del(&backref->list);
3276                 free(backref);
3277         }
3278
3279         kfree(rec);
3280 }
3281
3282 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3283
3284 static int add_root_backref(struct cache_tree *root_cache,
3285                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3286                             const char *name, int namelen,
3287                             int item_type, int errors)
3288 {
3289         struct root_record *rec;
3290         struct root_backref *backref;
3291
3292         rec = get_root_rec(root_cache, root_id);
3293         BUG_ON(IS_ERR(rec));
3294         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3295         BUG_ON(!backref);
3296
3297         backref->errors |= errors;
3298
3299         if (item_type != BTRFS_DIR_ITEM_KEY) {
3300                 if (backref->found_dir_index || backref->found_back_ref ||
3301                     backref->found_forward_ref) {
3302                         if (backref->index != index)
3303                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3304                 } else {
3305                         backref->index = index;
3306                 }
3307         }
3308
3309         if (item_type == BTRFS_DIR_ITEM_KEY) {
3310                 if (backref->found_forward_ref)
3311                         rec->found_ref++;
3312                 backref->found_dir_item = 1;
3313         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3314                 backref->found_dir_index = 1;
3315         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3316                 if (backref->found_forward_ref)
3317                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3318                 else if (backref->found_dir_item)
3319                         rec->found_ref++;
3320                 backref->found_forward_ref = 1;
3321         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3322                 if (backref->found_back_ref)
3323                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3324                 backref->found_back_ref = 1;
3325         } else {
3326                 BUG_ON(1);
3327         }
3328
3329         if (backref->found_forward_ref && backref->found_dir_item)
3330                 backref->reachable = 1;
3331         return 0;
3332 }
3333
3334 static int merge_root_recs(struct btrfs_root *root,
3335                            struct cache_tree *src_cache,
3336                            struct cache_tree *dst_cache)
3337 {
3338         struct cache_extent *cache;
3339         struct ptr_node *node;
3340         struct inode_record *rec;
3341         struct inode_backref *backref;
3342         int ret = 0;
3343
3344         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3345                 free_inode_recs_tree(src_cache);
3346                 return 0;
3347         }
3348
3349         while (1) {
3350                 cache = search_cache_extent(src_cache, 0);
3351                 if (!cache)
3352                         break;
3353                 node = container_of(cache, struct ptr_node, cache);
3354                 rec = node->data;
3355                 remove_cache_extent(src_cache, &node->cache);
3356                 free(node);
3357
3358                 ret = is_child_root(root, root->objectid, rec->ino);
3359                 if (ret < 0)
3360                         break;
3361                 else if (ret == 0)
3362                         goto skip;
3363
3364                 list_for_each_entry(backref, &rec->backrefs, list) {
3365                         BUG_ON(backref->found_inode_ref);
3366                         if (backref->found_dir_item)
3367                                 add_root_backref(dst_cache, rec->ino,
3368                                         root->root_key.objectid, backref->dir,
3369                                         backref->index, backref->name,
3370                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3371                                         backref->errors);
3372                         if (backref->found_dir_index)
3373                                 add_root_backref(dst_cache, rec->ino,
3374                                         root->root_key.objectid, backref->dir,
3375                                         backref->index, backref->name,
3376                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3377                                         backref->errors);
3378                 }
3379 skip:
3380                 free_inode_rec(rec);
3381         }
3382         if (ret < 0)
3383                 return ret;
3384         return 0;
3385 }
3386
3387 static int check_root_refs(struct btrfs_root *root,
3388                            struct cache_tree *root_cache)
3389 {
3390         struct root_record *rec;
3391         struct root_record *ref_root;
3392         struct root_backref *backref;
3393         struct cache_extent *cache;
3394         int loop = 1;
3395         int ret;
3396         int error;
3397         int errors = 0;
3398
3399         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3400         BUG_ON(IS_ERR(rec));
3401         rec->found_ref = 1;
3402
3403         /* fixme: this can not detect circular references */
3404         while (loop) {
3405                 loop = 0;
3406                 cache = search_cache_extent(root_cache, 0);
3407                 while (1) {
3408                         if (!cache)
3409                                 break;
3410                         rec = container_of(cache, struct root_record, cache);
3411                         cache = next_cache_extent(cache);
3412
3413                         if (rec->found_ref == 0)
3414                                 continue;
3415
3416                         list_for_each_entry(backref, &rec->backrefs, list) {
3417                                 if (!backref->reachable)
3418                                         continue;
3419
3420                                 ref_root = get_root_rec(root_cache,
3421                                                         backref->ref_root);
3422                                 BUG_ON(IS_ERR(ref_root));
3423                                 if (ref_root->found_ref > 0)
3424                                         continue;
3425
3426                                 backref->reachable = 0;
3427                                 rec->found_ref--;
3428                                 if (rec->found_ref == 0)
3429                                         loop = 1;
3430                         }
3431                 }
3432         }
3433
3434         cache = search_cache_extent(root_cache, 0);
3435         while (1) {
3436                 if (!cache)
3437                         break;
3438                 rec = container_of(cache, struct root_record, cache);
3439                 cache = next_cache_extent(cache);
3440
3441                 if (rec->found_ref == 0 &&
3442                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3443                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3444                         ret = check_orphan_item(root->fs_info->tree_root,
3445                                                 rec->objectid);
3446                         if (ret == 0)
3447                                 continue;
3448
3449                         /*
3450                          * If we don't have a root item then we likely just have
3451                          * a dir item in a snapshot for this root but no actual
3452                          * ref key or anything so it's meaningless.
3453                          */
3454                         if (!rec->found_root_item)
3455                                 continue;
3456                         errors++;
3457                         fprintf(stderr, "fs tree %llu not referenced\n",
3458                                 (unsigned long long)rec->objectid);
3459                 }
3460
3461                 error = 0;
3462                 if (rec->found_ref > 0 && !rec->found_root_item)
3463                         error = 1;
3464                 list_for_each_entry(backref, &rec->backrefs, list) {
3465                         if (!backref->found_dir_item)
3466                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3467                         if (!backref->found_dir_index)
3468                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3469                         if (!backref->found_back_ref)
3470                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3471                         if (!backref->found_forward_ref)
3472                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3473                         if (backref->reachable && backref->errors)
3474                                 error = 1;
3475                 }
3476                 if (!error)
3477                         continue;
3478
3479                 errors++;
3480                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3481                         (unsigned long long)rec->objectid, rec->found_ref,
3482                          rec->found_root_item ? "" : "not found");
3483
3484                 list_for_each_entry(backref, &rec->backrefs, list) {
3485                         if (!backref->reachable)
3486                                 continue;
3487                         if (!backref->errors && rec->found_root_item)
3488                                 continue;
3489                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3490                                 " index %llu namelen %u name %s errors %x\n",
3491                                 (unsigned long long)backref->ref_root,
3492                                 (unsigned long long)backref->dir,
3493                                 (unsigned long long)backref->index,
3494                                 backref->namelen, backref->name,
3495                                 backref->errors);
3496                         print_ref_error(backref->errors);
3497                 }
3498         }
3499         return errors > 0 ? 1 : 0;
3500 }
3501
3502 static int process_root_ref(struct extent_buffer *eb, int slot,
3503                             struct btrfs_key *key,
3504                             struct cache_tree *root_cache)
3505 {
3506         u64 dirid;
3507         u64 index;
3508         u32 len;
3509         u32 name_len;
3510         struct btrfs_root_ref *ref;
3511         char namebuf[BTRFS_NAME_LEN];
3512         int error;
3513
3514         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3515
3516         dirid = btrfs_root_ref_dirid(eb, ref);
3517         index = btrfs_root_ref_sequence(eb, ref);
3518         name_len = btrfs_root_ref_name_len(eb, ref);
3519
3520         if (name_len <= BTRFS_NAME_LEN) {
3521                 len = name_len;
3522                 error = 0;
3523         } else {
3524                 len = BTRFS_NAME_LEN;
3525                 error = REF_ERR_NAME_TOO_LONG;
3526         }
3527         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3528
3529         if (key->type == BTRFS_ROOT_REF_KEY) {
3530                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3531                                  index, namebuf, len, key->type, error);
3532         } else {
3533                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3534                                  index, namebuf, len, key->type, error);
3535         }
3536         return 0;
3537 }
3538
3539 static void free_corrupt_block(struct cache_extent *cache)
3540 {
3541         struct btrfs_corrupt_block *corrupt;
3542
3543         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3544         free(corrupt);
3545 }
3546
3547 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3548
3549 /*
3550  * Repair the btree of the given root.
3551  *
3552  * The fix is to remove the node key in corrupt_blocks cache_tree.
3553  * and rebalance the tree.
3554  * After the fix, the btree should be writeable.
3555  */
3556 static int repair_btree(struct btrfs_root *root,
3557                         struct cache_tree *corrupt_blocks)
3558 {
3559         struct btrfs_trans_handle *trans;
3560         struct btrfs_path *path;
3561         struct btrfs_corrupt_block *corrupt;
3562         struct cache_extent *cache;
3563         struct btrfs_key key;
3564         u64 offset;
3565         int level;
3566         int ret = 0;
3567
3568         if (cache_tree_empty(corrupt_blocks))
3569                 return 0;
3570
3571         path = btrfs_alloc_path();
3572         if (!path)
3573                 return -ENOMEM;
3574
3575         trans = btrfs_start_transaction(root, 1);
3576         if (IS_ERR(trans)) {
3577                 ret = PTR_ERR(trans);
3578                 fprintf(stderr, "Error starting transaction: %s\n",
3579                         strerror(-ret));
3580                 goto out_free_path;
3581         }
3582         cache = first_cache_extent(corrupt_blocks);
3583         while (cache) {
3584                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3585                                        cache);
3586                 level = corrupt->level;
3587                 path->lowest_level = level;
3588                 key.objectid = corrupt->key.objectid;
3589                 key.type = corrupt->key.type;
3590                 key.offset = corrupt->key.offset;
3591
3592                 /*
3593                  * Here we don't want to do any tree balance, since it may
3594                  * cause a balance with corrupted brother leaf/node,
3595                  * so ins_len set to 0 here.
3596                  * Balance will be done after all corrupt node/leaf is deleted.
3597                  */
3598                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3599                 if (ret < 0)
3600                         goto out;
3601                 offset = btrfs_node_blockptr(path->nodes[level],
3602                                              path->slots[level]);
3603
3604                 /* Remove the ptr */
3605                 ret = btrfs_del_ptr(trans, root, path, level,
3606                                     path->slots[level]);
3607                 if (ret < 0)
3608                         goto out;
3609                 /*
3610                  * Remove the corresponding extent
3611                  * return value is not concerned.
3612                  */
3613                 btrfs_release_path(path);
3614                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3615                                         0, root->root_key.objectid,
3616                                         level - 1, 0);
3617                 cache = next_cache_extent(cache);
3618         }
3619
3620         /* Balance the btree using btrfs_search_slot() */
3621         cache = first_cache_extent(corrupt_blocks);
3622         while (cache) {
3623                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3624                                        cache);
3625                 memcpy(&key, &corrupt->key, sizeof(key));
3626                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3627                 if (ret < 0)
3628                         goto out;
3629                 /* return will always >0 since it won't find the item */
3630                 ret = 0;
3631                 btrfs_release_path(path);
3632                 cache = next_cache_extent(cache);
3633         }
3634 out:
3635         btrfs_commit_transaction(trans, root);
3636 out_free_path:
3637         btrfs_free_path(path);
3638         return ret;
3639 }
3640
3641 static int check_fs_root(struct btrfs_root *root,
3642                          struct cache_tree *root_cache,
3643                          struct walk_control *wc)
3644 {
3645         int ret = 0;
3646         int err = 0;
3647         int wret;
3648         int level;
3649         struct btrfs_path path;
3650         struct shared_node root_node;
3651         struct root_record *rec;
3652         struct btrfs_root_item *root_item = &root->root_item;
3653         struct cache_tree corrupt_blocks;
3654         struct orphan_data_extent *orphan;
3655         struct orphan_data_extent *tmp;
3656         enum btrfs_tree_block_status status;
3657         struct node_refs nrefs;
3658
3659         /*
3660          * Reuse the corrupt_block cache tree to record corrupted tree block
3661          *
3662          * Unlike the usage in extent tree check, here we do it in a per
3663          * fs/subvol tree base.
3664          */
3665         cache_tree_init(&corrupt_blocks);
3666         root->fs_info->corrupt_blocks = &corrupt_blocks;
3667
3668         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3669                 rec = get_root_rec(root_cache, root->root_key.objectid);
3670                 BUG_ON(IS_ERR(rec));
3671                 if (btrfs_root_refs(root_item) > 0)
3672                         rec->found_root_item = 1;
3673         }
3674
3675         btrfs_init_path(&path);
3676         memset(&root_node, 0, sizeof(root_node));
3677         cache_tree_init(&root_node.root_cache);
3678         cache_tree_init(&root_node.inode_cache);
3679         memset(&nrefs, 0, sizeof(nrefs));
3680
3681         /* Move the orphan extent record to corresponding inode_record */
3682         list_for_each_entry_safe(orphan, tmp,
3683                                  &root->orphan_data_extents, list) {
3684                 struct inode_record *inode;
3685
3686                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3687                                       1);
3688                 BUG_ON(IS_ERR(inode));
3689                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3690                 list_move(&orphan->list, &inode->orphan_extents);
3691         }
3692
3693         level = btrfs_header_level(root->node);
3694         memset(wc->nodes, 0, sizeof(wc->nodes));
3695         wc->nodes[level] = &root_node;
3696         wc->active_node = level;
3697         wc->root_level = level;
3698
3699         /* We may not have checked the root block, lets do that now */
3700         if (btrfs_is_leaf(root->node))
3701                 status = btrfs_check_leaf(root, NULL, root->node);
3702         else
3703                 status = btrfs_check_node(root, NULL, root->node);
3704         if (status != BTRFS_TREE_BLOCK_CLEAN)
3705                 return -EIO;
3706
3707         if (btrfs_root_refs(root_item) > 0 ||
3708             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3709                 path.nodes[level] = root->node;
3710                 extent_buffer_get(root->node);
3711                 path.slots[level] = 0;
3712         } else {
3713                 struct btrfs_key key;
3714                 struct btrfs_disk_key found_key;
3715
3716                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3717                 level = root_item->drop_level;
3718                 path.lowest_level = level;
3719                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3720                 if (wret < 0)
3721                         goto skip_walking;
3722                 btrfs_node_key(path.nodes[level], &found_key,
3723                                 path.slots[level]);
3724                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3725                                         sizeof(found_key)));
3726         }
3727
3728         while (1) {
3729                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3730                 if (wret < 0)
3731                         ret = wret;
3732                 if (wret != 0)
3733                         break;
3734
3735                 wret = walk_up_tree(root, &path, wc, &level);
3736                 if (wret < 0)
3737                         ret = wret;
3738                 if (wret != 0)
3739                         break;
3740         }
3741 skip_walking:
3742         btrfs_release_path(&path);
3743
3744         if (!cache_tree_empty(&corrupt_blocks)) {
3745                 struct cache_extent *cache;
3746                 struct btrfs_corrupt_block *corrupt;
3747
3748                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3749                        root->root_key.objectid);
3750                 cache = first_cache_extent(&corrupt_blocks);
3751                 while (cache) {
3752                         corrupt = container_of(cache,
3753                                                struct btrfs_corrupt_block,
3754                                                cache);
3755                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3756                                cache->start, corrupt->level,
3757                                corrupt->key.objectid, corrupt->key.type,
3758                                corrupt->key.offset);
3759                         cache = next_cache_extent(cache);
3760                 }
3761                 if (repair) {
3762                         printf("Try to repair the btree for root %llu\n",
3763                                root->root_key.objectid);
3764                         ret = repair_btree(root, &corrupt_blocks);
3765                         if (ret < 0)
3766                                 fprintf(stderr, "Failed to repair btree: %s\n",
3767                                         strerror(-ret));
3768                         if (!ret)
3769                                 printf("Btree for root %llu is fixed\n",
3770                                        root->root_key.objectid);
3771                 }
3772         }
3773
3774         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3775         if (err < 0)
3776                 ret = err;
3777
3778         if (root_node.current) {
3779                 root_node.current->checked = 1;
3780                 maybe_free_inode_rec(&root_node.inode_cache,
3781                                 root_node.current);
3782         }
3783
3784         err = check_inode_recs(root, &root_node.inode_cache);
3785         if (!ret)
3786                 ret = err;
3787
3788         free_corrupt_blocks_tree(&corrupt_blocks);
3789         root->fs_info->corrupt_blocks = NULL;
3790         free_orphan_data_extents(&root->orphan_data_extents);
3791         return ret;
3792 }
3793
3794 static int fs_root_objectid(u64 objectid)
3795 {
3796         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3797             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3798                 return 1;
3799         return is_fstree(objectid);
3800 }
3801
3802 static int check_fs_roots(struct btrfs_root *root,
3803                           struct cache_tree *root_cache)
3804 {
3805         struct btrfs_path path;
3806         struct btrfs_key key;
3807         struct walk_control wc;
3808         struct extent_buffer *leaf, *tree_node;
3809         struct btrfs_root *tmp_root;
3810         struct btrfs_root *tree_root = root->fs_info->tree_root;
3811         int ret;
3812         int err = 0;
3813
3814         if (ctx.progress_enabled) {
3815                 ctx.tp = TASK_FS_ROOTS;
3816                 task_start(ctx.info);
3817         }
3818
3819         /*
3820          * Just in case we made any changes to the extent tree that weren't
3821          * reflected into the free space cache yet.
3822          */
3823         if (repair)
3824                 reset_cached_block_groups(root->fs_info);
3825         memset(&wc, 0, sizeof(wc));
3826         cache_tree_init(&wc.shared);
3827         btrfs_init_path(&path);
3828
3829 again:
3830         key.offset = 0;
3831         key.objectid = 0;
3832         key.type = BTRFS_ROOT_ITEM_KEY;
3833         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3834         if (ret < 0) {
3835                 err = 1;
3836                 goto out;
3837         }
3838         tree_node = tree_root->node;
3839         while (1) {
3840                 if (tree_node != tree_root->node) {
3841                         free_root_recs_tree(root_cache);
3842                         btrfs_release_path(&path);
3843                         goto again;
3844                 }
3845                 leaf = path.nodes[0];
3846                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3847                         ret = btrfs_next_leaf(tree_root, &path);
3848                         if (ret) {
3849                                 if (ret < 0)
3850                                         err = 1;
3851                                 break;
3852                         }
3853                         leaf = path.nodes[0];
3854                 }
3855                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3856                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3857                     fs_root_objectid(key.objectid)) {
3858                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3859                                 tmp_root = btrfs_read_fs_root_no_cache(
3860                                                 root->fs_info, &key);
3861                         } else {
3862                                 key.offset = (u64)-1;
3863                                 tmp_root = btrfs_read_fs_root(
3864                                                 root->fs_info, &key);
3865                         }
3866                         if (IS_ERR(tmp_root)) {
3867                                 err = 1;
3868                                 goto next;
3869                         }
3870                         ret = check_fs_root(tmp_root, root_cache, &wc);
3871                         if (ret == -EAGAIN) {
3872                                 free_root_recs_tree(root_cache);
3873                                 btrfs_release_path(&path);
3874                                 goto again;
3875                         }
3876                         if (ret)
3877                                 err = 1;
3878                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3879                                 btrfs_free_fs_root(tmp_root);
3880                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3881                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3882                         process_root_ref(leaf, path.slots[0], &key,
3883                                          root_cache);
3884                 }
3885 next:
3886                 path.slots[0]++;
3887         }
3888 out:
3889         btrfs_release_path(&path);
3890         if (err)
3891                 free_extent_cache_tree(&wc.shared);
3892         if (!cache_tree_empty(&wc.shared))
3893                 fprintf(stderr, "warning line %d\n", __LINE__);
3894
3895         task_stop(ctx.info);
3896
3897         return err;
3898 }
3899
3900 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3901 {
3902         struct rb_node *n;
3903         struct extent_backref *back;
3904         struct tree_backref *tback;
3905         struct data_backref *dback;
3906         u64 found = 0;
3907         int err = 0;
3908
3909         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3910                 back = rb_node_to_extent_backref(n);
3911                 if (!back->found_extent_tree) {
3912                         err = 1;
3913                         if (!print_errs)
3914                                 goto out;
3915                         if (back->is_data) {
3916                                 dback = to_data_backref(back);
3917                                 fprintf(stderr, "Backref %llu %s %llu"
3918                                         " owner %llu offset %llu num_refs %lu"
3919                                         " not found in extent tree\n",
3920                                         (unsigned long long)rec->start,
3921                                         back->full_backref ?
3922                                         "parent" : "root",
3923                                         back->full_backref ?
3924                                         (unsigned long long)dback->parent:
3925                                         (unsigned long long)dback->root,
3926                                         (unsigned long long)dback->owner,
3927                                         (unsigned long long)dback->offset,
3928                                         (unsigned long)dback->num_refs);
3929                         } else {
3930                                 tback = to_tree_backref(back);
3931                                 fprintf(stderr, "Backref %llu parent %llu"
3932                                         " root %llu not found in extent tree\n",
3933                                         (unsigned long long)rec->start,
3934                                         (unsigned long long)tback->parent,
3935                                         (unsigned long long)tback->root);
3936                         }
3937                 }
3938                 if (!back->is_data && !back->found_ref) {
3939                         err = 1;
3940                         if (!print_errs)
3941                                 goto out;
3942                         tback = to_tree_backref(back);
3943                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3944                                 (unsigned long long)rec->start,
3945                                 back->full_backref ? "parent" : "root",
3946                                 back->full_backref ?
3947                                 (unsigned long long)tback->parent :
3948                                 (unsigned long long)tback->root, back);
3949                 }
3950                 if (back->is_data) {
3951                         dback = to_data_backref(back);
3952                         if (dback->found_ref != dback->num_refs) {
3953                                 err = 1;
3954                                 if (!print_errs)
3955                                         goto out;
3956                                 fprintf(stderr, "Incorrect local backref count"
3957                                         " on %llu %s %llu owner %llu"
3958                                         " offset %llu found %u wanted %u back %p\n",
3959                                         (unsigned long long)rec->start,
3960                                         back->full_backref ?
3961                                         "parent" : "root",
3962                                         back->full_backref ?
3963                                         (unsigned long long)dback->parent:
3964                                         (unsigned long long)dback->root,
3965                                         (unsigned long long)dback->owner,
3966                                         (unsigned long long)dback->offset,
3967                                         dback->found_ref, dback->num_refs, back);
3968                         }
3969                         if (dback->disk_bytenr != rec->start) {
3970                                 err = 1;
3971                                 if (!print_errs)
3972                                         goto out;
3973                                 fprintf(stderr, "Backref disk bytenr does not"
3974                                         " match extent record, bytenr=%llu, "
3975                                         "ref bytenr=%llu\n",
3976                                         (unsigned long long)rec->start,
3977                                         (unsigned long long)dback->disk_bytenr);
3978                         }
3979
3980                         if (dback->bytes != rec->nr) {
3981                                 err = 1;
3982                                 if (!print_errs)
3983                                         goto out;
3984                                 fprintf(stderr, "Backref bytes do not match "
3985                                         "extent backref, bytenr=%llu, ref "
3986                                         "bytes=%llu, backref bytes=%llu\n",
3987                                         (unsigned long long)rec->start,
3988                                         (unsigned long long)rec->nr,
3989                                         (unsigned long long)dback->bytes);
3990                         }
3991                 }
3992                 if (!back->is_data) {
3993                         found += 1;
3994                 } else {
3995                         dback = to_data_backref(back);
3996                         found += dback->found_ref;
3997                 }
3998         }
3999         if (found != rec->refs) {
4000                 err = 1;
4001                 if (!print_errs)
4002                         goto out;
4003                 fprintf(stderr, "Incorrect global backref count "
4004                         "on %llu found %llu wanted %llu\n",
4005                         (unsigned long long)rec->start,
4006                         (unsigned long long)found,
4007                         (unsigned long long)rec->refs);
4008         }
4009 out:
4010         return err;
4011 }
4012
/* rb_free_nodes() callback: release the extent_backref embedding @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
4019
4020 static void free_all_extent_backrefs(struct extent_record *rec)
4021 {
4022         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4023 }
4024
4025 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4026                                      struct cache_tree *extent_cache)
4027 {
4028         struct cache_extent *cache;
4029         struct extent_record *rec;
4030
4031         while (1) {
4032                 cache = first_cache_extent(extent_cache);
4033                 if (!cache)
4034                         break;
4035                 rec = container_of(cache, struct extent_record, cache);
4036                 remove_cache_extent(extent_cache, cache);
4037                 free_all_extent_backrefs(rec);
4038                 free(rec);
4039         }
4040 }
4041
4042 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4043                                  struct extent_record *rec)
4044 {
4045         if (rec->content_checked && rec->owner_ref_checked &&
4046             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4047             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4048             !rec->bad_full_backref && !rec->crossing_stripes &&
4049             !rec->wrong_chunk_type) {
4050                 remove_cache_extent(extent_cache, &rec->cache);
4051                 free_all_extent_backrefs(rec);
4052                 list_del_init(&rec->list);
4053                 free(rec);
4054         }
4055         return 0;
4056 }
4057
4058 static int check_owner_ref(struct btrfs_root *root,
4059                             struct extent_record *rec,
4060                             struct extent_buffer *buf)
4061 {
4062         struct extent_backref *node, *tmp;
4063         struct tree_backref *back;
4064         struct btrfs_root *ref_root;
4065         struct btrfs_key key;
4066         struct btrfs_path path;
4067         struct extent_buffer *parent;
4068         int level;
4069         int found = 0;
4070         int ret;
4071
4072         rbtree_postorder_for_each_entry_safe(node, tmp,
4073                                              &rec->backref_tree, node) {
4074                 if (node->is_data)
4075                         continue;
4076                 if (!node->found_ref)
4077                         continue;
4078                 if (node->full_backref)
4079                         continue;
4080                 back = to_tree_backref(node);
4081                 if (btrfs_header_owner(buf) == back->root)
4082                         return 0;
4083         }
4084         BUG_ON(rec->is_root);
4085
4086         /* try to find the block by search corresponding fs tree */
4087         key.objectid = btrfs_header_owner(buf);
4088         key.type = BTRFS_ROOT_ITEM_KEY;
4089         key.offset = (u64)-1;
4090
4091         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4092         if (IS_ERR(ref_root))
4093                 return 1;
4094
4095         level = btrfs_header_level(buf);
4096         if (level == 0)
4097                 btrfs_item_key_to_cpu(buf, &key, 0);
4098         else
4099                 btrfs_node_key_to_cpu(buf, &key, 0);
4100
4101         btrfs_init_path(&path);
4102         path.lowest_level = level + 1;
4103         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4104         if (ret < 0)
4105                 return 0;
4106
4107         parent = path.nodes[level + 1];
4108         if (parent && buf->start == btrfs_node_blockptr(parent,
4109                                                         path.slots[level + 1]))
4110                 found = 1;
4111
4112         btrfs_release_path(&path);
4113         return found ? 0 : 1;
4114 }
4115
4116 static int is_extent_tree_record(struct extent_record *rec)
4117 {
4118         struct extent_backref *ref, *tmp;
4119         struct tree_backref *back;
4120         int is_extent = 0;
4121
4122         rbtree_postorder_for_each_entry_safe(ref, tmp,
4123                                              &rec->backref_tree, node) {
4124                 if (ref->is_data)
4125                         return 0;
4126                 back = to_tree_backref(ref);
4127                 if (ref->full_backref)
4128                         return 0;
4129                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4130                         is_extent = 1;
4131         }
4132         return is_extent;
4133 }
4134
4135
4136 static int record_bad_block_io(struct btrfs_fs_info *info,
4137                                struct cache_tree *extent_cache,
4138                                u64 start, u64 len)
4139 {
4140         struct extent_record *rec;
4141         struct cache_extent *cache;
4142         struct btrfs_key key;
4143
4144         cache = lookup_cache_extent(extent_cache, start, len);
4145         if (!cache)
4146                 return 0;
4147
4148         rec = container_of(cache, struct extent_record, cache);
4149         if (!is_extent_tree_record(rec))
4150                 return 0;
4151
4152         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4153         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4154 }
4155
4156 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4157                        struct extent_buffer *buf, int slot)
4158 {
4159         if (btrfs_header_level(buf)) {
4160                 struct btrfs_key_ptr ptr1, ptr2;
4161
4162                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4163                                    sizeof(struct btrfs_key_ptr));
4164                 read_extent_buffer(buf, &ptr2,
4165                                    btrfs_node_key_ptr_offset(slot + 1),
4166                                    sizeof(struct btrfs_key_ptr));
4167                 write_extent_buffer(buf, &ptr1,
4168                                     btrfs_node_key_ptr_offset(slot + 1),
4169                                     sizeof(struct btrfs_key_ptr));
4170                 write_extent_buffer(buf, &ptr2,
4171                                     btrfs_node_key_ptr_offset(slot),
4172                                     sizeof(struct btrfs_key_ptr));
4173                 if (slot == 0) {
4174                         struct btrfs_disk_key key;
4175                         btrfs_node_key(buf, &key, 0);
4176                         btrfs_fixup_low_keys(root, path, &key,
4177                                              btrfs_header_level(buf) + 1);
4178                 }
4179         } else {
4180                 struct btrfs_item *item1, *item2;
4181                 struct btrfs_key k1, k2;
4182                 char *item1_data, *item2_data;
4183                 u32 item1_offset, item2_offset, item1_size, item2_size;
4184
4185                 item1 = btrfs_item_nr(slot);
4186                 item2 = btrfs_item_nr(slot + 1);
4187                 btrfs_item_key_to_cpu(buf, &k1, slot);
4188                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4189                 item1_offset = btrfs_item_offset(buf, item1);
4190                 item2_offset = btrfs_item_offset(buf, item2);
4191                 item1_size = btrfs_item_size(buf, item1);
4192                 item2_size = btrfs_item_size(buf, item2);
4193
4194                 item1_data = malloc(item1_size);
4195                 if (!item1_data)
4196                         return -ENOMEM;
4197                 item2_data = malloc(item2_size);
4198                 if (!item2_data) {
4199                         free(item1_data);
4200                         return -ENOMEM;
4201                 }
4202
4203                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4204                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4205
4206                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4207                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4208                 free(item1_data);
4209                 free(item2_data);
4210
4211                 btrfs_set_item_offset(buf, item1, item2_offset);
4212                 btrfs_set_item_offset(buf, item2, item1_offset);
4213                 btrfs_set_item_size(buf, item1, item2_size);
4214                 btrfs_set_item_size(buf, item2, item1_size);
4215
4216                 path->slots[0] = slot;
4217                 btrfs_set_item_key_unsafe(root, path, &k2);
4218                 path->slots[0] = slot + 1;
4219                 btrfs_set_item_key_unsafe(root, path, &k1);
4220         }
4221         return 0;
4222 }
4223
4224 static int fix_key_order(struct btrfs_trans_handle *trans,
4225                          struct btrfs_root *root,
4226                          struct btrfs_path *path)
4227 {
4228         struct extent_buffer *buf;
4229         struct btrfs_key k1, k2;
4230         int i;
4231         int level = path->lowest_level;
4232         int ret = -EIO;
4233
4234         buf = path->nodes[level];
4235         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4236                 if (level) {
4237                         btrfs_node_key_to_cpu(buf, &k1, i);
4238                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4239                 } else {
4240                         btrfs_item_key_to_cpu(buf, &k1, i);
4241                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4242                 }
4243                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4244                         continue;
4245                 ret = swap_values(root, path, buf, i);
4246                 if (ret)
4247                         break;
4248                 btrfs_mark_buffer_dirty(buf);
4249                 i = 0;
4250         }
4251         return ret;
4252 }
4253
4254 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4255                              struct btrfs_root *root,
4256                              struct btrfs_path *path,
4257                              struct extent_buffer *buf, int slot)
4258 {
4259         struct btrfs_key key;
4260         int nritems = btrfs_header_nritems(buf);
4261
4262         btrfs_item_key_to_cpu(buf, &key, slot);
4263
4264         /* These are all the keys we can deal with missing. */
4265         if (key.type != BTRFS_DIR_INDEX_KEY &&
4266             key.type != BTRFS_EXTENT_ITEM_KEY &&
4267             key.type != BTRFS_METADATA_ITEM_KEY &&
4268             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4269             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4270                 return -1;
4271
4272         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4273                (unsigned long long)key.objectid, key.type,
4274                (unsigned long long)key.offset, slot, buf->start);
4275         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4276                               btrfs_item_nr_offset(slot + 1),
4277                               sizeof(struct btrfs_item) *
4278                               (nritems - slot - 1));
4279         btrfs_set_header_nritems(buf, nritems - 1);
4280         if (slot == 0) {
4281                 struct btrfs_disk_key disk_key;
4282
4283                 btrfs_item_key(buf, &disk_key, 0);
4284                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4285         }
4286         btrfs_mark_buffer_dirty(buf);
4287         return 0;
4288 }
4289
4290 static int fix_item_offset(struct btrfs_trans_handle *trans,
4291                            struct btrfs_root *root,
4292                            struct btrfs_path *path)
4293 {
4294         struct extent_buffer *buf;
4295         int i;
4296         int ret = 0;
4297
4298         /* We should only get this for leaves */
4299         BUG_ON(path->lowest_level);
4300         buf = path->nodes[0];
4301 again:
4302         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4303                 unsigned int shift = 0, offset;
4304
4305                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4306                     BTRFS_LEAF_DATA_SIZE(root)) {
4307                         if (btrfs_item_end_nr(buf, i) >
4308                             BTRFS_LEAF_DATA_SIZE(root)) {
4309                                 ret = delete_bogus_item(trans, root, path,
4310                                                         buf, i);
4311                                 if (!ret)
4312                                         goto again;
4313                                 fprintf(stderr, "item is off the end of the "
4314                                         "leaf, can't fix\n");
4315                                 ret = -EIO;
4316                                 break;
4317                         }
4318                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4319                                 btrfs_item_end_nr(buf, i);
4320                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4321                            btrfs_item_offset_nr(buf, i - 1)) {
4322                         if (btrfs_item_end_nr(buf, i) >
4323                             btrfs_item_offset_nr(buf, i - 1)) {
4324                                 ret = delete_bogus_item(trans, root, path,
4325                                                         buf, i);
4326                                 if (!ret)
4327                                         goto again;
4328                                 fprintf(stderr, "items overlap, can't fix\n");
4329                                 ret = -EIO;
4330                                 break;
4331                         }
4332                         shift = btrfs_item_offset_nr(buf, i - 1) -
4333                                 btrfs_item_end_nr(buf, i);
4334                 }
4335                 if (!shift)
4336                         continue;
4337
4338                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4339                        i, shift, (unsigned long long)buf->start);
4340                 offset = btrfs_item_offset_nr(buf, i);
4341                 memmove_extent_buffer(buf,
4342                                       btrfs_leaf_data(buf) + offset + shift,
4343                                       btrfs_leaf_data(buf) + offset,
4344                                       btrfs_item_size_nr(buf, i));
4345                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4346                                       offset + shift);
4347                 btrfs_mark_buffer_dirty(buf);
4348         }
4349
4350         /*
4351          * We may have moved things, in which case we want to exit so we don't
4352          * write those changes out.  Once we have proper abort functionality in
4353          * progs this can be changed to something nicer.
4354          */
4355         BUG_ON(ret);
4356         return ret;
4357 }
4358
4359 /*
4360  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4361  * then just return -EIO.
4362  */
4363 static int try_to_fix_bad_block(struct btrfs_root *root,
4364                                 struct extent_buffer *buf,
4365                                 enum btrfs_tree_block_status status)
4366 {
4367         struct btrfs_trans_handle *trans;
4368         struct ulist *roots;
4369         struct ulist_node *node;
4370         struct btrfs_root *search_root;
4371         struct btrfs_path *path;
4372         struct ulist_iterator iter;
4373         struct btrfs_key root_key, key;
4374         int ret;
4375
4376         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4377             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4378                 return -EIO;
4379
4380         path = btrfs_alloc_path();
4381         if (!path)
4382                 return -EIO;
4383
4384         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4385                                    0, &roots);
4386         if (ret) {
4387                 btrfs_free_path(path);
4388                 return -EIO;
4389         }
4390
4391         ULIST_ITER_INIT(&iter);
4392         while ((node = ulist_next(roots, &iter))) {
4393                 root_key.objectid = node->val;
4394                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4395                 root_key.offset = (u64)-1;
4396
4397                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4398                 if (IS_ERR(root)) {
4399                         ret = -EIO;
4400                         break;
4401                 }
4402
4403
4404                 trans = btrfs_start_transaction(search_root, 0);
4405                 if (IS_ERR(trans)) {
4406                         ret = PTR_ERR(trans);
4407                         break;
4408                 }
4409
4410                 path->lowest_level = btrfs_header_level(buf);
4411                 path->skip_check_block = 1;
4412                 if (path->lowest_level)
4413                         btrfs_node_key_to_cpu(buf, &key, 0);
4414                 else
4415                         btrfs_item_key_to_cpu(buf, &key, 0);
4416                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4417                 if (ret) {
4418                         ret = -EIO;
4419                         btrfs_commit_transaction(trans, search_root);
4420                         break;
4421                 }
4422                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4423                         ret = fix_key_order(trans, search_root, path);
4424                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4425                         ret = fix_item_offset(trans, search_root, path);
4426                 if (ret) {
4427                         btrfs_commit_transaction(trans, search_root);
4428                         break;
4429                 }
4430                 btrfs_release_path(path);
4431                 btrfs_commit_transaction(trans, search_root);
4432         }
4433         ulist_free(roots);
4434         btrfs_free_path(path);
4435         return ret;
4436 }
4437
4438 static int check_block(struct btrfs_root *root,
4439                        struct cache_tree *extent_cache,
4440                        struct extent_buffer *buf, u64 flags)
4441 {
4442         struct extent_record *rec;
4443         struct cache_extent *cache;
4444         struct btrfs_key key;
4445         enum btrfs_tree_block_status status;
4446         int ret = 0;
4447         int level;
4448
4449         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4450         if (!cache)
4451                 return 1;
4452         rec = container_of(cache, struct extent_record, cache);
4453         rec->generation = btrfs_header_generation(buf);
4454
4455         level = btrfs_header_level(buf);
4456         if (btrfs_header_nritems(buf) > 0) {
4457
4458                 if (level == 0)
4459                         btrfs_item_key_to_cpu(buf, &key, 0);
4460                 else
4461                         btrfs_node_key_to_cpu(buf, &key, 0);
4462
4463                 rec->info_objectid = key.objectid;
4464         }
4465         rec->info_level = level;
4466
4467         if (btrfs_is_leaf(buf))
4468                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4469         else
4470                 status = btrfs_check_node(root, &rec->parent_key, buf);
4471
4472         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4473                 if (repair)
4474                         status = try_to_fix_bad_block(root, buf, status);
4475                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4476                         ret = -EIO;
4477                         fprintf(stderr, "bad block %llu\n",
4478                                 (unsigned long long)buf->start);
4479                 } else {
4480                         /*
4481                          * Signal to callers we need to start the scan over
4482                          * again since we'll have cowed blocks.
4483                          */
4484                         ret = -EAGAIN;
4485                 }
4486         } else {
4487                 rec->content_checked = 1;
4488                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4489                         rec->owner_ref_checked = 1;
4490                 else {
4491                         ret = check_owner_ref(root, rec, buf);
4492                         if (!ret)
4493                                 rec->owner_ref_checked = 1;
4494                 }
4495         }
4496         if (!ret)
4497                 maybe_free_extent_rec(extent_cache, rec);
4498         return ret;
4499 }
4500
4501
4502 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4503                                                 u64 parent, u64 root)
4504 {
4505         struct rb_node *node;
4506         struct tree_backref *back = NULL;
4507         struct tree_backref match = {
4508                 .node = {
4509                         .is_data = 0,
4510                 },
4511         };
4512
4513         if (parent) {
4514                 match.parent = parent;
4515                 match.node.full_backref = 1;
4516         } else {
4517                 match.root = root;
4518         }
4519
4520         node = rb_search(&rec->backref_tree, &match.node.node,
4521                          (rb_compare_keys)compare_extent_backref, NULL);
4522         if (node)
4523                 back = to_tree_backref(rb_node_to_extent_backref(node));
4524
4525         return back;
4526 }
4527
4528 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root)
4530 {
4531         struct tree_backref *ref = malloc(sizeof(*ref));
4532
4533         if (!ref)
4534                 return NULL;
4535         memset(&ref->node, 0, sizeof(ref->node));
4536         if (parent > 0) {
4537                 ref->parent = parent;
4538                 ref->node.full_backref = 1;
4539         } else {
4540                 ref->root = root;
4541                 ref->node.full_backref = 0;
4542         }
4543         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4544
4545         return ref;
4546 }
4547
4548 static struct data_backref *find_data_backref(struct extent_record *rec,
4549                                                 u64 parent, u64 root,
4550                                                 u64 owner, u64 offset,
4551                                                 int found_ref,
4552                                                 u64 disk_bytenr, u64 bytes)
4553 {
4554         struct rb_node *node;
4555         struct data_backref *back = NULL;
4556         struct data_backref match = {
4557                 .node = {
4558                         .is_data = 1,
4559                 },
4560                 .owner = owner,
4561                 .offset = offset,
4562                 .bytes = bytes,
4563                 .found_ref = found_ref,
4564                 .disk_bytenr = disk_bytenr,
4565         };
4566
4567         if (parent) {
4568                 match.parent = parent;
4569                 match.node.full_backref = 1;
4570         } else {
4571                 match.root = root;
4572         }
4573
4574         node = rb_search(&rec->backref_tree, &match.node.node,
4575                          (rb_compare_keys)compare_extent_backref, NULL);
4576         if (node)
4577                 back = to_data_backref(rb_node_to_extent_backref(node));
4578
4579         return back;
4580 }
4581
4582 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4583                                                 u64 parent, u64 root,
4584                                                 u64 owner, u64 offset,
4585                                                 u64 max_size)
4586 {
4587         struct data_backref *ref = malloc(sizeof(*ref));
4588
4589         if (!ref)
4590                 return NULL;
4591         memset(&ref->node, 0, sizeof(ref->node));
4592         ref->node.is_data = 1;
4593
4594         if (parent > 0) {
4595                 ref->parent = parent;
4596                 ref->owner = 0;
4597                 ref->offset = 0;
4598                 ref->node.full_backref = 1;
4599         } else {
4600                 ref->root = root;
4601                 ref->owner = owner;
4602                 ref->offset = offset;
4603                 ref->node.full_backref = 0;
4604         }
4605         ref->bytes = max_size;
4606         ref->found_ref = 0;
4607         ref->num_refs = 0;
4608         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4609         if (max_size > rec->max_size)
4610                 rec->max_size = max_size;
4611         return ref;
4612 }
4613
4614 /* Check if the type of extent matches with its chunk */
4615 static void check_extent_type(struct extent_record *rec)
4616 {
4617         struct btrfs_block_group_cache *bg_cache;
4618
4619         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4620         if (!bg_cache)
4621                 return;
4622
4623         /* data extent, check chunk directly*/
4624         if (!rec->metadata) {
4625                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4626                         rec->wrong_chunk_type = 1;
4627                 return;
4628         }
4629
4630         /* metadata extent, check the obvious case first */
4631         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4632                                  BTRFS_BLOCK_GROUP_METADATA))) {
4633                 rec->wrong_chunk_type = 1;
4634                 return;
4635         }
4636
4637         /*
4638          * Check SYSTEM extent, as it's also marked as metadata, we can only
4639          * make sure it's a SYSTEM extent by its backref
4640          */
4641         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4642                 struct extent_backref *node;
4643                 struct tree_backref *tback;
4644                 u64 bg_type;
4645
4646                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4647                 if (node->is_data) {
4648                         /* tree block shouldn't have data backref */
4649                         rec->wrong_chunk_type = 1;
4650                         return;
4651                 }
4652                 tback = container_of(node, struct tree_backref, node);
4653
4654                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4655                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4656                 else
4657                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4658                 if (!(bg_cache->flags & bg_type))
4659                         rec->wrong_chunk_type = 1;
4660         }
4661 }
4662
4663 /*
4664  * Allocate a new extent record, fill default values from @tmpl and insert int
4665  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4666  * the cache, otherwise it fails.
4667  */
4668 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4669                 struct extent_record *tmpl)
4670 {
4671         struct extent_record *rec;
4672         int ret = 0;
4673
4674         rec = malloc(sizeof(*rec));
4675         if (!rec)
4676                 return -ENOMEM;
4677         rec->start = tmpl->start;
4678         rec->max_size = tmpl->max_size;
4679         rec->nr = max(tmpl->nr, tmpl->max_size);
4680         rec->found_rec = tmpl->found_rec;
4681         rec->content_checked = tmpl->content_checked;
4682         rec->owner_ref_checked = tmpl->owner_ref_checked;
4683         rec->num_duplicates = 0;
4684         rec->metadata = tmpl->metadata;
4685         rec->flag_block_full_backref = FLAG_UNSET;
4686         rec->bad_full_backref = 0;
4687         rec->crossing_stripes = 0;
4688         rec->wrong_chunk_type = 0;
4689         rec->is_root = tmpl->is_root;
4690         rec->refs = tmpl->refs;
4691         rec->extent_item_refs = tmpl->extent_item_refs;
4692         rec->parent_generation = tmpl->parent_generation;
4693         INIT_LIST_HEAD(&rec->backrefs);
4694         INIT_LIST_HEAD(&rec->dups);
4695         INIT_LIST_HEAD(&rec->list);
4696         rec->backref_tree = RB_ROOT;
4697         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4698         rec->cache.start = tmpl->start;
4699         rec->cache.size = tmpl->nr;
4700         ret = insert_cache_extent(extent_cache, &rec->cache);
4701         BUG_ON(ret);
4702         bytes_used += rec->nr;
4703
4704         if (tmpl->metadata)
4705                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4706                                 global_info->tree_root->nodesize);
4707         check_extent_type(rec);
4708         return ret;
4709 }
4710
4711 /*
4712  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4713  * some are hints:
4714  * - refs              - if found, increase refs
4715  * - is_root           - if found, set
4716  * - content_checked   - if found, set
4717  * - owner_ref_checked - if found, set
4718  *
4719  * If not found, create a new one, initialize and insert.
4720  */
4721 static int add_extent_rec(struct cache_tree *extent_cache,
4722                 struct extent_record *tmpl)
4723 {
4724         struct extent_record *rec;
4725         struct cache_extent *cache;
4726         int ret = 0;
4727         int dup = 0;
4728
4729         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4730         if (cache) {
4731                 rec = container_of(cache, struct extent_record, cache);
4732                 if (tmpl->refs)
4733                         rec->refs++;
4734                 if (rec->nr == 1)
4735                         rec->nr = max(tmpl->nr, tmpl->max_size);
4736
4737                 /*
4738                  * We need to make sure to reset nr to whatever the extent
4739                  * record says was the real size, this way we can compare it to
4740                  * the backrefs.
4741                  */
4742                 if (tmpl->found_rec) {
4743                         if (tmpl->start != rec->start || rec->found_rec) {
4744                                 struct extent_record *tmp;
4745
4746                                 dup = 1;
4747                                 if (list_empty(&rec->list))
4748                                         list_add_tail(&rec->list,
4749                                                       &duplicate_extents);
4750
4751                                 /*
4752                                  * We have to do this song and dance in case we
4753                                  * find an extent record that falls inside of
4754                                  * our current extent record but does not have
4755                                  * the same objectid.
4756                                  */
4757                                 tmp = malloc(sizeof(*tmp));
4758                                 if (!tmp)
4759                                         return -ENOMEM;
4760                                 tmp->start = tmpl->start;
4761                                 tmp->max_size = tmpl->max_size;
4762                                 tmp->nr = tmpl->nr;
4763                                 tmp->found_rec = 1;
4764                                 tmp->metadata = tmpl->metadata;
4765                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4766                                 INIT_LIST_HEAD(&tmp->list);
4767                                 list_add_tail(&tmp->list, &rec->dups);
4768                                 rec->num_duplicates++;
4769                         } else {
4770                                 rec->nr = tmpl->nr;
4771                                 rec->found_rec = 1;
4772                         }
4773                 }
4774
4775                 if (tmpl->extent_item_refs && !dup) {
4776                         if (rec->extent_item_refs) {
4777                                 fprintf(stderr, "block %llu rec "
4778                                         "extent_item_refs %llu, passed %llu\n",
4779                                         (unsigned long long)tmpl->start,
4780                                         (unsigned long long)
4781                                                         rec->extent_item_refs,
4782                                         (unsigned long long)tmpl->extent_item_refs);
4783                         }
4784                         rec->extent_item_refs = tmpl->extent_item_refs;
4785                 }
4786                 if (tmpl->is_root)
4787                         rec->is_root = 1;
4788                 if (tmpl->content_checked)
4789                         rec->content_checked = 1;
4790                 if (tmpl->owner_ref_checked)
4791                         rec->owner_ref_checked = 1;
4792                 memcpy(&rec->parent_key, &tmpl->parent_key,
4793                                 sizeof(tmpl->parent_key));
4794                 if (tmpl->parent_generation)
4795                         rec->parent_generation = tmpl->parent_generation;
4796                 if (rec->max_size < tmpl->max_size)
4797                         rec->max_size = tmpl->max_size;
4798
4799                 /*
4800                  * A metadata extent can't cross stripe_len boundary, otherwise
4801                  * kernel scrub won't be able to handle it.
4802                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4803                  * it.
4804                  */
4805                 if (tmpl->metadata)
4806                         rec->crossing_stripes = check_crossing_stripes(
4807                                 rec->start, global_info->tree_root->nodesize);
4808                 check_extent_type(rec);
4809                 maybe_free_extent_rec(extent_cache, rec);
4810                 return ret;
4811         }
4812
4813         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4814
4815         return ret;
4816 }
4817
4818 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4819                             u64 parent, u64 root, int found_ref)
4820 {
4821         struct extent_record *rec;
4822         struct tree_backref *back;
4823         struct cache_extent *cache;
4824
4825         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4826         if (!cache) {
4827                 struct extent_record tmpl;
4828
4829                 memset(&tmpl, 0, sizeof(tmpl));
4830                 tmpl.start = bytenr;
4831                 tmpl.nr = 1;
4832                 tmpl.metadata = 1;
4833
4834                 add_extent_rec_nolookup(extent_cache, &tmpl);
4835
4836                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4837                 if (!cache)
4838                         abort();
4839         }
4840
4841         rec = container_of(cache, struct extent_record, cache);
4842         if (rec->start != bytenr) {
4843                 abort();
4844         }
4845
4846         back = find_tree_backref(rec, parent, root);
4847         if (!back) {
4848                 back = alloc_tree_backref(rec, parent, root);
4849                 BUG_ON(!back);
4850         }
4851
4852         if (found_ref) {
4853                 if (back->node.found_ref) {
4854                         fprintf(stderr, "Extent back ref already exists "
4855                                 "for %llu parent %llu root %llu \n",
4856                                 (unsigned long long)bytenr,
4857                                 (unsigned long long)parent,
4858                                 (unsigned long long)root);
4859                 }
4860                 back->node.found_ref = 1;
4861         } else {
4862                 if (back->node.found_extent_tree) {
4863                         fprintf(stderr, "Extent back ref already exists "
4864                                 "for %llu parent %llu root %llu \n",
4865                                 (unsigned long long)bytenr,
4866                                 (unsigned long long)parent,
4867                                 (unsigned long long)root);
4868                 }
4869                 back->node.found_extent_tree = 1;
4870         }
4871         check_extent_type(rec);
4872         maybe_free_extent_rec(extent_cache, rec);
4873         return 0;
4874 }
4875
4876 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4877                             u64 parent, u64 root, u64 owner, u64 offset,
4878                             u32 num_refs, int found_ref, u64 max_size)
4879 {
4880         struct extent_record *rec;
4881         struct data_backref *back;
4882         struct cache_extent *cache;
4883
4884         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4885         if (!cache) {
4886                 struct extent_record tmpl;
4887
4888                 memset(&tmpl, 0, sizeof(tmpl));
4889                 tmpl.start = bytenr;
4890                 tmpl.nr = 1;
4891                 tmpl.max_size = max_size;
4892
4893                 add_extent_rec_nolookup(extent_cache, &tmpl);
4894
4895                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4896                 if (!cache)
4897                         abort();
4898         }
4899
4900         rec = container_of(cache, struct extent_record, cache);
4901         if (rec->max_size < max_size)
4902                 rec->max_size = max_size;
4903
4904         /*
4905          * If found_ref is set then max_size is the real size and must match the
4906          * existing refs.  So if we have already found a ref then we need to
4907          * make sure that this ref matches the existing one, otherwise we need
4908          * to add a new backref so we can notice that the backrefs don't match
4909          * and we need to figure out who is telling the truth.  This is to
4910          * account for that awful fsync bug I introduced where we'd end up with
4911          * a btrfs_file_extent_item that would have its length include multiple
4912          * prealloc extents or point inside of a prealloc extent.
4913          */
4914         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4915                                  bytenr, max_size);
4916         if (!back) {
4917                 back = alloc_data_backref(rec, parent, root, owner, offset,
4918                                           max_size);
4919                 BUG_ON(!back);
4920         }
4921
4922         if (found_ref) {
4923                 BUG_ON(num_refs != 1);
4924                 if (back->node.found_ref)
4925                         BUG_ON(back->bytes != max_size);
4926                 back->node.found_ref = 1;
4927                 back->found_ref += 1;
4928                 back->bytes = max_size;
4929                 back->disk_bytenr = bytenr;
4930                 rec->refs += 1;
4931                 rec->content_checked = 1;
4932                 rec->owner_ref_checked = 1;
4933         } else {
4934                 if (back->node.found_extent_tree) {
4935                         fprintf(stderr, "Extent back ref already exists "
4936                                 "for %llu parent %llu root %llu "
4937                                 "owner %llu offset %llu num_refs %lu\n",
4938                                 (unsigned long long)bytenr,
4939                                 (unsigned long long)parent,
4940                                 (unsigned long long)root,
4941                                 (unsigned long long)owner,
4942                                 (unsigned long long)offset,
4943                                 (unsigned long)num_refs);
4944                 }
4945                 back->num_refs = num_refs;
4946                 back->node.found_extent_tree = 1;
4947         }
4948         maybe_free_extent_rec(extent_cache, rec);
4949         return 0;
4950 }
4951
4952 static int add_pending(struct cache_tree *pending,
4953                        struct cache_tree *seen, u64 bytenr, u32 size)
4954 {
4955         int ret;
4956         ret = add_cache_extent(seen, bytenr, size);
4957         if (ret)
4958                 return ret;
4959         add_cache_extent(pending, bytenr, size);
4960         return 0;
4961 }
4962
4963 static int pick_next_pending(struct cache_tree *pending,
4964                         struct cache_tree *reada,
4965                         struct cache_tree *nodes,
4966                         u64 last, struct block_info *bits, int bits_nr,
4967                         int *reada_bits)
4968 {
4969         unsigned long node_start = last;
4970         struct cache_extent *cache;
4971         int ret;
4972
4973         cache = search_cache_extent(reada, 0);
4974         if (cache) {
4975                 bits[0].start = cache->start;
4976                 bits[0].size = cache->size;
4977                 *reada_bits = 1;
4978                 return 1;
4979         }
4980         *reada_bits = 0;
4981         if (node_start > 32768)
4982                 node_start -= 32768;
4983
4984         cache = search_cache_extent(nodes, node_start);
4985         if (!cache)
4986                 cache = search_cache_extent(nodes, 0);
4987
4988         if (!cache) {
4989                  cache = search_cache_extent(pending, 0);
4990                  if (!cache)
4991                          return 0;
4992                  ret = 0;
4993                  do {
4994                          bits[ret].start = cache->start;
4995                          bits[ret].size = cache->size;
4996                          cache = next_cache_extent(cache);
4997                          ret++;
4998                  } while (cache && ret < bits_nr);
4999                  return ret;
5000         }
5001
5002         ret = 0;
5003         do {
5004                 bits[ret].start = cache->start;
5005                 bits[ret].size = cache->size;
5006                 cache = next_cache_extent(cache);
5007                 ret++;
5008         } while (cache && ret < bits_nr);
5009
5010         if (bits_nr - ret > 8) {
5011                 u64 lookup = bits[0].start + bits[0].size;
5012                 struct cache_extent *next;
5013                 next = search_cache_extent(pending, lookup);
5014                 while(next) {
5015                         if (next->start - lookup > 32768)
5016                                 break;
5017                         bits[ret].start = next->start;
5018                         bits[ret].size = next->size;
5019                         lookup = next->start + next->size;
5020                         ret++;
5021                         if (ret == bits_nr)
5022                                 break;
5023                         next = next_cache_extent(next);
5024                         if (!next)
5025                                 break;
5026                 }
5027         }
5028         return ret;
5029 }
5030
5031 static void free_chunk_record(struct cache_extent *cache)
5032 {
5033         struct chunk_record *rec;
5034
5035         rec = container_of(cache, struct chunk_record, cache);
5036         list_del_init(&rec->list);
5037         list_del_init(&rec->dextents);
5038         free(rec);
5039 }
5040
5041 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5042 {
5043         cache_tree_free_extents(chunk_cache, free_chunk_record);
5044 }
5045
5046 static void free_device_record(struct rb_node *node)
5047 {
5048         struct device_record *rec;
5049
5050         rec = container_of(node, struct device_record, node);
5051         free(rec);
5052 }
5053
5054 FREE_RB_BASED_TREE(device_cache, free_device_record);
5055
5056 int insert_block_group_record(struct block_group_tree *tree,
5057                               struct block_group_record *bg_rec)
5058 {
5059         int ret;
5060
5061         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5062         if (ret)
5063                 return ret;
5064
5065         list_add_tail(&bg_rec->list, &tree->block_groups);
5066         return 0;
5067 }
5068
5069 static void free_block_group_record(struct cache_extent *cache)
5070 {
5071         struct block_group_record *rec;
5072
5073         rec = container_of(cache, struct block_group_record, cache);
5074         list_del_init(&rec->list);
5075         free(rec);
5076 }
5077
5078 void free_block_group_tree(struct block_group_tree *tree)
5079 {
5080         cache_tree_free_extents(&tree->tree, free_block_group_record);
5081 }
5082
5083 int insert_device_extent_record(struct device_extent_tree *tree,
5084                                 struct device_extent_record *de_rec)
5085 {
5086         int ret;
5087
5088         /*
5089          * Device extent is a bit different from the other extents, because
5090          * the extents which belong to the different devices may have the
5091          * same start and size, so we need use the special extent cache
5092          * search/insert functions.
5093          */
5094         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5095         if (ret)
5096                 return ret;
5097
5098         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5099         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5100         return 0;
5101 }
5102
5103 static void free_device_extent_record(struct cache_extent *cache)
5104 {
5105         struct device_extent_record *rec;
5106
5107         rec = container_of(cache, struct device_extent_record, cache);
5108         if (!list_empty(&rec->chunk_list))
5109                 list_del_init(&rec->chunk_list);
5110         if (!list_empty(&rec->device_list))
5111                 list_del_init(&rec->device_list);
5112         free(rec);
5113 }
5114
5115 void free_device_extent_tree(struct device_extent_tree *tree)
5116 {
5117         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5118 }
5119
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as
 * a tree backref, anything else as a data backref carrying the item's ref
 * count.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	else
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

	return 0;
}
#endif
5138
5139 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5140                                             struct btrfs_key *key,
5141                                             int slot)
5142 {
5143         struct btrfs_chunk *ptr;
5144         struct chunk_record *rec;
5145         int num_stripes, i;
5146
5147         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5148         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5149
5150         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5151         if (!rec) {
5152                 fprintf(stderr, "memory allocation failed\n");
5153                 exit(-1);
5154         }
5155
5156         INIT_LIST_HEAD(&rec->list);
5157         INIT_LIST_HEAD(&rec->dextents);
5158         rec->bg_rec = NULL;
5159
5160         rec->cache.start = key->offset;
5161         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5162
5163         rec->generation = btrfs_header_generation(leaf);
5164
5165         rec->objectid = key->objectid;
5166         rec->type = key->type;
5167         rec->offset = key->offset;
5168
5169         rec->length = rec->cache.size;
5170         rec->owner = btrfs_chunk_owner(leaf, ptr);
5171         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5172         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5173         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5174         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5175         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5176         rec->num_stripes = num_stripes;
5177         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5178
5179         for (i = 0; i < rec->num_stripes; ++i) {
5180                 rec->stripes[i].devid =
5181                         btrfs_stripe_devid_nr(leaf, ptr, i);
5182                 rec->stripes[i].offset =
5183                         btrfs_stripe_offset_nr(leaf, ptr, i);
5184                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5185                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5186                                 BTRFS_UUID_SIZE);
5187         }
5188
5189         return rec;
5190 }
5191
5192 static int process_chunk_item(struct cache_tree *chunk_cache,
5193                               struct btrfs_key *key, struct extent_buffer *eb,
5194                               int slot)
5195 {
5196         struct chunk_record *rec;
5197         int ret = 0;
5198
5199         rec = btrfs_new_chunk_record(eb, key, slot);
5200         ret = insert_cache_extent(chunk_cache, &rec->cache);
5201         if (ret) {
5202                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5203                         rec->offset, rec->length);
5204                 free(rec);
5205         }
5206
5207         return ret;
5208 }
5209
5210 static int process_device_item(struct rb_root *dev_cache,
5211                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5212 {
5213         struct btrfs_dev_item *ptr;
5214         struct device_record *rec;
5215         int ret = 0;
5216
5217         ptr = btrfs_item_ptr(eb,
5218                 slot, struct btrfs_dev_item);
5219
5220         rec = malloc(sizeof(*rec));
5221         if (!rec) {
5222                 fprintf(stderr, "memory allocation failed\n");
5223                 return -ENOMEM;
5224         }
5225
5226         rec->devid = key->offset;
5227         rec->generation = btrfs_header_generation(eb);
5228
5229         rec->objectid = key->objectid;
5230         rec->type = key->type;
5231         rec->offset = key->offset;
5232
5233         rec->devid = btrfs_device_id(eb, ptr);
5234         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5235         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5236
5237         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5238         if (ret) {
5239                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5240                 free(rec);
5241         }
5242
5243         return ret;
5244 }
5245
5246 struct block_group_record *
5247 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5248                              int slot)
5249 {
5250         struct btrfs_block_group_item *ptr;
5251         struct block_group_record *rec;
5252
5253         rec = calloc(1, sizeof(*rec));
5254         if (!rec) {
5255                 fprintf(stderr, "memory allocation failed\n");
5256                 exit(-1);
5257         }
5258
5259         rec->cache.start = key->objectid;
5260         rec->cache.size = key->offset;
5261
5262         rec->generation = btrfs_header_generation(leaf);
5263
5264         rec->objectid = key->objectid;
5265         rec->type = key->type;
5266         rec->offset = key->offset;
5267
5268         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5269         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5270
5271         INIT_LIST_HEAD(&rec->list);
5272
5273         return rec;
5274 }
5275
5276 static int process_block_group_item(struct block_group_tree *block_group_cache,
5277                                     struct btrfs_key *key,
5278                                     struct extent_buffer *eb, int slot)
5279 {
5280         struct block_group_record *rec;
5281         int ret = 0;
5282
5283         rec = btrfs_new_block_group_record(eb, key, slot);
5284         ret = insert_block_group_record(block_group_cache, rec);
5285         if (ret) {
5286                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5287                         rec->objectid, rec->offset);
5288                 free(rec);
5289         }
5290
5291         return ret;
5292 }
5293
5294 struct device_extent_record *
5295 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5296                                struct btrfs_key *key, int slot)
5297 {
5298         struct device_extent_record *rec;
5299         struct btrfs_dev_extent *ptr;
5300
5301         rec = calloc(1, sizeof(*rec));
5302         if (!rec) {
5303                 fprintf(stderr, "memory allocation failed\n");
5304                 exit(-1);
5305         }
5306
5307         rec->cache.objectid = key->objectid;
5308         rec->cache.start = key->offset;
5309
5310         rec->generation = btrfs_header_generation(leaf);
5311
5312         rec->objectid = key->objectid;
5313         rec->type = key->type;
5314         rec->offset = key->offset;
5315
5316         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5317         rec->chunk_objecteid =
5318                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5319         rec->chunk_offset =
5320                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5321         rec->length = btrfs_dev_extent_length(leaf, ptr);
5322         rec->cache.size = rec->length;
5323
5324         INIT_LIST_HEAD(&rec->chunk_list);
5325         INIT_LIST_HEAD(&rec->device_list);
5326
5327         return rec;
5328 }
5329
5330 static int
5331 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5332                            struct btrfs_key *key, struct extent_buffer *eb,
5333                            int slot)
5334 {
5335         struct device_extent_record *rec;
5336         int ret;
5337
5338         rec = btrfs_new_device_extent_record(eb, key, slot);
5339         ret = insert_device_extent_record(dev_extent_cache, rec);
5340         if (ret) {
5341                 fprintf(stderr,
5342                         "Device extent[%llu, %llu, %llu] existed.\n",
5343                         rec->objectid, rec->offset, rec->length);
5344                 free(rec);
5345         }
5346
5347         return ret;
5348 }
5349
5350 static int process_extent_item(struct btrfs_root *root,
5351                                struct cache_tree *extent_cache,
5352                                struct extent_buffer *eb, int slot)
5353 {
5354         struct btrfs_extent_item *ei;
5355         struct btrfs_extent_inline_ref *iref;
5356         struct btrfs_extent_data_ref *dref;
5357         struct btrfs_shared_data_ref *sref;
5358         struct btrfs_key key;
5359         struct extent_record tmpl;
5360         unsigned long end;
5361         unsigned long ptr;
5362         int type;
5363         u32 item_size = btrfs_item_size_nr(eb, slot);
5364         u64 refs = 0;
5365         u64 offset;
5366         u64 num_bytes;
5367         int metadata = 0;
5368
5369         btrfs_item_key_to_cpu(eb, &key, slot);
5370
5371         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5372                 metadata = 1;
5373                 num_bytes = root->nodesize;
5374         } else {
5375                 num_bytes = key.offset;
5376         }
5377
5378         if (item_size < sizeof(*ei)) {
5379 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5380                 struct btrfs_extent_item_v0 *ei0;
5381                 BUG_ON(item_size != sizeof(*ei0));
5382                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5383                 refs = btrfs_extent_refs_v0(eb, ei0);
5384 #else
5385                 BUG();
5386 #endif
5387                 memset(&tmpl, 0, sizeof(tmpl));
5388                 tmpl.start = key.objectid;
5389                 tmpl.nr = num_bytes;
5390                 tmpl.extent_item_refs = refs;
5391                 tmpl.metadata = metadata;
5392                 tmpl.found_rec = 1;
5393                 tmpl.max_size = num_bytes;
5394
5395                 return add_extent_rec(extent_cache, &tmpl);
5396         }
5397
5398         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5399         refs = btrfs_extent_refs(eb, ei);
5400         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5401                 metadata = 1;
5402         else
5403                 metadata = 0;
5404
5405         memset(&tmpl, 0, sizeof(tmpl));
5406         tmpl.start = key.objectid;
5407         tmpl.nr = num_bytes;
5408         tmpl.extent_item_refs = refs;
5409         tmpl.metadata = metadata;
5410         tmpl.found_rec = 1;
5411         tmpl.max_size = num_bytes;
5412         add_extent_rec(extent_cache, &tmpl);
5413
5414         ptr = (unsigned long)(ei + 1);
5415         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5416             key.type == BTRFS_EXTENT_ITEM_KEY)
5417                 ptr += sizeof(struct btrfs_tree_block_info);
5418
5419         end = (unsigned long)ei + item_size;
5420         while (ptr < end) {
5421                 iref = (struct btrfs_extent_inline_ref *)ptr;
5422                 type = btrfs_extent_inline_ref_type(eb, iref);
5423                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5424                 switch (type) {
5425                 case BTRFS_TREE_BLOCK_REF_KEY:
5426                         add_tree_backref(extent_cache, key.objectid,
5427                                          0, offset, 0);
5428                         break;
5429                 case BTRFS_SHARED_BLOCK_REF_KEY:
5430                         add_tree_backref(extent_cache, key.objectid,
5431                                          offset, 0, 0);
5432                         break;
5433                 case BTRFS_EXTENT_DATA_REF_KEY:
5434                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5435                         add_data_backref(extent_cache, key.objectid, 0,
5436                                         btrfs_extent_data_ref_root(eb, dref),
5437                                         btrfs_extent_data_ref_objectid(eb,
5438                                                                        dref),
5439                                         btrfs_extent_data_ref_offset(eb, dref),
5440                                         btrfs_extent_data_ref_count(eb, dref),
5441                                         0, num_bytes);
5442                         break;
5443                 case BTRFS_SHARED_DATA_REF_KEY:
5444                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5445                         add_data_backref(extent_cache, key.objectid, offset,
5446                                         0, 0, 0,
5447                                         btrfs_shared_data_ref_count(eb, sref),
5448                                         0, num_bytes);
5449                         break;
5450                 default:
5451                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5452                                 key.objectid, key.type, num_bytes);
5453                         goto out;
5454                 }
5455                 ptr += btrfs_extent_inline_ref_size(type);
5456         }
5457         WARN_ON(ptr > end);
5458 out:
5459         return 0;
5460 }
5461
5462 static int check_cache_range(struct btrfs_root *root,
5463                              struct btrfs_block_group_cache *cache,
5464                              u64 offset, u64 bytes)
5465 {
5466         struct btrfs_free_space *entry;
5467         u64 *logical;
5468         u64 bytenr;
5469         int stripe_len;
5470         int i, nr, ret;
5471
5472         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5473                 bytenr = btrfs_sb_offset(i);
5474                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5475                                        cache->key.objectid, bytenr, 0,
5476                                        &logical, &nr, &stripe_len);
5477                 if (ret)
5478                         return ret;
5479
5480                 while (nr--) {
5481                         if (logical[nr] + stripe_len <= offset)
5482                                 continue;
5483                         if (offset + bytes <= logical[nr])
5484                                 continue;
5485                         if (logical[nr] == offset) {
5486                                 if (stripe_len >= bytes) {
5487                                         kfree(logical);
5488                                         return 0;
5489                                 }
5490                                 bytes -= stripe_len;
5491                                 offset += stripe_len;
5492                         } else if (logical[nr] < offset) {
5493                                 if (logical[nr] + stripe_len >=
5494                                     offset + bytes) {
5495                                         kfree(logical);
5496                                         return 0;
5497                                 }
5498                                 bytes = (offset + bytes) -
5499                                         (logical[nr] + stripe_len);
5500                                 offset = logical[nr] + stripe_len;
5501                         } else {
5502                                 /*
5503                                  * Could be tricky, the super may land in the
5504                                  * middle of the area we're checking.  First
5505                                  * check the easiest case, it's at the end.
5506                                  */
5507                                 if (logical[nr] + stripe_len >=
5508                                     bytes + offset) {
5509                                         bytes = logical[nr] - offset;
5510                                         continue;
5511                                 }
5512
5513                                 /* Check the left side */
5514                                 ret = check_cache_range(root, cache,
5515                                                         offset,
5516                                                         logical[nr] - offset);
5517                                 if (ret) {
5518                                         kfree(logical);
5519                                         return ret;
5520                                 }
5521
5522                                 /* Now we continue with the right side */
5523                                 bytes = (offset + bytes) -
5524                                         (logical[nr] + stripe_len);
5525                                 offset = logical[nr] + stripe_len;
5526                         }
5527                 }
5528
5529                 kfree(logical);
5530         }
5531
5532         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5533         if (!entry) {
5534                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5535                         offset, offset+bytes);
5536                 return -EINVAL;
5537         }
5538
5539         if (entry->offset != offset) {
5540                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5541                         entry->offset);
5542                 return -EINVAL;
5543         }
5544
5545         if (entry->bytes != bytes) {
5546                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5547                         bytes, entry->bytes, offset);
5548                 return -EINVAL;
5549         }
5550
5551         unlink_free_space(cache->free_space_ctl, entry);
5552         free(entry);
5553         return 0;
5554 }
5555
5556 static int verify_space_cache(struct btrfs_root *root,
5557                               struct btrfs_block_group_cache *cache)
5558 {
5559         struct btrfs_path *path;
5560         struct extent_buffer *leaf;
5561         struct btrfs_key key;
5562         u64 last;
5563         int ret = 0;
5564
5565         path = btrfs_alloc_path();
5566         if (!path)
5567                 return -ENOMEM;
5568
5569         root = root->fs_info->extent_root;
5570
5571         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5572
5573         key.objectid = last;
5574         key.offset = 0;
5575         key.type = BTRFS_EXTENT_ITEM_KEY;
5576
5577         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5578         if (ret < 0)
5579                 goto out;
5580         ret = 0;
5581         while (1) {
5582                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5583                         ret = btrfs_next_leaf(root, path);
5584                         if (ret < 0)
5585                                 goto out;
5586                         if (ret > 0) {
5587                                 ret = 0;
5588                                 break;
5589                         }
5590                 }
5591                 leaf = path->nodes[0];
5592                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5593                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5594                         break;
5595                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5596                     key.type != BTRFS_METADATA_ITEM_KEY) {
5597                         path->slots[0]++;
5598                         continue;
5599                 }
5600
5601                 if (last == key.objectid) {
5602                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5603                                 last = key.objectid + key.offset;
5604                         else
5605                                 last = key.objectid + root->nodesize;
5606                         path->slots[0]++;
5607                         continue;
5608                 }
5609
5610                 ret = check_cache_range(root, cache, last,
5611                                         key.objectid - last);
5612                 if (ret)
5613                         break;
5614                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5615                         last = key.objectid + key.offset;
5616                 else
5617                         last = key.objectid + root->nodesize;
5618                 path->slots[0]++;
5619         }
5620
5621         if (last < cache->key.objectid + cache->key.offset)
5622                 ret = check_cache_range(root, cache, last,
5623                                         cache->key.objectid +
5624                                         cache->key.offset - last);
5625
5626 out:
5627         btrfs_free_path(path);
5628
5629         if (!ret &&
5630             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5631                 fprintf(stderr, "There are still entries left in the space "
5632                         "cache\n");
5633                 ret = -EINVAL;
5634         }
5635
5636         return ret;
5637 }
5638
5639 static int check_space_cache(struct btrfs_root *root)
5640 {
5641         struct btrfs_block_group_cache *cache;
5642         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5643         int ret;
5644         int error = 0;
5645
5646         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5647             btrfs_super_generation(root->fs_info->super_copy) !=
5648             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5649                 printf("cache and super generation don't match, space cache "
5650                        "will be invalidated\n");
5651                 return 0;
5652         }
5653
5654         if (ctx.progress_enabled) {
5655                 ctx.tp = TASK_FREE_SPACE;
5656                 task_start(ctx.info);
5657         }
5658
5659         while (1) {
5660                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5661                 if (!cache)
5662                         break;
5663
5664                 start = cache->key.objectid + cache->key.offset;
5665                 if (!cache->free_space_ctl) {
5666                         if (btrfs_init_free_space_ctl(cache,
5667                                                       root->sectorsize)) {
5668                                 ret = -ENOMEM;
5669                                 break;
5670                         }
5671                 } else {
5672                         btrfs_remove_free_space_cache(cache);
5673                 }
5674
5675                 if (btrfs_fs_compat_ro(root->fs_info,
5676                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5677                         ret = exclude_super_stripes(root, cache);
5678                         if (ret) {
5679                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5680                                         strerror(-ret));
5681                                 error++;
5682                                 continue;
5683                         }
5684                         ret = load_free_space_tree(root->fs_info, cache);
5685                         free_excluded_extents(root, cache);
5686                         if (ret < 0) {
5687                                 fprintf(stderr, "could not load free space tree: %s\n",
5688                                         strerror(-ret));
5689                                 error++;
5690                                 continue;
5691                         }
5692                         error += ret;
5693                 } else {
5694                         ret = load_free_space_cache(root->fs_info, cache);
5695                         if (!ret)
5696                                 continue;
5697                 }
5698
5699                 ret = verify_space_cache(root, cache);
5700                 if (ret) {
5701                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5702                                 cache->key.objectid);
5703                         error++;
5704                 }
5705         }
5706
5707         task_stop(ctx.info);
5708
5709         return error ? -EINVAL : 0;
5710 }
5711
5712 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5713                         u64 num_bytes, unsigned long leaf_offset,
5714                         struct extent_buffer *eb) {
5715
5716         u64 offset = 0;
5717         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5718         char *data;
5719         unsigned long csum_offset;
5720         u32 csum;
5721         u32 csum_expected;
5722         u64 read_len;
5723         u64 data_checked = 0;
5724         u64 tmp;
5725         int ret = 0;
5726         int mirror;
5727         int num_copies;
5728
5729         if (num_bytes % root->sectorsize)
5730                 return -EINVAL;
5731
5732         data = malloc(num_bytes);
5733         if (!data)
5734                 return -ENOMEM;
5735
5736         while (offset < num_bytes) {
5737                 mirror = 0;
5738 again:
5739                 read_len = num_bytes - offset;
5740                 /* read as much space once a time */
5741                 ret = read_extent_data(root, data + offset,
5742                                 bytenr + offset, &read_len, mirror);
5743                 if (ret)
5744                         goto out;
5745                 data_checked = 0;
5746                 /* verify every 4k data's checksum */
5747                 while (data_checked < read_len) {
5748                         csum = ~(u32)0;
5749                         tmp = offset + data_checked;
5750
5751                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5752                                                csum, root->sectorsize);
5753                         btrfs_csum_final(csum, (char *)&csum);
5754
5755                         csum_offset = leaf_offset +
5756                                  tmp / root->sectorsize * csum_size;
5757                         read_extent_buffer(eb, (char *)&csum_expected,
5758                                            csum_offset, csum_size);
5759                         /* try another mirror */
5760                         if (csum != csum_expected) {
5761                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5762                                                 mirror, bytenr + tmp,
5763                                                 csum, csum_expected);
5764                                 num_copies = btrfs_num_copies(
5765                                                 &root->fs_info->mapping_tree,
5766                                                 bytenr, num_bytes);
5767                                 if (mirror < num_copies - 1) {
5768                                         mirror += 1;
5769                                         goto again;
5770                                 }
5771                         }
5772                         data_checked += root->sectorsize;
5773                 }
5774                 offset += read_len;
5775         }
5776 out:
5777         free(data);
5778         return ret;
5779 }
5780
5781 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5782                                u64 num_bytes)
5783 {
5784         struct btrfs_path *path;
5785         struct extent_buffer *leaf;
5786         struct btrfs_key key;
5787         int ret;
5788
5789         path = btrfs_alloc_path();
5790         if (!path) {
5791                 fprintf(stderr, "Error allocating path\n");
5792                 return -ENOMEM;
5793         }
5794
5795         key.objectid = bytenr;
5796         key.type = BTRFS_EXTENT_ITEM_KEY;
5797         key.offset = (u64)-1;
5798
5799 again:
5800         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5801                                 0, 0);
5802         if (ret < 0) {
5803                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5804                 btrfs_free_path(path);
5805                 return ret;
5806         } else if (ret) {
5807                 if (path->slots[0] > 0) {
5808                         path->slots[0]--;
5809                 } else {
5810                         ret = btrfs_prev_leaf(root, path);
5811                         if (ret < 0) {
5812                                 goto out;
5813                         } else if (ret > 0) {
5814                                 ret = 0;
5815                                 goto out;
5816                         }
5817                 }
5818         }
5819
5820         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5821
5822         /*
5823          * Block group items come before extent items if they have the same
5824          * bytenr, so walk back one more just in case.  Dear future traveller,
5825          * first congrats on mastering time travel.  Now if it's not too much
5826          * trouble could you go back to 2006 and tell Chris to make the
5827          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5828          * EXTENT_ITEM_KEY please?
5829          */
5830         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5831                 if (path->slots[0] > 0) {
5832                         path->slots[0]--;
5833                 } else {
5834                         ret = btrfs_prev_leaf(root, path);
5835                         if (ret < 0) {
5836                                 goto out;
5837                         } else if (ret > 0) {
5838                                 ret = 0;
5839                                 goto out;
5840                         }
5841                 }
5842                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5843         }
5844
5845         while (num_bytes) {
5846                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5847                         ret = btrfs_next_leaf(root, path);
5848                         if (ret < 0) {
5849                                 fprintf(stderr, "Error going to next leaf "
5850                                         "%d\n", ret);
5851                                 btrfs_free_path(path);
5852                                 return ret;
5853                         } else if (ret) {
5854                                 break;
5855                         }
5856                 }
5857                 leaf = path->nodes[0];
5858                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5859                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5860                         path->slots[0]++;
5861                         continue;
5862                 }
5863                 if (key.objectid + key.offset < bytenr) {
5864                         path->slots[0]++;
5865                         continue;
5866                 }
5867                 if (key.objectid > bytenr + num_bytes)
5868                         break;
5869
5870                 if (key.objectid == bytenr) {
5871                         if (key.offset >= num_bytes) {
5872                                 num_bytes = 0;
5873                                 break;
5874                         }
5875                         num_bytes -= key.offset;
5876                         bytenr += key.offset;
5877                 } else if (key.objectid < bytenr) {
5878                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5879                                 num_bytes = 0;
5880                                 break;
5881                         }
5882                         num_bytes = (bytenr + num_bytes) -
5883                                 (key.objectid + key.offset);
5884                         bytenr = key.objectid + key.offset;
5885                 } else {
5886                         if (key.objectid + key.offset < bytenr + num_bytes) {
5887                                 u64 new_start = key.objectid + key.offset;
5888                                 u64 new_bytes = bytenr + num_bytes - new_start;
5889
5890                                 /*
5891                                  * Weird case, the extent is in the middle of
5892                                  * our range, we'll have to search one side
5893                                  * and then the other.  Not sure if this happens
5894                                  * in real life, but no harm in coding it up
5895                                  * anyway just in case.
5896                                  */
5897                                 btrfs_release_path(path);
5898                                 ret = check_extent_exists(root, new_start,
5899                                                           new_bytes);
5900                                 if (ret) {
5901                                         fprintf(stderr, "Right section didn't "
5902                                                 "have a record\n");
5903                                         break;
5904                                 }
5905                                 num_bytes = key.objectid - bytenr;
5906                                 goto again;
5907                         }
5908                         num_bytes = key.objectid - bytenr;
5909                 }
5910                 path->slots[0]++;
5911         }
5912         ret = 0;
5913
5914 out:
5915         if (num_bytes && !ret) {
5916                 fprintf(stderr, "There are no extents for csum range "
5917                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5918                 ret = 1;
5919         }
5920
5921         btrfs_free_path(path);
5922         return ret;
5923 }
5924
5925 static int check_csums(struct btrfs_root *root)
5926 {
5927         struct btrfs_path *path;
5928         struct extent_buffer *leaf;
5929         struct btrfs_key key;
5930         u64 offset = 0, num_bytes = 0;
5931         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5932         int errors = 0;
5933         int ret;
5934         u64 data_len;
5935         unsigned long leaf_offset;
5936
5937         root = root->fs_info->csum_root;
5938         if (!extent_buffer_uptodate(root->node)) {
5939                 fprintf(stderr, "No valid csum tree found\n");
5940                 return -ENOENT;
5941         }
5942
5943         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5944         key.type = BTRFS_EXTENT_CSUM_KEY;
5945         key.offset = 0;
5946
5947         path = btrfs_alloc_path();
5948         if (!path)
5949                 return -ENOMEM;
5950
5951         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5952         if (ret < 0) {
5953                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5954                 btrfs_free_path(path);
5955                 return ret;
5956         }
5957
5958         if (ret > 0 && path->slots[0])
5959                 path->slots[0]--;
5960         ret = 0;
5961
5962         while (1) {
5963                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5964                         ret = btrfs_next_leaf(root, path);
5965                         if (ret < 0) {
5966                                 fprintf(stderr, "Error going to next leaf "
5967                                         "%d\n", ret);
5968                                 break;
5969                         }
5970                         if (ret)
5971                                 break;
5972                 }
5973                 leaf = path->nodes[0];
5974
5975                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5976                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5977                         path->slots[0]++;
5978                         continue;
5979                 }
5980
5981                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5982                               csum_size) * root->sectorsize;
5983                 if (!check_data_csum)
5984                         goto skip_csum_check;
5985                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5986                 ret = check_extent_csums(root, key.offset, data_len,
5987                                          leaf_offset, leaf);
5988                 if (ret)
5989                         break;
5990 skip_csum_check:
5991                 if (!num_bytes) {
5992                         offset = key.offset;
5993                 } else if (key.offset != offset + num_bytes) {
5994                         ret = check_extent_exists(root, offset, num_bytes);
5995                         if (ret) {
5996                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5997                                         "there is no extent record\n",
5998                                         offset, offset+num_bytes);
5999                                 errors++;
6000                         }
6001                         offset = key.offset;
6002                         num_bytes = 0;
6003                 }
6004                 num_bytes += data_len;
6005                 path->slots[0]++;
6006         }
6007
6008         btrfs_free_path(path);
6009         return errors;
6010 }
6011
6012 static int is_dropped_key(struct btrfs_key *key,
6013                           struct btrfs_key *drop_key) {
6014         if (key->objectid < drop_key->objectid)
6015                 return 1;
6016         else if (key->objectid == drop_key->objectid) {
6017                 if (key->type < drop_key->type)
6018                         return 1;
6019                 else if (key->type == drop_key->type) {
6020                         if (key->offset < drop_key->offset)
6021                                 return 1;
6022                 }
6023         }
6024         return 0;
6025 }
6026
6027 /*
6028  * Here are the rules for FULL_BACKREF.
6029  *
6030  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6031  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6032  *      FULL_BACKREF set.
6033  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6034  *    if it happened after the relocation occurred since we'll have dropped the
6035  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6036  *    have no real way to know for sure.
6037  *
6038  * We process the blocks one root at a time, and we start from the lowest root
6039  * objectid and go to the highest.  So we can just lookup the owner backref for
6040  * the record and if we don't find it then we know it doesn't exist and we have
6041  * a FULL BACKREF.
6042  *
6043  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6044  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6045  * be set or not and then we can check later once we've gathered all the refs.
6046  */
6047 static int calc_extent_flag(struct btrfs_root *root,
6048                            struct cache_tree *extent_cache,
6049                            struct extent_buffer *buf,
6050                            struct root_item_record *ri,
6051                            u64 *flags)
6052 {
6053         struct extent_record *rec;
6054         struct cache_extent *cache;
6055         struct tree_backref *tback;
6056         u64 owner = 0;
6057
6058         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6059         /* we have added this extent before */
6060         BUG_ON(!cache);
6061         rec = container_of(cache, struct extent_record, cache);
6062
6063         /*
6064          * Except file/reloc tree, we can not have
6065          * FULL BACKREF MODE
6066          */
6067         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6068                 goto normal;
6069         /*
6070          * root node
6071          */
6072         if (buf->start == ri->bytenr)
6073                 goto normal;
6074
6075         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6076                 goto full_backref;
6077
6078         owner = btrfs_header_owner(buf);
6079         if (owner == ri->objectid)
6080                 goto normal;
6081
6082         tback = find_tree_backref(rec, 0, owner);
6083         if (!tback)
6084                 goto full_backref;
6085 normal:
6086         *flags = 0;
6087         if (rec->flag_block_full_backref != FLAG_UNSET &&
6088             rec->flag_block_full_backref != 0)
6089                 rec->bad_full_backref = 1;
6090         return 0;
6091 full_backref:
6092         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6093         if (rec->flag_block_full_backref != FLAG_UNSET &&
6094             rec->flag_block_full_backref != 1)
6095                 rec->bad_full_backref = 1;
6096         return 0;
6097 }
6098
6099 static int run_next_block(struct btrfs_root *root,
6100                           struct block_info *bits,
6101                           int bits_nr,
6102                           u64 *last,
6103                           struct cache_tree *pending,
6104                           struct cache_tree *seen,
6105                           struct cache_tree *reada,
6106                           struct cache_tree *nodes,
6107                           struct cache_tree *extent_cache,
6108                           struct cache_tree *chunk_cache,
6109                           struct rb_root *dev_cache,
6110                           struct block_group_tree *block_group_cache,
6111                           struct device_extent_tree *dev_extent_cache,
6112                           struct root_item_record *ri)
6113 {
6114         struct extent_buffer *buf;
6115         struct extent_record *rec = NULL;
6116         u64 bytenr;
6117         u32 size;
6118         u64 parent;
6119         u64 owner;
6120         u64 flags;
6121         u64 ptr;
6122         u64 gen = 0;
6123         int ret = 0;
6124         int i;
6125         int nritems;
6126         struct btrfs_key key;
6127         struct cache_extent *cache;
6128         int reada_bits;
6129
6130         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6131                                     bits_nr, &reada_bits);
6132         if (nritems == 0)
6133                 return 1;
6134
6135         if (!reada_bits) {
6136                 for(i = 0; i < nritems; i++) {
6137                         ret = add_cache_extent(reada, bits[i].start,
6138                                                bits[i].size);
6139                         if (ret == -EEXIST)
6140                                 continue;
6141
6142                         /* fixme, get the parent transid */
6143                         readahead_tree_block(root, bits[i].start,
6144                                              bits[i].size, 0);
6145                 }
6146         }
6147         *last = bits[0].start;
6148         bytenr = bits[0].start;
6149         size = bits[0].size;
6150
6151         cache = lookup_cache_extent(pending, bytenr, size);
6152         if (cache) {
6153                 remove_cache_extent(pending, cache);
6154                 free(cache);
6155         }
6156         cache = lookup_cache_extent(reada, bytenr, size);
6157         if (cache) {
6158                 remove_cache_extent(reada, cache);
6159                 free(cache);
6160         }
6161         cache = lookup_cache_extent(nodes, bytenr, size);
6162         if (cache) {
6163                 remove_cache_extent(nodes, cache);
6164                 free(cache);
6165         }
6166         cache = lookup_cache_extent(extent_cache, bytenr, size);
6167         if (cache) {
6168                 rec = container_of(cache, struct extent_record, cache);
6169                 gen = rec->parent_generation;
6170         }
6171
6172         /* fixme, get the real parent transid */
6173         buf = read_tree_block(root, bytenr, size, gen);
6174         if (!extent_buffer_uptodate(buf)) {
6175                 record_bad_block_io(root->fs_info,
6176                                     extent_cache, bytenr, size);
6177                 goto out;
6178         }
6179
6180         nritems = btrfs_header_nritems(buf);
6181
6182         flags = 0;
6183         if (!init_extent_tree) {
6184                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6185                                        btrfs_header_level(buf), 1, NULL,
6186                                        &flags);
6187                 if (ret < 0) {
6188                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6189                         if (ret < 0) {
6190                                 fprintf(stderr, "Couldn't calc extent flags\n");
6191                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6192                         }
6193                 }
6194         } else {
6195                 flags = 0;
6196                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6197                 if (ret < 0) {
6198                         fprintf(stderr, "Couldn't calc extent flags\n");
6199                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6200                 }
6201         }
6202
6203         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6204                 if (ri != NULL &&
6205                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6206                     ri->objectid == btrfs_header_owner(buf)) {
6207                         /*
6208                          * Ok we got to this block from it's original owner and
6209                          * we have FULL_BACKREF set.  Relocation can leave
6210                          * converted blocks over so this is altogether possible,
6211                          * however it's not possible if the generation > the
6212                          * last snapshot, so check for this case.
6213                          */
6214                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6215                             btrfs_header_generation(buf) > ri->last_snapshot) {
6216                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6217                                 rec->bad_full_backref = 1;
6218                         }
6219                 }
6220         } else {
6221                 if (ri != NULL &&
6222                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6223                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6224                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6225                         rec->bad_full_backref = 1;
6226                 }
6227         }
6228
6229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6230                 rec->flag_block_full_backref = 1;
6231                 parent = bytenr;
6232                 owner = 0;
6233         } else {
6234                 rec->flag_block_full_backref = 0;
6235                 parent = 0;
6236                 owner = btrfs_header_owner(buf);
6237         }
6238
6239         ret = check_block(root, extent_cache, buf, flags);
6240         if (ret)
6241                 goto out;
6242
6243         if (btrfs_is_leaf(buf)) {
6244                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6245                 for (i = 0; i < nritems; i++) {
6246                         struct btrfs_file_extent_item *fi;
6247                         btrfs_item_key_to_cpu(buf, &key, i);
6248                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6249                                 process_extent_item(root, extent_cache, buf,
6250                                                     i);
6251                                 continue;
6252                         }
6253                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6254                                 process_extent_item(root, extent_cache, buf,
6255                                                     i);
6256                                 continue;
6257                         }
6258                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6259                                 total_csum_bytes +=
6260                                         btrfs_item_size_nr(buf, i);
6261                                 continue;
6262                         }
6263                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6264                                 process_chunk_item(chunk_cache, &key, buf, i);
6265                                 continue;
6266                         }
6267                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6268                                 process_device_item(dev_cache, &key, buf, i);
6269                                 continue;
6270                         }
6271                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6272                                 process_block_group_item(block_group_cache,
6273                                         &key, buf, i);
6274                                 continue;
6275                         }
6276                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6277                                 process_device_extent_item(dev_extent_cache,
6278                                         &key, buf, i);
6279                                 continue;
6280
6281                         }
6282                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6283 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6284                                 process_extent_ref_v0(extent_cache, buf, i);
6285 #else
6286                                 BUG();
6287 #endif
6288                                 continue;
6289                         }
6290
6291                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6292                                 add_tree_backref(extent_cache, key.objectid, 0,
6293                                                  key.offset, 0);
6294                                 continue;
6295                         }
6296                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6297                                 add_tree_backref(extent_cache, key.objectid,
6298                                                  key.offset, 0, 0);
6299                                 continue;
6300                         }
6301                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6302                                 struct btrfs_extent_data_ref *ref;
6303                                 ref = btrfs_item_ptr(buf, i,
6304                                                 struct btrfs_extent_data_ref);
6305                                 add_data_backref(extent_cache,
6306                                         key.objectid, 0,
6307                                         btrfs_extent_data_ref_root(buf, ref),
6308                                         btrfs_extent_data_ref_objectid(buf,
6309                                                                        ref),
6310                                         btrfs_extent_data_ref_offset(buf, ref),
6311                                         btrfs_extent_data_ref_count(buf, ref),
6312                                         0, root->sectorsize);
6313                                 continue;
6314                         }
6315                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6316                                 struct btrfs_shared_data_ref *ref;
6317                                 ref = btrfs_item_ptr(buf, i,
6318                                                 struct btrfs_shared_data_ref);
6319                                 add_data_backref(extent_cache,
6320                                         key.objectid, key.offset, 0, 0, 0,
6321                                         btrfs_shared_data_ref_count(buf, ref),
6322                                         0, root->sectorsize);
6323                                 continue;
6324                         }
6325                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6326                                 struct bad_item *bad;
6327
6328                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6329                                         continue;
6330                                 if (!owner)
6331                                         continue;
6332                                 bad = malloc(sizeof(struct bad_item));
6333                                 if (!bad)
6334                                         continue;
6335                                 INIT_LIST_HEAD(&bad->list);
6336                                 memcpy(&bad->key, &key,
6337                                        sizeof(struct btrfs_key));
6338                                 bad->root_id = owner;
6339                                 list_add_tail(&bad->list, &delete_items);
6340                                 continue;
6341                         }
6342                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6343                                 continue;
6344                         fi = btrfs_item_ptr(buf, i,
6345                                             struct btrfs_file_extent_item);
6346                         if (btrfs_file_extent_type(buf, fi) ==
6347                             BTRFS_FILE_EXTENT_INLINE)
6348                                 continue;
6349                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6350                                 continue;
6351
6352                         data_bytes_allocated +=
6353                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6354                         if (data_bytes_allocated < root->sectorsize) {
6355                                 abort();
6356                         }
6357                         data_bytes_referenced +=
6358                                 btrfs_file_extent_num_bytes(buf, fi);
6359                         add_data_backref(extent_cache,
6360                                 btrfs_file_extent_disk_bytenr(buf, fi),
6361                                 parent, owner, key.objectid, key.offset -
6362                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6363                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6364                 }
6365         } else {
6366                 int level;
6367                 struct btrfs_key first_key;
6368
6369                 first_key.objectid = 0;
6370
6371                 if (nritems > 0)
6372                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6373                 level = btrfs_header_level(buf);
6374                 for (i = 0; i < nritems; i++) {
6375                         struct extent_record tmpl;
6376
6377                         ptr = btrfs_node_blockptr(buf, i);
6378                         size = root->nodesize;
6379                         btrfs_node_key_to_cpu(buf, &key, i);
6380                         if (ri != NULL) {
6381                                 if ((level == ri->drop_level)
6382                                     && is_dropped_key(&key, &ri->drop_key)) {
6383                                         continue;
6384                                 }
6385                         }
6386
6387                         memset(&tmpl, 0, sizeof(tmpl));
6388                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6389                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6390                         tmpl.start = ptr;
6391                         tmpl.nr = size;
6392                         tmpl.refs = 1;
6393                         tmpl.metadata = 1;
6394                         tmpl.max_size = size;
6395                         ret = add_extent_rec(extent_cache, &tmpl);
6396                         BUG_ON(ret);
6397
6398                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6399
6400                         if (level > 1) {
6401                                 add_pending(nodes, seen, ptr, size);
6402                         } else {
6403                                 add_pending(pending, seen, ptr, size);
6404                         }
6405                 }
6406                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6407                                       nritems) * sizeof(struct btrfs_key_ptr);
6408         }
6409         total_btree_bytes += buf->len;
6410         if (fs_root_objectid(btrfs_header_owner(buf)))
6411                 total_fs_tree_bytes += buf->len;
6412         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6413                 total_extent_tree_bytes += buf->len;
6414         if (!found_old_backref &&
6415             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6416             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6417             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6418                 found_old_backref = 1;
6419 out:
6420         free_extent_buffer(buf);
6421         return ret;
6422 }
6423
6424 static int add_root_to_pending(struct extent_buffer *buf,
6425                                struct cache_tree *extent_cache,
6426                                struct cache_tree *pending,
6427                                struct cache_tree *seen,
6428                                struct cache_tree *nodes,
6429                                u64 objectid)
6430 {
6431         struct extent_record tmpl;
6432
6433         if (btrfs_header_level(buf) > 0)
6434                 add_pending(nodes, seen, buf->start, buf->len);
6435         else
6436                 add_pending(pending, seen, buf->start, buf->len);
6437
6438         memset(&tmpl, 0, sizeof(tmpl));
6439         tmpl.start = buf->start;
6440         tmpl.nr = buf->len;
6441         tmpl.is_root = 1;
6442         tmpl.refs = 1;
6443         tmpl.metadata = 1;
6444         tmpl.max_size = buf->len;
6445         add_extent_rec(extent_cache, &tmpl);
6446
6447         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6448             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6449                 add_tree_backref(extent_cache, buf->start, buf->start,
6450                                  0, 1);
6451         else
6452                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6453         return 0;
6454 }
6455
6456 /* as we fix the tree, we might be deleting blocks that
6457  * we're tracking for repair.  This hook makes sure we
6458  * remove any backrefs for blocks as we are fixing them.
6459  */
6460 static int free_extent_hook(struct btrfs_trans_handle *trans,
6461                             struct btrfs_root *root,
6462                             u64 bytenr, u64 num_bytes, u64 parent,
6463                             u64 root_objectid, u64 owner, u64 offset,
6464                             int refs_to_drop)
6465 {
6466         struct extent_record *rec;
6467         struct cache_extent *cache;
6468         int is_data;
6469         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6470
6471         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6472         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6473         if (!cache)
6474                 return 0;
6475
6476         rec = container_of(cache, struct extent_record, cache);
6477         if (is_data) {
6478                 struct data_backref *back;
6479                 back = find_data_backref(rec, parent, root_objectid, owner,
6480                                          offset, 1, bytenr, num_bytes);
6481                 if (!back)
6482                         goto out;
6483                 if (back->node.found_ref) {
6484                         back->found_ref -= refs_to_drop;
6485                         if (rec->refs)
6486                                 rec->refs -= refs_to_drop;
6487                 }
6488                 if (back->node.found_extent_tree) {
6489                         back->num_refs -= refs_to_drop;
6490                         if (rec->extent_item_refs)
6491                                 rec->extent_item_refs -= refs_to_drop;
6492                 }
6493                 if (back->found_ref == 0)
6494                         back->node.found_ref = 0;
6495                 if (back->num_refs == 0)
6496                         back->node.found_extent_tree = 0;
6497
6498                 if (!back->node.found_extent_tree && back->node.found_ref) {
6499                         rb_erase(&back->node.node, &rec->backref_tree);
6500                         free(back);
6501                 }
6502         } else {
6503                 struct tree_backref *back;
6504                 back = find_tree_backref(rec, parent, root_objectid);
6505                 if (!back)
6506                         goto out;
6507                 if (back->node.found_ref) {
6508                         if (rec->refs)
6509                                 rec->refs--;
6510                         back->node.found_ref = 0;
6511                 }
6512                 if (back->node.found_extent_tree) {
6513                         if (rec->extent_item_refs)
6514                                 rec->extent_item_refs--;
6515                         back->node.found_extent_tree = 0;
6516                 }
6517                 if (!back->node.found_extent_tree && back->node.found_ref) {
6518                         rb_erase(&back->node.node, &rec->backref_tree);
6519                         free(back);
6520                 }
6521         }
6522         maybe_free_extent_rec(extent_cache, rec);
6523 out:
6524         return 0;
6525 }
6526
6527 static int delete_extent_records(struct btrfs_trans_handle *trans,
6528                                  struct btrfs_root *root,
6529                                  struct btrfs_path *path,
6530                                  u64 bytenr, u64 new_len)
6531 {
6532         struct btrfs_key key;
6533         struct btrfs_key found_key;
6534         struct extent_buffer *leaf;
6535         int ret;
6536         int slot;
6537
6538
6539         key.objectid = bytenr;
6540         key.type = (u8)-1;
6541         key.offset = (u64)-1;
6542
6543         while(1) {
6544                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6545                                         &key, path, 0, 1);
6546                 if (ret < 0)
6547                         break;
6548
6549                 if (ret > 0) {
6550                         ret = 0;
6551                         if (path->slots[0] == 0)
6552                                 break;
6553                         path->slots[0]--;
6554                 }
6555                 ret = 0;
6556
6557                 leaf = path->nodes[0];
6558                 slot = path->slots[0];
6559
6560                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6561                 if (found_key.objectid != bytenr)
6562                         break;
6563
6564                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6565                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6566                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6567                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6568                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6569                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6570                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6571                         btrfs_release_path(path);
6572                         if (found_key.type == 0) {
6573                                 if (found_key.offset == 0)
6574                                         break;
6575                                 key.offset = found_key.offset - 1;
6576                                 key.type = found_key.type;
6577                         }
6578                         key.type = found_key.type - 1;
6579                         key.offset = (u64)-1;
6580                         continue;
6581                 }
6582
6583                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6584                         found_key.objectid, found_key.type, found_key.offset);
6585
6586                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6587                 if (ret)
6588                         break;
6589                 btrfs_release_path(path);
6590
6591                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6592                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6593                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6594                                 found_key.offset : root->nodesize;
6595
6596                         ret = btrfs_update_block_group(trans, root, bytenr,
6597                                                        bytes, 0, 0);
6598                         if (ret)
6599                                 break;
6600                 }
6601         }
6602
6603         btrfs_release_path(path);
6604         return ret;
6605 }
6606
6607 /*
6608  * for a single backref, this will allocate a new extent
6609  * and add the backref to it.
6610  */
6611 static int record_extent(struct btrfs_trans_handle *trans,
6612                          struct btrfs_fs_info *info,
6613                          struct btrfs_path *path,
6614                          struct extent_record *rec,
6615                          struct extent_backref *back,
6616                          int allocated, u64 flags)
6617 {
6618         int ret;
6619         struct btrfs_root *extent_root = info->extent_root;
6620         struct extent_buffer *leaf;
6621         struct btrfs_key ins_key;
6622         struct btrfs_extent_item *ei;
6623         struct tree_backref *tback;
6624         struct data_backref *dback;
6625         struct btrfs_tree_block_info *bi;
6626
6627         if (!back->is_data)
6628                 rec->max_size = max_t(u64, rec->max_size,
6629                                     info->extent_root->nodesize);
6630
6631         if (!allocated) {
6632                 u32 item_size = sizeof(*ei);
6633
6634                 if (!back->is_data)
6635                         item_size += sizeof(*bi);
6636
6637                 ins_key.objectid = rec->start;
6638                 ins_key.offset = rec->max_size;
6639                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6640
6641                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6642                                         &ins_key, item_size);
6643                 if (ret)
6644                         goto fail;
6645
6646                 leaf = path->nodes[0];
6647                 ei = btrfs_item_ptr(leaf, path->slots[0],
6648                                     struct btrfs_extent_item);
6649
6650                 btrfs_set_extent_refs(leaf, ei, 0);
6651                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6652
6653                 if (back->is_data) {
6654                         btrfs_set_extent_flags(leaf, ei,
6655                                                BTRFS_EXTENT_FLAG_DATA);
6656                 } else {
6657                         struct btrfs_disk_key copy_key;;
6658
6659                         tback = to_tree_backref(back);
6660                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6661                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6662                                              sizeof(*bi));
6663
6664                         btrfs_set_disk_key_objectid(&copy_key,
6665                                                     rec->info_objectid);
6666                         btrfs_set_disk_key_type(&copy_key, 0);
6667                         btrfs_set_disk_key_offset(&copy_key, 0);
6668
6669                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6670                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6671
6672                         btrfs_set_extent_flags(leaf, ei,
6673                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6674                 }
6675
6676                 btrfs_mark_buffer_dirty(leaf);
6677                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6678                                                rec->max_size, 1, 0);
6679                 if (ret)
6680                         goto fail;
6681                 btrfs_release_path(path);
6682         }
6683
6684         if (back->is_data) {
6685                 u64 parent;
6686                 int i;
6687
6688                 dback = to_data_backref(back);
6689                 if (back->full_backref)
6690                         parent = dback->parent;
6691                 else
6692                         parent = 0;
6693
6694                 for (i = 0; i < dback->found_ref; i++) {
6695                         /* if parent != 0, we're doing a full backref
6696                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6697                          * just makes the backref allocator create a data
6698                          * backref
6699                          */
6700                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6701                                                    rec->start, rec->max_size,
6702                                                    parent,
6703                                                    dback->root,
6704                                                    parent ?
6705                                                    BTRFS_FIRST_FREE_OBJECTID :
6706                                                    dback->owner,
6707                                                    dback->offset);
6708                         if (ret)
6709                                 break;
6710                 }
6711                 fprintf(stderr, "adding new data backref"
6712                                 " on %llu %s %llu owner %llu"
6713                                 " offset %llu found %d\n",
6714                                 (unsigned long long)rec->start,
6715                                 back->full_backref ?
6716                                 "parent" : "root",
6717                                 back->full_backref ?
6718                                 (unsigned long long)parent :
6719                                 (unsigned long long)dback->root,
6720                                 (unsigned long long)dback->owner,
6721                                 (unsigned long long)dback->offset,
6722                                 dback->found_ref);
6723         } else {
6724                 u64 parent;
6725
6726                 tback = to_tree_backref(back);
6727                 if (back->full_backref)
6728                         parent = tback->parent;
6729                 else
6730                         parent = 0;
6731
6732                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6733                                            rec->start, rec->max_size,
6734                                            parent, tback->root, 0, 0);
6735                 fprintf(stderr, "adding new tree backref on "
6736                         "start %llu len %llu parent %llu root %llu\n",
6737                         rec->start, rec->max_size, parent, tback->root);
6738         }
6739 fail:
6740         btrfs_release_path(path);
6741         return ret;
6742 }
6743
6744 static struct extent_entry *find_entry(struct list_head *entries,
6745                                        u64 bytenr, u64 bytes)
6746 {
6747         struct extent_entry *entry = NULL;
6748
6749         list_for_each_entry(entry, entries, list) {
6750                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6751                         return entry;
6752         }
6753
6754         return NULL;
6755 }
6756
6757 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6758 {
6759         struct extent_entry *entry, *best = NULL, *prev = NULL;
6760
6761         list_for_each_entry(entry, entries, list) {
6762                 if (!prev) {
6763                         prev = entry;
6764                         continue;
6765                 }
6766
6767                 /*
6768                  * If there are as many broken entries as entries then we know
6769                  * not to trust this particular entry.
6770                  */
6771                 if (entry->broken == entry->count)
6772                         continue;
6773
6774                 /*
6775                  * If our current entry == best then we can't be sure our best
6776                  * is really the best, so we need to keep searching.
6777                  */
6778                 if (best && best->count == entry->count) {
6779                         prev = entry;
6780                         best = NULL;
6781                         continue;
6782                 }
6783
6784                 /* Prev == entry, not good enough, have to keep searching */
6785                 if (!prev->broken && prev->count == entry->count)
6786                         continue;
6787
6788                 if (!best)
6789                         best = (prev->count > entry->count) ? prev : entry;
6790                 else if (best->count < entry->count)
6791                         best = entry;
6792                 prev = entry;
6793         }
6794
6795         return best;
6796 }
6797
6798 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6799                       struct data_backref *dback, struct extent_entry *entry)
6800 {
6801         struct btrfs_trans_handle *trans;
6802         struct btrfs_root *root;
6803         struct btrfs_file_extent_item *fi;
6804         struct extent_buffer *leaf;
6805         struct btrfs_key key;
6806         u64 bytenr, bytes;
6807         int ret, err;
6808
6809         key.objectid = dback->root;
6810         key.type = BTRFS_ROOT_ITEM_KEY;
6811         key.offset = (u64)-1;
6812         root = btrfs_read_fs_root(info, &key);
6813         if (IS_ERR(root)) {
6814                 fprintf(stderr, "Couldn't find root for our ref\n");
6815                 return -EINVAL;
6816         }
6817
6818         /*
6819          * The backref points to the original offset of the extent if it was
6820          * split, so we need to search down to the offset we have and then walk
6821          * forward until we find the backref we're looking for.
6822          */
6823         key.objectid = dback->owner;
6824         key.type = BTRFS_EXTENT_DATA_KEY;
6825         key.offset = dback->offset;
6826         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6827         if (ret < 0) {
6828                 fprintf(stderr, "Error looking up ref %d\n", ret);
6829                 return ret;
6830         }
6831
6832         while (1) {
6833                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6834                         ret = btrfs_next_leaf(root, path);
6835                         if (ret) {
6836                                 fprintf(stderr, "Couldn't find our ref, next\n");
6837                                 return -EINVAL;
6838                         }
6839                 }
6840                 leaf = path->nodes[0];
6841                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6842                 if (key.objectid != dback->owner ||
6843                     key.type != BTRFS_EXTENT_DATA_KEY) {
6844                         fprintf(stderr, "Couldn't find our ref, search\n");
6845                         return -EINVAL;
6846                 }
6847                 fi = btrfs_item_ptr(leaf, path->slots[0],
6848                                     struct btrfs_file_extent_item);
6849                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6850                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6851
6852                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6853                         break;
6854                 path->slots[0]++;
6855         }
6856
6857         btrfs_release_path(path);
6858
6859         trans = btrfs_start_transaction(root, 1);
6860         if (IS_ERR(trans))
6861                 return PTR_ERR(trans);
6862
6863         /*
6864          * Ok we have the key of the file extent we want to fix, now we can cow
6865          * down to the thing and fix it.
6866          */
6867         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6868         if (ret < 0) {
6869                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6870                         key.objectid, key.type, key.offset, ret);
6871                 goto out;
6872         }
6873         if (ret > 0) {
6874                 fprintf(stderr, "Well that's odd, we just found this key "
6875                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6876                         key.offset);
6877                 ret = -EINVAL;
6878                 goto out;
6879         }
6880         leaf = path->nodes[0];
6881         fi = btrfs_item_ptr(leaf, path->slots[0],
6882                             struct btrfs_file_extent_item);
6883
6884         if (btrfs_file_extent_compression(leaf, fi) &&
6885             dback->disk_bytenr != entry->bytenr) {
6886                 fprintf(stderr, "Ref doesn't match the record start and is "
6887                         "compressed, please take a btrfs-image of this file "
6888                         "system and send it to a btrfs developer so they can "
6889                         "complete this functionality for bytenr %Lu\n",
6890                         dback->disk_bytenr);
6891                 ret = -EINVAL;
6892                 goto out;
6893         }
6894
6895         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6896                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6897         } else if (dback->disk_bytenr > entry->bytenr) {
6898                 u64 off_diff, offset;
6899
6900                 off_diff = dback->disk_bytenr - entry->bytenr;
6901                 offset = btrfs_file_extent_offset(leaf, fi);
6902                 if (dback->disk_bytenr + offset +
6903                     btrfs_file_extent_num_bytes(leaf, fi) >
6904                     entry->bytenr + entry->bytes) {
6905                         fprintf(stderr, "Ref is past the entry end, please "
6906                                 "take a btrfs-image of this file system and "
6907                                 "send it to a btrfs developer, ref %Lu\n",
6908                                 dback->disk_bytenr);
6909                         ret = -EINVAL;
6910                         goto out;
6911                 }
6912                 offset += off_diff;
6913                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6914                 btrfs_set_file_extent_offset(leaf, fi, offset);
6915         } else if (dback->disk_bytenr < entry->bytenr) {
6916                 u64 offset;
6917
6918                 offset = btrfs_file_extent_offset(leaf, fi);
6919                 if (dback->disk_bytenr + offset < entry->bytenr) {
6920                         fprintf(stderr, "Ref is before the entry start, please"
6921                                 " take a btrfs-image of this file system and "
6922                                 "send it to a btrfs developer, ref %Lu\n",
6923                                 dback->disk_bytenr);
6924                         ret = -EINVAL;
6925                         goto out;
6926                 }
6927
6928                 offset += dback->disk_bytenr;
6929                 offset -= entry->bytenr;
6930                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6931                 btrfs_set_file_extent_offset(leaf, fi, offset);
6932         }
6933
6934         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6935
6936         /*
6937          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6938          * only do this if we aren't using compression, otherwise it's a
6939          * trickier case.
6940          */
6941         if (!btrfs_file_extent_compression(leaf, fi))
6942                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6943         else
6944                 printf("ram bytes may be wrong?\n");
6945         btrfs_mark_buffer_dirty(leaf);
6946 out:
6947         err = btrfs_commit_transaction(trans, root);
6948         btrfs_release_path(path);
6949         return ret ? ret : err;
6950 }
6951
6952 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6953                            struct extent_record *rec)
6954 {
6955         struct extent_backref *back, *tmp;
6956         struct data_backref *dback;
6957         struct extent_entry *entry, *best = NULL;
6958         LIST_HEAD(entries);
6959         int nr_entries = 0;
6960         int broken_entries = 0;
6961         int ret = 0;
6962         short mismatch = 0;
6963
6964         /*
6965          * Metadata is easy and the backrefs should always agree on bytenr and
6966          * size, if not we've got bigger issues.
6967          */
6968         if (rec->metadata)
6969                 return 0;
6970
6971         rbtree_postorder_for_each_entry_safe(back, tmp,
6972                                              &rec->backref_tree, node) {
6973                 if (back->full_backref || !back->is_data)
6974                         continue;
6975
6976                 dback = to_data_backref(back);
6977
6978                 /*
6979                  * We only pay attention to backrefs that we found a real
6980                  * backref for.
6981                  */
6982                 if (dback->found_ref == 0)
6983                         continue;
6984
6985                 /*
6986                  * For now we only catch when the bytes don't match, not the
6987                  * bytenr.  We can easily do this at the same time, but I want
6988                  * to have a fs image to test on before we just add repair
6989                  * functionality willy-nilly so we know we won't screw up the
6990                  * repair.
6991                  */
6992
6993                 entry = find_entry(&entries, dback->disk_bytenr,
6994                                    dback->bytes);
6995                 if (!entry) {
6996                         entry = malloc(sizeof(struct extent_entry));
6997                         if (!entry) {
6998                                 ret = -ENOMEM;
6999                                 goto out;
7000                         }
7001                         memset(entry, 0, sizeof(*entry));
7002                         entry->bytenr = dback->disk_bytenr;
7003                         entry->bytes = dback->bytes;
7004                         list_add_tail(&entry->list, &entries);
7005                         nr_entries++;
7006                 }
7007
7008                 /*
7009                  * If we only have on entry we may think the entries agree when
7010                  * in reality they don't so we have to do some extra checking.
7011                  */
7012                 if (dback->disk_bytenr != rec->start ||
7013                     dback->bytes != rec->nr || back->broken)
7014                         mismatch = 1;
7015
7016                 if (back->broken) {
7017                         entry->broken++;
7018                         broken_entries++;
7019                 }
7020
7021                 entry->count++;
7022         }
7023
7024         /* Yay all the backrefs agree, carry on good sir */
7025         if (nr_entries <= 1 && !mismatch)
7026                 goto out;
7027
7028         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7029                 "%Lu\n", rec->start);
7030
7031         /*
7032          * First we want to see if the backrefs can agree amongst themselves who
7033          * is right, so figure out which one of the entries has the highest
7034          * count.
7035          */
7036         best = find_most_right_entry(&entries);
7037
7038         /*
7039          * Ok so we may have an even split between what the backrefs think, so
7040          * this is where we use the extent ref to see what it thinks.
7041          */
7042         if (!best) {
7043                 entry = find_entry(&entries, rec->start, rec->nr);
7044                 if (!entry && (!broken_entries || !rec->found_rec)) {
7045                         fprintf(stderr, "Backrefs don't agree with each other "
7046                                 "and extent record doesn't agree with anybody,"
7047                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7048                                 rec->start, rec->nr);
7049                         ret = -EINVAL;
7050                         goto out;
7051                 } else if (!entry) {
7052                         /*
7053                          * Ok our backrefs were broken, we'll assume this is the
7054                          * correct value and add an entry for this range.
7055                          */
7056                         entry = malloc(sizeof(struct extent_entry));
7057                         if (!entry) {
7058                                 ret = -ENOMEM;
7059                                 goto out;
7060                         }
7061                         memset(entry, 0, sizeof(*entry));
7062                         entry->bytenr = rec->start;
7063                         entry->bytes = rec->nr;
7064                         list_add_tail(&entry->list, &entries);
7065                         nr_entries++;
7066                 }
7067                 entry->count++;
7068                 best = find_most_right_entry(&entries);
7069                 if (!best) {
7070                         fprintf(stderr, "Backrefs and extent record evenly "
7071                                 "split on who is right, this is going to "
7072                                 "require user input to fix bytenr %Lu bytes "
7073                                 "%Lu\n", rec->start, rec->nr);
7074                         ret = -EINVAL;
7075                         goto out;
7076                 }
7077         }
7078
7079         /*
7080          * I don't think this can happen currently as we'll abort() if we catch
7081          * this case higher up, but in case somebody removes that we still can't
7082          * deal with it properly here yet, so just bail out of that's the case.
7083          */
7084         if (best->bytenr != rec->start) {
7085                 fprintf(stderr, "Extent start and backref starts don't match, "
7086                         "please use btrfs-image on this file system and send "
7087                         "it to a btrfs developer so they can make fsck fix "
7088                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7089                         rec->start, rec->nr);
7090                 ret = -EINVAL;
7091                 goto out;
7092         }
7093
7094         /*
7095          * Ok great we all agreed on an extent record, let's go find the real
7096          * references and fix up the ones that don't match.
7097          */
7098         rbtree_postorder_for_each_entry_safe(back, tmp,
7099                                              &rec->backref_tree, node) {
7100                 if (back->full_backref || !back->is_data)
7101                         continue;
7102
7103                 dback = to_data_backref(back);
7104
7105                 /*
7106                  * Still ignoring backrefs that don't have a real ref attached
7107                  * to them.
7108                  */
7109                 if (dback->found_ref == 0)
7110                         continue;
7111
7112                 if (dback->bytes == best->bytes &&
7113                     dback->disk_bytenr == best->bytenr)
7114                         continue;
7115
7116                 ret = repair_ref(info, path, dback, best);
7117                 if (ret)
7118                         goto out;
7119         }
7120
7121         /*
7122          * Ok we messed with the actual refs, which means we need to drop our
7123          * entire cache and go back and rescan.  I know this is a huge pain and
7124          * adds a lot of extra work, but it's the only way to be safe.  Once all
7125          * the backrefs agree we may not need to do anything to the extent
7126          * record itself.
7127          */
7128         ret = -EAGAIN;
7129 out:
7130         while (!list_empty(&entries)) {
7131                 entry = list_entry(entries.next, struct extent_entry, list);
7132                 list_del_init(&entry->list);
7133                 free(entry);
7134         }
7135         return ret;
7136 }
7137
7138 static int process_duplicates(struct btrfs_root *root,
7139                               struct cache_tree *extent_cache,
7140                               struct extent_record *rec)
7141 {
7142         struct extent_record *good, *tmp;
7143         struct cache_extent *cache;
7144         int ret;
7145
7146         /*
7147          * If we found a extent record for this extent then return, or if we
7148          * have more than one duplicate we are likely going to need to delete
7149          * something.
7150          */
7151         if (rec->found_rec || rec->num_duplicates > 1)
7152                 return 0;
7153
7154         /* Shouldn't happen but just in case */
7155         BUG_ON(!rec->num_duplicates);
7156
7157         /*
7158          * So this happens if we end up with a backref that doesn't match the
7159          * actual extent entry.  So either the backref is bad or the extent
7160          * entry is bad.  Either way we want to have the extent_record actually
7161          * reflect what we found in the extent_tree, so we need to take the
7162          * duplicate out and use that as the extent_record since the only way we
7163          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7164          */
7165         remove_cache_extent(extent_cache, &rec->cache);
7166
7167         good = to_extent_record(rec->dups.next);
7168         list_del_init(&good->list);
7169         INIT_LIST_HEAD(&good->backrefs);
7170         INIT_LIST_HEAD(&good->dups);
7171         good->cache.start = good->start;
7172         good->cache.size = good->nr;
7173         good->content_checked = 0;
7174         good->owner_ref_checked = 0;
7175         good->num_duplicates = 0;
7176         good->refs = rec->refs;
7177         list_splice_init(&rec->backrefs, &good->backrefs);
7178         while (1) {
7179                 cache = lookup_cache_extent(extent_cache, good->start,
7180                                             good->nr);
7181                 if (!cache)
7182                         break;
7183                 tmp = container_of(cache, struct extent_record, cache);
7184
7185                 /*
7186                  * If we find another overlapping extent and it's found_rec is
7187                  * set then it's a duplicate and we need to try and delete
7188                  * something.
7189                  */
7190                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7191                         if (list_empty(&good->list))
7192                                 list_add_tail(&good->list,
7193                                               &duplicate_extents);
7194                         good->num_duplicates += tmp->num_duplicates + 1;
7195                         list_splice_init(&tmp->dups, &good->dups);
7196                         list_del_init(&tmp->list);
7197                         list_add_tail(&tmp->list, &good->dups);
7198                         remove_cache_extent(extent_cache, &tmp->cache);
7199                         continue;
7200                 }
7201
7202                 /*
7203                  * Ok we have another non extent item backed extent rec, so lets
7204                  * just add it to this extent and carry on like we did above.
7205                  */
7206                 good->refs += tmp->refs;
7207                 list_splice_init(&tmp->backrefs, &good->backrefs);
7208                 remove_cache_extent(extent_cache, &tmp->cache);
7209                 free(tmp);
7210         }
7211         ret = insert_cache_extent(extent_cache, &good->cache);
7212         BUG_ON(ret);
7213         free(rec);
7214         return good->num_duplicates ? 0 : 1;
7215 }
7216
7217 static int delete_duplicate_records(struct btrfs_root *root,
7218                                     struct extent_record *rec)
7219 {
7220         struct btrfs_trans_handle *trans;
7221         LIST_HEAD(delete_list);
7222         struct btrfs_path *path;
7223         struct extent_record *tmp, *good, *n;
7224         int nr_del = 0;
7225         int ret = 0, err;
7226         struct btrfs_key key;
7227
7228         path = btrfs_alloc_path();
7229         if (!path) {
7230                 ret = -ENOMEM;
7231                 goto out;
7232         }
7233
7234         good = rec;
7235         /* Find the record that covers all of the duplicates. */
7236         list_for_each_entry(tmp, &rec->dups, list) {
7237                 if (good->start < tmp->start)
7238                         continue;
7239                 if (good->nr > tmp->nr)
7240                         continue;
7241
7242                 if (tmp->start + tmp->nr < good->start + good->nr) {
7243                         fprintf(stderr, "Ok we have overlapping extents that "
7244                                 "aren't completely covered by each other, this "
7245                                 "is going to require more careful thought.  "
7246                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7247                                 tmp->start, tmp->nr, good->start, good->nr);
7248                         abort();
7249                 }
7250                 good = tmp;
7251         }
7252
7253         if (good != rec)
7254                 list_add_tail(&rec->list, &delete_list);
7255
7256         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7257                 if (tmp == good)
7258                         continue;
7259                 list_move_tail(&tmp->list, &delete_list);
7260         }
7261
7262         root = root->fs_info->extent_root;
7263         trans = btrfs_start_transaction(root, 1);
7264         if (IS_ERR(trans)) {
7265                 ret = PTR_ERR(trans);
7266                 goto out;
7267         }
7268
7269         list_for_each_entry(tmp, &delete_list, list) {
7270                 if (tmp->found_rec == 0)
7271                         continue;
7272                 key.objectid = tmp->start;
7273                 key.type = BTRFS_EXTENT_ITEM_KEY;
7274                 key.offset = tmp->nr;
7275
7276                 /* Shouldn't happen but just in case */
7277                 if (tmp->metadata) {
7278                         fprintf(stderr, "Well this shouldn't happen, extent "
7279                                 "record overlaps but is metadata? "
7280                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7281                         abort();
7282                 }
7283
7284                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7285                 if (ret) {
7286                         if (ret > 0)
7287                                 ret = -EINVAL;
7288                         break;
7289                 }
7290                 ret = btrfs_del_item(trans, root, path);
7291                 if (ret)
7292                         break;
7293                 btrfs_release_path(path);
7294                 nr_del++;
7295         }
7296         err = btrfs_commit_transaction(trans, root);
7297         if (err && !ret)
7298                 ret = err;
7299 out:
7300         while (!list_empty(&delete_list)) {
7301                 tmp = to_extent_record(delete_list.next);
7302                 list_del_init(&tmp->list);
7303                 if (tmp == rec)
7304                         continue;
7305                 free(tmp);
7306         }
7307
7308         while (!list_empty(&rec->dups)) {
7309                 tmp = to_extent_record(rec->dups.next);
7310                 list_del_init(&tmp->list);
7311                 free(tmp);
7312         }
7313
7314         btrfs_free_path(path);
7315
7316         if (!ret && !nr_del)
7317                 rec->num_duplicates = 0;
7318
7319         return ret ? ret : nr_del;
7320 }
7321
7322 static int find_possible_backrefs(struct btrfs_fs_info *info,
7323                                   struct btrfs_path *path,
7324                                   struct cache_tree *extent_cache,
7325                                   struct extent_record *rec)
7326 {
7327         struct btrfs_root *root;
7328         struct extent_backref *back, *tmp;
7329         struct data_backref *dback;
7330         struct cache_extent *cache;
7331         struct btrfs_file_extent_item *fi;
7332         struct btrfs_key key;
7333         u64 bytenr, bytes;
7334         int ret;
7335
7336         rbtree_postorder_for_each_entry_safe(back, tmp,
7337                                              &rec->backref_tree, node) {
7338                 /* Don't care about full backrefs (poor unloved backrefs) */
7339                 if (back->full_backref || !back->is_data)
7340                         continue;
7341
7342                 dback = to_data_backref(back);
7343
7344                 /* We found this one, we don't need to do a lookup */
7345                 if (dback->found_ref)
7346                         continue;
7347
7348                 key.objectid = dback->root;
7349                 key.type = BTRFS_ROOT_ITEM_KEY;
7350                 key.offset = (u64)-1;
7351
7352                 root = btrfs_read_fs_root(info, &key);
7353
7354                 /* No root, definitely a bad ref, skip */
7355                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7356                         continue;
7357                 /* Other err, exit */
7358                 if (IS_ERR(root))
7359                         return PTR_ERR(root);
7360
7361                 key.objectid = dback->owner;
7362                 key.type = BTRFS_EXTENT_DATA_KEY;
7363                 key.offset = dback->offset;
7364                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7365                 if (ret) {
7366                         btrfs_release_path(path);
7367                         if (ret < 0)
7368                                 return ret;
7369                         /* Didn't find it, we can carry on */
7370                         ret = 0;
7371                         continue;
7372                 }
7373
7374                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7375                                     struct btrfs_file_extent_item);
7376                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7377                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7378                 btrfs_release_path(path);
7379                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7380                 if (cache) {
7381                         struct extent_record *tmp;
7382                         tmp = container_of(cache, struct extent_record, cache);
7383
7384                         /*
7385                          * If we found an extent record for the bytenr for this
7386                          * particular backref then we can't add it to our
7387                          * current extent record.  We only want to add backrefs
7388                          * that don't have a corresponding extent item in the
7389                          * extent tree since they likely belong to this record
7390                          * and we need to fix it if it doesn't match bytenrs.
7391                          */
7392                         if  (tmp->found_rec)
7393                                 continue;
7394                 }
7395
7396                 dback->found_ref += 1;
7397                 dback->disk_bytenr = bytenr;
7398                 dback->bytes = bytes;
7399
7400                 /*
7401                  * Set this so the verify backref code knows not to trust the
7402                  * values in this backref.
7403                  */
7404                 back->broken = 1;
7405         }
7406
7407         return 0;
7408 }
7409
7410 /*
7411  * Record orphan data ref into corresponding root.
7412  *
7413  * Return 0 if the extent item contains data ref and recorded.
7414  * Return 1 if the extent item contains no useful data ref
7415  *   On that case, it may contains only shared_dataref or metadata backref
7416  *   or the file extent exists(this should be handled by the extent bytenr
7417  *   recovery routine)
7418  * Return <0 if something goes wrong.
7419  */
7420 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7421                                       struct extent_record *rec)
7422 {
7423         struct btrfs_key key;
7424         struct btrfs_root *dest_root;
7425         struct extent_backref *back, *tmp;
7426         struct data_backref *dback;
7427         struct orphan_data_extent *orphan;
7428         struct btrfs_path *path;
7429         int recorded_data_ref = 0;
7430         int ret = 0;
7431
7432         if (rec->metadata)
7433                 return 1;
7434         path = btrfs_alloc_path();
7435         if (!path)
7436                 return -ENOMEM;
7437         rbtree_postorder_for_each_entry_safe(back, tmp,
7438                                              &rec->backref_tree, node) {
7439                 if (back->full_backref || !back->is_data ||
7440                     !back->found_extent_tree)
7441                         continue;
7442                 dback = to_data_backref(back);
7443                 if (dback->found_ref)
7444                         continue;
7445                 key.objectid = dback->root;
7446                 key.type = BTRFS_ROOT_ITEM_KEY;
7447                 key.offset = (u64)-1;
7448
7449                 dest_root = btrfs_read_fs_root(fs_info, &key);
7450
7451                 /* For non-exist root we just skip it */
7452                 if (IS_ERR(dest_root) || !dest_root)
7453                         continue;
7454
7455                 key.objectid = dback->owner;
7456                 key.type = BTRFS_EXTENT_DATA_KEY;
7457                 key.offset = dback->offset;
7458
7459                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7460                 /*
7461                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7462                  * we need to record it for inode/file extent rebuild.
7463                  * For ret > 0, we record it only for file extent rebuild.
7464                  * For ret == 0, the file extent exists but only bytenr
7465                  * mismatch, let the original bytenr fix routine to handle,
7466                  * don't record it.
7467                  */
7468                 if (ret == 0)
7469                         continue;
7470                 ret = 0;
7471                 orphan = malloc(sizeof(*orphan));
7472                 if (!orphan) {
7473                         ret = -ENOMEM;
7474                         goto out;
7475                 }
7476                 INIT_LIST_HEAD(&orphan->list);
7477                 orphan->root = dback->root;
7478                 orphan->objectid = dback->owner;
7479                 orphan->offset = dback->offset;
7480                 orphan->disk_bytenr = rec->cache.start;
7481                 orphan->disk_len = rec->cache.size;
7482                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7483                 recorded_data_ref = 1;
7484         }
7485 out:
7486         btrfs_free_path(path);
7487         if (!ret)
7488                 return !recorded_data_ref;
7489         else
7490                 return ret;
7491 }
7492
7493 /*
7494  * when an incorrect extent item is found, this will delete
7495  * all of the existing entries for it and recreate them
7496  * based on what the tree scan found.
7497  */
7498 static int fixup_extent_refs(struct btrfs_fs_info *info,
7499                              struct cache_tree *extent_cache,
7500                              struct extent_record *rec)
7501 {
7502         struct btrfs_trans_handle *trans = NULL;
7503         int ret;
7504         struct btrfs_path *path;
7505         struct cache_extent *cache;
7506         struct extent_backref *back, *tmp;
7507         int allocated = 0;
7508         u64 flags = 0;
7509
7510         if (rec->flag_block_full_backref)
7511                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7512
7513         path = btrfs_alloc_path();
7514         if (!path)
7515                 return -ENOMEM;
7516
7517         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7518                 /*
7519                  * Sometimes the backrefs themselves are so broken they don't
7520                  * get attached to any meaningful rec, so first go back and
7521                  * check any of our backrefs that we couldn't find and throw
7522                  * them into the list if we find the backref so that
7523                  * verify_backrefs can figure out what to do.
7524                  */
7525                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7526                 if (ret < 0)
7527                         goto out;
7528         }
7529
7530         /* step one, make sure all of the backrefs agree */
7531         ret = verify_backrefs(info, path, rec);
7532         if (ret < 0)
7533                 goto out;
7534
7535         trans = btrfs_start_transaction(info->extent_root, 1);
7536         if (IS_ERR(trans)) {
7537                 ret = PTR_ERR(trans);
7538                 goto out;
7539         }
7540
7541         /* step two, delete all the existing records */
7542         ret = delete_extent_records(trans, info->extent_root, path,
7543                                     rec->start, rec->max_size);
7544
7545         if (ret < 0)
7546                 goto out;
7547
7548         /* was this block corrupt?  If so, don't add references to it */
7549         cache = lookup_cache_extent(info->corrupt_blocks,
7550                                     rec->start, rec->max_size);
7551         if (cache) {
7552                 ret = 0;
7553                 goto out;
7554         }
7555
7556         /* step three, recreate all the refs we did find */
7557         rbtree_postorder_for_each_entry_safe(back, tmp,
7558                                              &rec->backref_tree, node) {
7559                 /*
7560                  * if we didn't find any references, don't create a
7561                  * new extent record
7562                  */
7563                 if (!back->found_ref)
7564                         continue;
7565
7566                 rec->bad_full_backref = 0;
7567                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7568                 allocated = 1;
7569
7570                 if (ret)
7571                         goto out;
7572         }
7573 out:
7574         if (trans) {
7575                 int err = btrfs_commit_transaction(trans, info->extent_root);
7576                 if (!ret)
7577                         ret = err;
7578         }
7579
7580         btrfs_free_path(path);
7581         return ret;
7582 }
7583
7584 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7585                               struct extent_record *rec)
7586 {
7587         struct btrfs_trans_handle *trans;
7588         struct btrfs_root *root = fs_info->extent_root;
7589         struct btrfs_path *path;
7590         struct btrfs_extent_item *ei;
7591         struct btrfs_key key;
7592         u64 flags;
7593         int ret = 0;
7594
7595         key.objectid = rec->start;
7596         if (rec->metadata) {
7597                 key.type = BTRFS_METADATA_ITEM_KEY;
7598                 key.offset = rec->info_level;
7599         } else {
7600                 key.type = BTRFS_EXTENT_ITEM_KEY;
7601                 key.offset = rec->max_size;
7602         }
7603
7604         path = btrfs_alloc_path();
7605         if (!path)
7606                 return -ENOMEM;
7607
7608         trans = btrfs_start_transaction(root, 0);
7609         if (IS_ERR(trans)) {
7610                 btrfs_free_path(path);
7611                 return PTR_ERR(trans);
7612         }
7613
7614         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7615         if (ret < 0) {
7616                 btrfs_free_path(path);
7617                 btrfs_commit_transaction(trans, root);
7618                 return ret;
7619         } else if (ret) {
7620                 fprintf(stderr, "Didn't find extent for %llu\n",
7621                         (unsigned long long)rec->start);
7622                 btrfs_free_path(path);
7623                 btrfs_commit_transaction(trans, root);
7624                 return -ENOENT;
7625         }
7626
7627         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7628                             struct btrfs_extent_item);
7629         flags = btrfs_extent_flags(path->nodes[0], ei);
7630         if (rec->flag_block_full_backref) {
7631                 fprintf(stderr, "setting full backref on %llu\n",
7632                         (unsigned long long)key.objectid);
7633                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7634         } else {
7635                 fprintf(stderr, "clearing full backref on %llu\n",
7636                         (unsigned long long)key.objectid);
7637                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7638         }
7639         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7640         btrfs_mark_buffer_dirty(path->nodes[0]);
7641         btrfs_free_path(path);
7642         return btrfs_commit_transaction(trans, root);
7643 }
7644
7645 /* right now we only prune from the extent allocation tree */
7646 static int prune_one_block(struct btrfs_trans_handle *trans,
7647                            struct btrfs_fs_info *info,
7648                            struct btrfs_corrupt_block *corrupt)
7649 {
7650         int ret;
7651         struct btrfs_path path;
7652         struct extent_buffer *eb;
7653         u64 found;
7654         int slot;
7655         int nritems;
7656         int level = corrupt->level + 1;
7657
7658         btrfs_init_path(&path);
7659 again:
7660         /* we want to stop at the parent to our busted block */
7661         path.lowest_level = level;
7662
7663         ret = btrfs_search_slot(trans, info->extent_root,
7664                                 &corrupt->key, &path, -1, 1);
7665
7666         if (ret < 0)
7667                 goto out;
7668
7669         eb = path.nodes[level];
7670         if (!eb) {
7671                 ret = -ENOENT;
7672                 goto out;
7673         }
7674
7675         /*
7676          * hopefully the search gave us the block we want to prune,
7677          * lets try that first
7678          */
7679         slot = path.slots[level];
7680         found =  btrfs_node_blockptr(eb, slot);
7681         if (found == corrupt->cache.start)
7682                 goto del_ptr;
7683
7684         nritems = btrfs_header_nritems(eb);
7685
7686         /* the search failed, lets scan this node and hope we find it */
7687         for (slot = 0; slot < nritems; slot++) {
7688                 found =  btrfs_node_blockptr(eb, slot);
7689                 if (found == corrupt->cache.start)
7690                         goto del_ptr;
7691         }
7692         /*
7693          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7694          * to this block
7695          */
7696         if (eb == info->extent_root->node) {
7697                 ret = -ENOENT;
7698                 goto out;
7699         } else {
7700                 level++;
7701                 btrfs_release_path(&path);
7702                 goto again;
7703         }
7704
7705 del_ptr:
7706         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7707         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7708
7709 out:
7710         btrfs_release_path(&path);
7711         return ret;
7712 }
7713
7714 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7715 {
7716         struct btrfs_trans_handle *trans = NULL;
7717         struct cache_extent *cache;
7718         struct btrfs_corrupt_block *corrupt;
7719
7720         while (1) {
7721                 cache = search_cache_extent(info->corrupt_blocks, 0);
7722                 if (!cache)
7723                         break;
7724                 if (!trans) {
7725                         trans = btrfs_start_transaction(info->extent_root, 1);
7726                         if (IS_ERR(trans))
7727                                 return PTR_ERR(trans);
7728                 }
7729                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7730                 prune_one_block(trans, info, corrupt);
7731                 remove_cache_extent(info->corrupt_blocks, cache);
7732         }
7733         if (trans)
7734                 return btrfs_commit_transaction(trans, info->extent_root);
7735         return 0;
7736 }
7737
7738 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7739 {
7740         struct btrfs_block_group_cache *cache;
7741         u64 start, end;
7742         int ret;
7743
7744         while (1) {
7745                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7746                                             &start, &end, EXTENT_DIRTY);
7747                 if (ret)
7748                         break;
7749                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7750                                    GFP_NOFS);
7751         }
7752
7753         start = 0;
7754         while (1) {
7755                 cache = btrfs_lookup_first_block_group(fs_info, start);
7756                 if (!cache)
7757                         break;
7758                 if (cache->cached)
7759                         cache->cached = 0;
7760                 start = cache->key.objectid + cache->key.offset;
7761         }
7762 }
7763
7764 static int check_extent_refs(struct btrfs_root *root,
7765                              struct cache_tree *extent_cache)
7766 {
7767         struct extent_record *rec;
7768         struct cache_extent *cache;
7769         int err = 0;
7770         int ret = 0;
7771         int fixed = 0;
7772         int had_dups = 0;
7773         int recorded = 0;
7774
7775         if (repair) {
7776                 /*
7777                  * if we're doing a repair, we have to make sure
7778                  * we don't allocate from the problem extents.
7779                  * In the worst case, this will be all the
7780                  * extents in the FS
7781                  */
7782                 cache = search_cache_extent(extent_cache, 0);
7783                 while(cache) {
7784                         rec = container_of(cache, struct extent_record, cache);
7785                         set_extent_dirty(root->fs_info->excluded_extents,
7786                                          rec->start,
7787                                          rec->start + rec->max_size - 1,
7788                                          GFP_NOFS);
7789                         cache = next_cache_extent(cache);
7790                 }
7791
7792                 /* pin down all the corrupted blocks too */
7793                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7794                 while(cache) {
7795                         set_extent_dirty(root->fs_info->excluded_extents,
7796                                          cache->start,
7797                                          cache->start + cache->size - 1,
7798                                          GFP_NOFS);
7799                         cache = next_cache_extent(cache);
7800                 }
7801                 prune_corrupt_blocks(root->fs_info);
7802                 reset_cached_block_groups(root->fs_info);
7803         }
7804
7805         reset_cached_block_groups(root->fs_info);
7806
7807         /*
7808          * We need to delete any duplicate entries we find first otherwise we
7809          * could mess up the extent tree when we have backrefs that actually
7810          * belong to a different extent item and not the weird duplicate one.
7811          */
7812         while (repair && !list_empty(&duplicate_extents)) {
7813                 rec = to_extent_record(duplicate_extents.next);
7814                 list_del_init(&rec->list);
7815
7816                 /* Sometimes we can find a backref before we find an actual
7817                  * extent, so we need to process it a little bit to see if there
7818                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7819                  * if this is a backref screwup.  If we need to delete stuff
7820                  * process_duplicates() will return 0, otherwise it will return
7821                  * 1 and we
7822                  */
7823                 if (process_duplicates(root, extent_cache, rec))
7824                         continue;
7825                 ret = delete_duplicate_records(root, rec);
7826                 if (ret < 0)
7827                         return ret;
7828                 /*
7829                  * delete_duplicate_records will return the number of entries
7830                  * deleted, so if it's greater than 0 then we know we actually
7831                  * did something and we need to remove.
7832                  */
7833                 if (ret)
7834                         had_dups = 1;
7835         }
7836
7837         if (had_dups)
7838                 return -EAGAIN;
7839
7840         while(1) {
7841                 int cur_err = 0;
7842
7843                 fixed = 0;
7844                 recorded = 0;
7845                 cache = search_cache_extent(extent_cache, 0);
7846                 if (!cache)
7847                         break;
7848                 rec = container_of(cache, struct extent_record, cache);
7849                 if (rec->num_duplicates) {
7850                         fprintf(stderr, "extent item %llu has multiple extent "
7851                                 "items\n", (unsigned long long)rec->start);
7852                         err = 1;
7853                         cur_err = 1;
7854                 }
7855
7856                 if (rec->refs != rec->extent_item_refs) {
7857                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7858                                 (unsigned long long)rec->start,
7859                                 (unsigned long long)rec->nr);
7860                         fprintf(stderr, "extent item %llu, found %llu\n",
7861                                 (unsigned long long)rec->extent_item_refs,
7862                                 (unsigned long long)rec->refs);
7863                         ret = record_orphan_data_extents(root->fs_info, rec);
7864                         if (ret < 0)
7865                                 goto repair_abort;
7866                         if (ret == 0) {
7867                                 recorded = 1;
7868                         } else {
7869                                 /*
7870                                  * we can't use the extent to repair file
7871                                  * extent, let the fallback method handle it.
7872                                  */
7873                                 if (!fixed && repair) {
7874                                         ret = fixup_extent_refs(
7875                                                         root->fs_info,
7876                                                         extent_cache, rec);
7877                                         if (ret)
7878                                                 goto repair_abort;
7879                                         fixed = 1;
7880                                 }
7881                         }
7882                         err = 1;
7883                         cur_err = 1;
7884                 }
7885                 if (all_backpointers_checked(rec, 1)) {
7886                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7887                                 (unsigned long long)rec->start,
7888                                 (unsigned long long)rec->nr);
7889
7890                         if (!fixed && !recorded && repair) {
7891                                 ret = fixup_extent_refs(root->fs_info,
7892                                                         extent_cache, rec);
7893                                 if (ret)
7894                                         goto repair_abort;
7895                                 fixed = 1;
7896                         }
7897                         cur_err = 1;
7898                         err = 1;
7899                 }
7900                 if (!rec->owner_ref_checked) {
7901                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7902                                 (unsigned long long)rec->start,
7903                                 (unsigned long long)rec->nr);
7904                         if (!fixed && !recorded && repair) {
7905                                 ret = fixup_extent_refs(root->fs_info,
7906                                                         extent_cache, rec);
7907                                 if (ret)
7908                                         goto repair_abort;
7909                                 fixed = 1;
7910                         }
7911                         err = 1;
7912                         cur_err = 1;
7913                 }
7914                 if (rec->bad_full_backref) {
7915                         fprintf(stderr, "bad full backref, on [%llu]\n",
7916                                 (unsigned long long)rec->start);
7917                         if (repair) {
7918                                 ret = fixup_extent_flags(root->fs_info, rec);
7919                                 if (ret)
7920                                         goto repair_abort;
7921                                 fixed = 1;
7922                         }
7923                         err = 1;
7924                         cur_err = 1;
7925                 }
7926                 /*
7927                  * Although it's not a extent ref's problem, we reuse this
7928                  * routine for error reporting.
7929                  * No repair function yet.
7930                  */
7931                 if (rec->crossing_stripes) {
7932                         fprintf(stderr,
7933                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7934                                 rec->start, rec->start + rec->max_size);
7935                         err = 1;
7936                         cur_err = 1;
7937                 }
7938
7939                 if (rec->wrong_chunk_type) {
7940                         fprintf(stderr,
7941                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7942                                 rec->start, rec->start + rec->max_size);
7943                         err = 1;
7944                         cur_err = 1;
7945                 }
7946
7947                 remove_cache_extent(extent_cache, cache);
7948                 free_all_extent_backrefs(rec);
7949                 if (!init_extent_tree && repair && (!cur_err || fixed))
7950                         clear_extent_dirty(root->fs_info->excluded_extents,
7951                                            rec->start,
7952                                            rec->start + rec->max_size - 1,
7953                                            GFP_NOFS);
7954                 free(rec);
7955         }
7956 repair_abort:
7957         if (repair) {
7958                 if (ret && ret != -EAGAIN) {
7959                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7960                         exit(1);
7961                 } else if (!ret) {
7962                         struct btrfs_trans_handle *trans;
7963
7964                         root = root->fs_info->extent_root;
7965                         trans = btrfs_start_transaction(root, 1);
7966                         if (IS_ERR(trans)) {
7967                                 ret = PTR_ERR(trans);
7968                                 goto repair_abort;
7969                         }
7970
7971                         btrfs_fix_block_accounting(trans, root);
7972                         ret = btrfs_commit_transaction(trans, root);
7973                         if (ret)
7974                                 goto repair_abort;
7975                 }
7976                 if (err)
7977                         fprintf(stderr, "repaired damaged extent references\n");
7978                 return ret;
7979         }
7980         return err;
7981 }
7982
7983 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7984 {
7985         u64 stripe_size;
7986
7987         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7988                 stripe_size = length;
7989                 stripe_size /= num_stripes;
7990         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7991                 stripe_size = length * 2;
7992                 stripe_size /= num_stripes;
7993         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7994                 stripe_size = length;
7995                 stripe_size /= (num_stripes - 1);
7996         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7997                 stripe_size = length;
7998                 stripe_size /= (num_stripes - 2);
7999         } else {
8000                 stripe_size = length;
8001         }
8002         return stripe_size;
8003 }
8004
8005 /*
8006  * Check the chunk with its block group/dev list ref:
8007  * Return 0 if all refs seems valid.
8008  * Return 1 if part of refs seems valid, need later check for rebuild ref
8009  * like missing block group and needs to search extent tree to rebuild them.
8010  * Return -1 if essential refs are missing and unable to rebuild.
8011  */
8012 static int check_chunk_refs(struct chunk_record *chunk_rec,
8013                             struct block_group_tree *block_group_cache,
8014                             struct device_extent_tree *dev_extent_cache,
8015                             int silent)
8016 {
8017         struct cache_extent *block_group_item;
8018         struct block_group_record *block_group_rec;
8019         struct cache_extent *dev_extent_item;
8020         struct device_extent_record *dev_extent_rec;
8021         u64 devid;
8022         u64 offset;
8023         u64 length;
8024         int metadump_v2 = 0;
8025         int i;
8026         int ret = 0;
8027
8028         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8029                                                chunk_rec->offset,
8030                                                chunk_rec->length);
8031         if (block_group_item) {
8032                 block_group_rec = container_of(block_group_item,
8033                                                struct block_group_record,
8034                                                cache);
8035                 if (chunk_rec->length != block_group_rec->offset ||
8036                     chunk_rec->offset != block_group_rec->objectid ||
8037                     (!metadump_v2 &&
8038                      chunk_rec->type_flags != block_group_rec->flags)) {
8039                         if (!silent)
8040                                 fprintf(stderr,
8041                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8042                                         chunk_rec->objectid,
8043                                         chunk_rec->type,
8044                                         chunk_rec->offset,
8045                                         chunk_rec->length,
8046                                         chunk_rec->offset,
8047                                         chunk_rec->type_flags,
8048                                         block_group_rec->objectid,
8049                                         block_group_rec->type,
8050                                         block_group_rec->offset,
8051                                         block_group_rec->offset,
8052                                         block_group_rec->objectid,
8053                                         block_group_rec->flags);
8054                         ret = -1;
8055                 } else {
8056                         list_del_init(&block_group_rec->list);
8057                         chunk_rec->bg_rec = block_group_rec;
8058                 }
8059         } else {
8060                 if (!silent)
8061                         fprintf(stderr,
8062                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8063                                 chunk_rec->objectid,
8064                                 chunk_rec->type,
8065                                 chunk_rec->offset,
8066                                 chunk_rec->length,
8067                                 chunk_rec->offset,
8068                                 chunk_rec->type_flags);
8069                 ret = 1;
8070         }
8071
8072         if (metadump_v2)
8073                 return ret;
8074
8075         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8076                                     chunk_rec->num_stripes);
8077         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8078                 devid = chunk_rec->stripes[i].devid;
8079                 offset = chunk_rec->stripes[i].offset;
8080                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8081                                                        devid, offset, length);
8082                 if (dev_extent_item) {
8083                         dev_extent_rec = container_of(dev_extent_item,
8084                                                 struct device_extent_record,
8085                                                 cache);
8086                         if (dev_extent_rec->objectid != devid ||
8087                             dev_extent_rec->offset != offset ||
8088                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8089                             dev_extent_rec->length != length) {
8090                                 if (!silent)
8091                                         fprintf(stderr,
8092                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8093                                                 chunk_rec->objectid,
8094                                                 chunk_rec->type,
8095                                                 chunk_rec->offset,
8096                                                 chunk_rec->stripes[i].devid,
8097                                                 chunk_rec->stripes[i].offset,
8098                                                 dev_extent_rec->objectid,
8099                                                 dev_extent_rec->offset,
8100                                                 dev_extent_rec->length);
8101                                 ret = -1;
8102                         } else {
8103                                 list_move(&dev_extent_rec->chunk_list,
8104                                           &chunk_rec->dextents);
8105                         }
8106                 } else {
8107                         if (!silent)
8108                                 fprintf(stderr,
8109                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8110                                         chunk_rec->objectid,
8111                                         chunk_rec->type,
8112                                         chunk_rec->offset,
8113                                         chunk_rec->stripes[i].devid,
8114                                         chunk_rec->stripes[i].offset);
8115                         ret = -1;
8116                 }
8117         }
8118         return ret;
8119 }
8120
8121 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8122 int check_chunks(struct cache_tree *chunk_cache,
8123                  struct block_group_tree *block_group_cache,
8124                  struct device_extent_tree *dev_extent_cache,
8125                  struct list_head *good, struct list_head *bad,
8126                  struct list_head *rebuild, int silent)
8127 {
8128         struct cache_extent *chunk_item;
8129         struct chunk_record *chunk_rec;
8130         struct block_group_record *bg_rec;
8131         struct device_extent_record *dext_rec;
8132         int err;
8133         int ret = 0;
8134
8135         chunk_item = first_cache_extent(chunk_cache);
8136         while (chunk_item) {
8137                 chunk_rec = container_of(chunk_item, struct chunk_record,
8138                                          cache);
8139                 err = check_chunk_refs(chunk_rec, block_group_cache,
8140                                        dev_extent_cache, silent);
8141                 if (err < 0)
8142                         ret = err;
8143                 if (err == 0 && good)
8144                         list_add_tail(&chunk_rec->list, good);
8145                 if (err > 0 && rebuild)
8146                         list_add_tail(&chunk_rec->list, rebuild);
8147                 if (err < 0 && bad)
8148                         list_add_tail(&chunk_rec->list, bad);
8149                 chunk_item = next_cache_extent(chunk_item);
8150         }
8151
8152         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8153                 if (!silent)
8154                         fprintf(stderr,
8155                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8156                                 bg_rec->objectid,
8157                                 bg_rec->offset,
8158                                 bg_rec->flags);
8159                 if (!ret)
8160                         ret = 1;
8161         }
8162
8163         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8164                             chunk_list) {
8165                 if (!silent)
8166                         fprintf(stderr,
8167                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8168                                 dext_rec->objectid,
8169                                 dext_rec->offset,
8170                                 dext_rec->length);
8171                 if (!ret)
8172                         ret = 1;
8173         }
8174         return ret;
8175 }
8176
8177
8178 static int check_device_used(struct device_record *dev_rec,
8179                              struct device_extent_tree *dext_cache)
8180 {
8181         struct cache_extent *cache;
8182         struct device_extent_record *dev_extent_rec;
8183         u64 total_byte = 0;
8184
8185         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8186         while (cache) {
8187                 dev_extent_rec = container_of(cache,
8188                                               struct device_extent_record,
8189                                               cache);
8190                 if (dev_extent_rec->objectid != dev_rec->devid)
8191                         break;
8192
8193                 list_del_init(&dev_extent_rec->device_list);
8194                 total_byte += dev_extent_rec->length;
8195                 cache = next_cache_extent(cache);
8196         }
8197
8198         if (total_byte != dev_rec->byte_used) {
8199                 fprintf(stderr,
8200                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8201                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8202                         dev_rec->type, dev_rec->offset);
8203                 return -1;
8204         } else {
8205                 return 0;
8206         }
8207 }
8208
8209 /* check btrfs_dev_item -> btrfs_dev_extent */
8210 static int check_devices(struct rb_root *dev_cache,
8211                          struct device_extent_tree *dev_extent_cache)
8212 {
8213         struct rb_node *dev_node;
8214         struct device_record *dev_rec;
8215         struct device_extent_record *dext_rec;
8216         int err;
8217         int ret = 0;
8218
8219         dev_node = rb_first(dev_cache);
8220         while (dev_node) {
8221                 dev_rec = container_of(dev_node, struct device_record, node);
8222                 err = check_device_used(dev_rec, dev_extent_cache);
8223                 if (err)
8224                         ret = err;
8225
8226                 dev_node = rb_next(dev_node);
8227         }
8228         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8229                             device_list) {
8230                 fprintf(stderr,
8231                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8232                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8233                 if (!ret)
8234                         ret = 1;
8235         }
8236         return ret;
8237 }
8238
8239 static int add_root_item_to_list(struct list_head *head,
8240                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8241                                   u8 level, u8 drop_level,
8242                                   int level_size, struct btrfs_key *drop_key)
8243 {
8244
8245         struct root_item_record *ri_rec;
8246         ri_rec = malloc(sizeof(*ri_rec));
8247         if (!ri_rec)
8248                 return -ENOMEM;
8249         ri_rec->bytenr = bytenr;
8250         ri_rec->objectid = objectid;
8251         ri_rec->level = level;
8252         ri_rec->level_size = level_size;
8253         ri_rec->drop_level = drop_level;
8254         ri_rec->last_snapshot = last_snapshot;
8255         if (drop_key)
8256                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8257         list_add_tail(&ri_rec->list, head);
8258
8259         return 0;
8260 }
8261
8262 static void free_root_item_list(struct list_head *list)
8263 {
8264         struct root_item_record *ri_rec;
8265
8266         while (!list_empty(list)) {
8267                 ri_rec = list_first_entry(list, struct root_item_record,
8268                                           list);
8269                 list_del_init(&ri_rec->list);
8270                 free(ri_rec);
8271         }
8272 }
8273
8274 static int deal_root_from_list(struct list_head *list,
8275                                struct btrfs_root *root,
8276                                struct block_info *bits,
8277                                int bits_nr,
8278                                struct cache_tree *pending,
8279                                struct cache_tree *seen,
8280                                struct cache_tree *reada,
8281                                struct cache_tree *nodes,
8282                                struct cache_tree *extent_cache,
8283                                struct cache_tree *chunk_cache,
8284                                struct rb_root *dev_cache,
8285                                struct block_group_tree *block_group_cache,
8286                                struct device_extent_tree *dev_extent_cache)
8287 {
8288         int ret = 0;
8289         u64 last;
8290
8291         while (!list_empty(list)) {
8292                 struct root_item_record *rec;
8293                 struct extent_buffer *buf;
8294                 rec = list_entry(list->next,
8295                                  struct root_item_record, list);
8296                 last = 0;
8297                 buf = read_tree_block(root->fs_info->tree_root,
8298                                       rec->bytenr, rec->level_size, 0);
8299                 if (!extent_buffer_uptodate(buf)) {
8300                         free_extent_buffer(buf);
8301                         ret = -EIO;
8302                         break;
8303                 }
8304                 add_root_to_pending(buf, extent_cache, pending,
8305                                     seen, nodes, rec->objectid);
8306                 /*
8307                  * To rebuild extent tree, we need deal with snapshot
8308                  * one by one, otherwise we deal with node firstly which
8309                  * can maximize readahead.
8310                  */
8311                 while (1) {
8312                         ret = run_next_block(root, bits, bits_nr, &last,
8313                                              pending, seen, reada, nodes,
8314                                              extent_cache, chunk_cache,
8315                                              dev_cache, block_group_cache,
8316                                              dev_extent_cache, rec);
8317                         if (ret != 0)
8318                                 break;
8319                 }
8320                 free_extent_buffer(buf);
8321                 list_del(&rec->list);
8322                 free(rec);
8323                 if (ret < 0)
8324                         break;
8325         }
8326         while (ret >= 0) {
8327                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8328                                      reada, nodes, extent_cache, chunk_cache,
8329                                      dev_cache, block_group_cache,
8330                                      dev_extent_cache, NULL);
8331                 if (ret != 0) {
8332                         if (ret > 0)
8333                                 ret = 0;
8334                         break;
8335                 }
8336         }
8337         return ret;
8338 }
8339
8340 static int check_chunks_and_extents(struct btrfs_root *root)
8341 {
8342         struct rb_root dev_cache;
8343         struct cache_tree chunk_cache;
8344         struct block_group_tree block_group_cache;
8345         struct device_extent_tree dev_extent_cache;
8346         struct cache_tree extent_cache;
8347         struct cache_tree seen;
8348         struct cache_tree pending;
8349         struct cache_tree reada;
8350         struct cache_tree nodes;
8351         struct extent_io_tree excluded_extents;
8352         struct cache_tree corrupt_blocks;
8353         struct btrfs_path path;
8354         struct btrfs_key key;
8355         struct btrfs_key found_key;
8356         int ret, err = 0;
8357         struct block_info *bits;
8358         int bits_nr;
8359         struct extent_buffer *leaf;
8360         int slot;
8361         struct btrfs_root_item ri;
8362         struct list_head dropping_trees;
8363         struct list_head normal_trees;
8364         struct btrfs_root *root1;
8365         u64 objectid;
8366         u32 level_size;
8367         u8 level;
8368
8369         dev_cache = RB_ROOT;
8370         cache_tree_init(&chunk_cache);
8371         block_group_tree_init(&block_group_cache);
8372         device_extent_tree_init(&dev_extent_cache);
8373
8374         cache_tree_init(&extent_cache);
8375         cache_tree_init(&seen);
8376         cache_tree_init(&pending);
8377         cache_tree_init(&nodes);
8378         cache_tree_init(&reada);
8379         cache_tree_init(&corrupt_blocks);
8380         extent_io_tree_init(&excluded_extents);
8381         INIT_LIST_HEAD(&dropping_trees);
8382         INIT_LIST_HEAD(&normal_trees);
8383
8384         if (repair) {
8385                 root->fs_info->excluded_extents = &excluded_extents;
8386                 root->fs_info->fsck_extent_cache = &extent_cache;
8387                 root->fs_info->free_extent_hook = free_extent_hook;
8388                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8389         }
8390
8391         bits_nr = 1024;
8392         bits = malloc(bits_nr * sizeof(struct block_info));
8393         if (!bits) {
8394                 perror("malloc");
8395                 exit(1);
8396         }
8397
8398         if (ctx.progress_enabled) {
8399                 ctx.tp = TASK_EXTENTS;
8400                 task_start(ctx.info);
8401         }
8402
8403 again:
8404         root1 = root->fs_info->tree_root;
8405         level = btrfs_header_level(root1->node);
8406         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8407                                     root1->node->start, 0, level, 0,
8408                                     root1->nodesize, NULL);
8409         if (ret < 0)
8410                 goto out;
8411         root1 = root->fs_info->chunk_root;
8412         level = btrfs_header_level(root1->node);
8413         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8414                                     root1->node->start, 0, level, 0,
8415                                     root1->nodesize, NULL);
8416         if (ret < 0)
8417                 goto out;
8418         btrfs_init_path(&path);
8419         key.offset = 0;
8420         key.objectid = 0;
8421         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8422         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8423                                         &key, &path, 0, 0);
8424         if (ret < 0)
8425                 goto out;
8426         while(1) {
8427                 leaf = path.nodes[0];
8428                 slot = path.slots[0];
8429                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8430                         ret = btrfs_next_leaf(root, &path);
8431                         if (ret != 0)
8432                                 break;
8433                         leaf = path.nodes[0];
8434                         slot = path.slots[0];
8435                 }
8436                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8437                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8438                         unsigned long offset;
8439                         u64 last_snapshot;
8440
8441                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8442                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8443                         last_snapshot = btrfs_root_last_snapshot(&ri);
8444                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8445                                 level = btrfs_root_level(&ri);
8446                                 level_size = root->nodesize;
8447                                 ret = add_root_item_to_list(&normal_trees,
8448                                                 found_key.objectid,
8449                                                 btrfs_root_bytenr(&ri),
8450                                                 last_snapshot, level,
8451                                                 0, level_size, NULL);
8452                                 if (ret < 0)
8453                                         goto out;
8454                         } else {
8455                                 level = btrfs_root_level(&ri);
8456                                 level_size = root->nodesize;
8457                                 objectid = found_key.objectid;
8458                                 btrfs_disk_key_to_cpu(&found_key,
8459                                                       &ri.drop_progress);
8460                                 ret = add_root_item_to_list(&dropping_trees,
8461                                                 objectid,
8462                                                 btrfs_root_bytenr(&ri),
8463                                                 last_snapshot, level,
8464                                                 ri.drop_level,
8465                                                 level_size, &found_key);
8466                                 if (ret < 0)
8467                                         goto out;
8468                         }
8469                 }
8470                 path.slots[0]++;
8471         }
8472         btrfs_release_path(&path);
8473
8474         /*
8475          * check_block can return -EAGAIN if it fixes something, please keep
8476          * this in mind when dealing with return values from these functions, if
8477          * we get -EAGAIN we want to fall through and restart the loop.
8478          */
8479         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8480                                   &seen, &reada, &nodes, &extent_cache,
8481                                   &chunk_cache, &dev_cache, &block_group_cache,
8482                                   &dev_extent_cache);
8483         if (ret < 0) {
8484                 if (ret == -EAGAIN)
8485                         goto loop;
8486                 goto out;
8487         }
8488         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8489                                   &pending, &seen, &reada, &nodes,
8490                                   &extent_cache, &chunk_cache, &dev_cache,
8491                                   &block_group_cache, &dev_extent_cache);
8492         if (ret < 0) {
8493                 if (ret == -EAGAIN)
8494                         goto loop;
8495                 goto out;
8496         }
8497
8498         ret = check_chunks(&chunk_cache, &block_group_cache,
8499                            &dev_extent_cache, NULL, NULL, NULL, 0);
8500         if (ret) {
8501                 if (ret == -EAGAIN)
8502                         goto loop;
8503                 err = ret;
8504         }
8505
8506         ret = check_extent_refs(root, &extent_cache);
8507         if (ret < 0) {
8508                 if (ret == -EAGAIN)
8509                         goto loop;
8510                 goto out;
8511         }
8512
8513         ret = check_devices(&dev_cache, &dev_extent_cache);
8514         if (ret && err)
8515                 ret = err;
8516
8517 out:
8518         task_stop(ctx.info);
8519         if (repair) {
8520                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8521                 extent_io_tree_cleanup(&excluded_extents);
8522                 root->fs_info->fsck_extent_cache = NULL;
8523                 root->fs_info->free_extent_hook = NULL;
8524                 root->fs_info->corrupt_blocks = NULL;
8525                 root->fs_info->excluded_extents = NULL;
8526         }
8527         free(bits);
8528         free_chunk_cache_tree(&chunk_cache);
8529         free_device_cache_tree(&dev_cache);
8530         free_block_group_tree(&block_group_cache);
8531         free_device_extent_tree(&dev_extent_cache);
8532         free_extent_cache_tree(&seen);
8533         free_extent_cache_tree(&pending);
8534         free_extent_cache_tree(&reada);
8535         free_extent_cache_tree(&nodes);
8536         return ret;
8537 loop:
8538         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8539         free_extent_cache_tree(&seen);
8540         free_extent_cache_tree(&pending);
8541         free_extent_cache_tree(&reada);
8542         free_extent_cache_tree(&nodes);
8543         free_chunk_cache_tree(&chunk_cache);
8544         free_block_group_tree(&block_group_cache);
8545         free_device_cache_tree(&dev_cache);
8546         free_device_extent_tree(&dev_extent_cache);
8547         free_extent_record_cache(root->fs_info, &extent_cache);
8548         free_root_item_list(&normal_trees);
8549         free_root_item_list(&dropping_trees);
8550         extent_io_tree_cleanup(&excluded_extents);
8551         goto again;
8552 }
8553
8554 /*
8555  * Check backrefs of a tree block given by @bytenr or @eb.
8556  *
8557  * @root:       the root containing the @bytenr or @eb
8558  * @eb:         tree block extent buffer, can be NULL
8559  * @bytenr:     bytenr of the tree block to search
8560  * @level:      tree level of the tree block
8561  * @owner:      owner of the tree block
8562  *
8563  * Return >0 for any error found and output error message
8564  * Return 0 for no error found
8565  */
8566 static int check_tree_block_ref(struct btrfs_root *root,
8567                                 struct extent_buffer *eb, u64 bytenr,
8568                                 int level, u64 owner)
8569 {
8570         struct btrfs_key key;
8571         struct btrfs_root *extent_root = root->fs_info->extent_root;
8572         struct btrfs_path path;
8573         struct btrfs_extent_item *ei;
8574         struct btrfs_extent_inline_ref *iref;
8575         struct extent_buffer *leaf;
8576         unsigned long end;
8577         unsigned long ptr;
8578         int slot;
8579         int skinny_level;
8580         int type;
8581         u32 nodesize = root->nodesize;
8582         u32 item_size;
8583         u64 offset;
8584         int found_ref = 0;
8585         int err = 0;
8586         int ret;
8587
8588         btrfs_init_path(&path);
8589         key.objectid = bytenr;
8590         if (btrfs_fs_incompat(root->fs_info,
8591                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8592                 key.type = BTRFS_METADATA_ITEM_KEY;
8593         else
8594                 key.type = BTRFS_EXTENT_ITEM_KEY;
8595         key.offset = (u64)-1;
8596
8597         /* Search for the backref in extent tree */
8598         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8599         if (ret < 0) {
8600                 err |= BACKREF_MISSING;
8601                 goto out;
8602         }
8603         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8604         if (ret) {
8605                 err |= BACKREF_MISSING;
8606                 goto out;
8607         }
8608
8609         leaf = path.nodes[0];
8610         slot = path.slots[0];
8611         btrfs_item_key_to_cpu(leaf, &key, slot);
8612
8613         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8614
8615         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8616                 skinny_level = (int)key.offset;
8617                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8618         } else {
8619                 struct btrfs_tree_block_info *info;
8620
8621                 info = (struct btrfs_tree_block_info *)(ei + 1);
8622                 skinny_level = btrfs_tree_block_level(leaf, info);
8623                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8624         }
8625
8626         if (eb) {
8627                 u64 header_gen;
8628                 u64 extent_gen;
8629
8630                 if (!(btrfs_extent_flags(leaf, ei) &
8631                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8632                         error(
8633                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8634                                 key.objectid, nodesize,
8635                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8636                         err = BACKREF_MISMATCH;
8637                 }
8638                 header_gen = btrfs_header_generation(eb);
8639                 extent_gen = btrfs_extent_generation(leaf, ei);
8640                 if (header_gen != extent_gen) {
8641                         error(
8642         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8643                                 key.objectid, nodesize, header_gen,
8644                                 extent_gen);
8645                         err = BACKREF_MISMATCH;
8646                 }
8647                 if (level != skinny_level) {
8648                         error(
8649                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8650                                 key.objectid, nodesize, level, skinny_level);
8651                         err = BACKREF_MISMATCH;
8652                 }
8653                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8654                         error(
8655                         "extent[%llu %u] is referred by other roots than %llu",
8656                                 key.objectid, nodesize, root->objectid);
8657                         err = BACKREF_MISMATCH;
8658                 }
8659         }
8660
8661         /*
8662          * Iterate the extent/metadata item to find the exact backref
8663          */
8664         item_size = btrfs_item_size_nr(leaf, slot);
8665         ptr = (unsigned long)iref;
8666         end = (unsigned long)ei + item_size;
8667         while (ptr < end) {
8668                 iref = (struct btrfs_extent_inline_ref *)ptr;
8669                 type = btrfs_extent_inline_ref_type(leaf, iref);
8670                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8671
8672                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8673                         (offset == root->objectid || offset == owner)) {
8674                         found_ref = 1;
8675                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8676                         /* Check if the backref points to valid referencer */
8677                         found_ref = !check_tree_block_ref(root, NULL, offset,
8678                                                           level + 1, owner);
8679                 }
8680
8681                 if (found_ref)
8682                         break;
8683                 ptr += btrfs_extent_inline_ref_size(type);
8684         }
8685
8686         /*
8687          * Inlined extent item doesn't have what we need, check
8688          * TREE_BLOCK_REF_KEY
8689          */
8690         if (!found_ref) {
8691                 btrfs_release_path(&path);
8692                 key.objectid = bytenr;
8693                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8694                 key.offset = root->objectid;
8695
8696                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8697                 if (!ret)
8698                         found_ref = 1;
8699         }
8700         if (!found_ref)
8701                 err |= BACKREF_MISSING;
8702 out:
8703         btrfs_release_path(&path);
8704         if (eb && (err & BACKREF_MISSING))
8705                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8706                         bytenr, nodesize, owner, level);
8707         return err;
8708 }
8709
8710 /*
8711  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8712  *
8713  * Return >0 any error found and output error message
8714  * Return 0 for no error found
8715  */
8716 static int check_extent_data_item(struct btrfs_root *root,
8717                                   struct extent_buffer *eb, int slot)
8718 {
8719         struct btrfs_file_extent_item *fi;
8720         struct btrfs_path path;
8721         struct btrfs_root *extent_root = root->fs_info->extent_root;
8722         struct btrfs_key fi_key;
8723         struct btrfs_key dbref_key;
8724         struct extent_buffer *leaf;
8725         struct btrfs_extent_item *ei;
8726         struct btrfs_extent_inline_ref *iref;
8727         struct btrfs_extent_data_ref *dref;
8728         u64 owner;
8729         u64 file_extent_gen;
8730         u64 disk_bytenr;
8731         u64 disk_num_bytes;
8732         u64 extent_num_bytes;
8733         u64 extent_flags;
8734         u64 extent_gen;
8735         u32 item_size;
8736         unsigned long end;
8737         unsigned long ptr;
8738         int type;
8739         u64 ref_root;
8740         int found_dbackref = 0;
8741         int err = 0;
8742         int ret;
8743
8744         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8745         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8746         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8747
8748         /* Nothing to check for hole and inline data extents */
8749         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8750             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8751                 return 0;
8752
8753         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8754         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8755         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8756
8757         /* Check unaligned disk_num_bytes and num_bytes */
8758         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8759                 error(
8760 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8761                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8762                         root->sectorsize);
8763                 err |= BYTES_UNALIGNED;
8764         } else {
8765                 data_bytes_allocated += disk_num_bytes;
8766         }
8767         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8768                 error(
8769 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8770                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8771                         root->sectorsize);
8772                 err |= BYTES_UNALIGNED;
8773         } else {
8774                 data_bytes_referenced += extent_num_bytes;
8775         }
8776         owner = btrfs_header_owner(eb);
8777
8778         /* Check the extent item of the file extent in extent tree */
8779         btrfs_init_path(&path);
8780         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8781         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8782         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8783
8784         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8785         if (ret) {
8786                 err |= BACKREF_MISSING;
8787                 goto error;
8788         }
8789
8790         leaf = path.nodes[0];
8791         slot = path.slots[0];
8792         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8793
8794         extent_flags = btrfs_extent_flags(leaf, ei);
8795         extent_gen = btrfs_extent_generation(leaf, ei);
8796
8797         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8798                 error(
8799                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8800                     disk_bytenr, disk_num_bytes,
8801                     BTRFS_EXTENT_FLAG_DATA);
8802                 err |= BACKREF_MISMATCH;
8803         }
8804
8805         if (file_extent_gen < extent_gen) {
8806                 error(
8807 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8808                         disk_bytenr, disk_num_bytes, file_extent_gen,
8809                         extent_gen);
8810                 err |= BACKREF_MISMATCH;
8811         }
8812
8813         /* Check data backref inside that extent item */
8814         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8815         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8816         ptr = (unsigned long)iref;
8817         end = (unsigned long)ei + item_size;
8818         while (ptr < end) {
8819                 iref = (struct btrfs_extent_inline_ref *)ptr;
8820                 type = btrfs_extent_inline_ref_type(leaf, iref);
8821                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8822
8823                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8824                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8825                         if (ref_root == owner || ref_root == root->objectid)
8826                                 found_dbackref = 1;
8827                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8828                         found_dbackref = !check_tree_block_ref(root, NULL,
8829                                 btrfs_extent_inline_ref_offset(leaf, iref),
8830                                 0, owner);
8831                 }
8832
8833                 if (found_dbackref)
8834                         break;
8835                 ptr += btrfs_extent_inline_ref_size(type);
8836         }
8837
8838         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8839         if (!found_dbackref) {
8840                 btrfs_release_path(&path);
8841
8842                 btrfs_init_path(&path);
8843                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8844                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8845                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8846                                 fi_key.objectid, fi_key.offset);
8847
8848                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8849                                         &dbref_key, &path, 0, 0);
8850                 if (!ret)
8851                         found_dbackref = 1;
8852         }
8853
8854         if (!found_dbackref)
8855                 err |= BACKREF_MISSING;
8856 error:
8857         btrfs_release_path(&path);
8858         if (err & BACKREF_MISSING) {
8859                 error("data extent[%llu %llu] backref lost",
8860                       disk_bytenr, disk_num_bytes);
8861         }
8862         return err;
8863 }
8864
8865 /*
8866  * Get real tree block level for the case like shared block
8867  * Return >= 0 as tree level
8868  * Return <0 for error
8869  */
8870 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8871 {
8872         struct extent_buffer *eb;
8873         struct btrfs_path path;
8874         struct btrfs_key key;
8875         struct btrfs_extent_item *ei;
8876         u64 flags;
8877         u64 transid;
8878         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8879         u8 backref_level;
8880         u8 header_level;
8881         int ret;
8882
8883         /* Search extent tree for extent generation and level */
8884         key.objectid = bytenr;
8885         key.type = BTRFS_METADATA_ITEM_KEY;
8886         key.offset = (u64)-1;
8887
8888         btrfs_init_path(&path);
8889         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8890         if (ret < 0)
8891                 goto release_out;
8892         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8893         if (ret < 0)
8894                 goto release_out;
8895         if (ret > 0) {
8896                 ret = -ENOENT;
8897                 goto release_out;
8898         }
8899
8900         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8901         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8902                             struct btrfs_extent_item);
8903         flags = btrfs_extent_flags(path.nodes[0], ei);
8904         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8905                 ret = -ENOENT;
8906                 goto release_out;
8907         }
8908
8909         /* Get transid for later read_tree_block() check */
8910         transid = btrfs_extent_generation(path.nodes[0], ei);
8911
8912         /* Get backref level as one source */
8913         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8914                 backref_level = key.offset;
8915         } else {
8916                 struct btrfs_tree_block_info *info;
8917
8918                 info = (struct btrfs_tree_block_info *)(ei + 1);
8919                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8920         }
8921         btrfs_release_path(&path);
8922
8923         /* Get level from tree block as an alternative source */
8924         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8925         if (!extent_buffer_uptodate(eb)) {
8926                 free_extent_buffer(eb);
8927                 return -EIO;
8928         }
8929         header_level = btrfs_header_level(eb);
8930         free_extent_buffer(eb);
8931
8932         if (header_level != backref_level)
8933                 return -EIO;
8934         return header_level;
8935
8936 release_out:
8937         btrfs_release_path(&path);
8938         return ret;
8939 }
8940
8941 /*
8942  * Check if a tree block backref is valid (points to a valid tree block)
8943  * if level == -1, level will be resolved
8944  * Return >0 for any error found and print error message
8945  */
8946 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8947                                     u64 bytenr, int level)
8948 {
8949         struct btrfs_root *root;
8950         struct btrfs_key key;
8951         struct btrfs_path path;
8952         struct extent_buffer *eb;
8953         struct extent_buffer *node;
8954         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8955         int err = 0;
8956         int ret;
8957
8958         /* Query level for level == -1 special case */
8959         if (level == -1)
8960                 level = query_tree_block_level(fs_info, bytenr);
8961         if (level < 0) {
8962                 err |= REFERENCER_MISSING;
8963                 goto out;
8964         }
8965
8966         key.objectid = root_id;
8967         key.type = BTRFS_ROOT_ITEM_KEY;
8968         key.offset = (u64)-1;
8969
8970         root = btrfs_read_fs_root(fs_info, &key);
8971         if (IS_ERR(root)) {
8972                 err |= REFERENCER_MISSING;
8973                 goto out;
8974         }
8975
8976         /* Read out the tree block to get item/node key */
8977         eb = read_tree_block(root, bytenr, root->nodesize, 0);
8978         if (!extent_buffer_uptodate(eb)) {
8979                 err |= REFERENCER_MISSING;
8980                 free_extent_buffer(eb);
8981                 goto out;
8982         }
8983
8984         /* Empty tree, no need to check key */
8985         if (!btrfs_header_nritems(eb) && !level) {
8986                 free_extent_buffer(eb);
8987                 goto out;
8988         }
8989
8990         if (level)
8991                 btrfs_node_key_to_cpu(eb, &key, 0);
8992         else
8993                 btrfs_item_key_to_cpu(eb, &key, 0);
8994
8995         free_extent_buffer(eb);
8996
8997         btrfs_init_path(&path);
8998         /* Search with the first key, to ensure we can reach it */
8999         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9000         if (ret) {
9001                 err |= REFERENCER_MISSING;
9002                 goto release_out;
9003         }
9004
9005         node = path.nodes[level];
9006         if (btrfs_header_bytenr(node) != bytenr) {
9007                 error(
9008         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9009                         bytenr, nodesize, bytenr,
9010                         btrfs_header_bytenr(node));
9011                 err |= REFERENCER_MISMATCH;
9012         }
9013         if (btrfs_header_level(node) != level) {
9014                 error(
9015         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9016                         bytenr, nodesize, level,
9017                         btrfs_header_level(node));
9018                 err |= REFERENCER_MISMATCH;
9019         }
9020
9021 release_out:
9022         btrfs_release_path(&path);
9023 out:
9024         if (err & REFERENCER_MISSING) {
9025                 if (level < 0)
9026                         error("extent [%llu %d] lost referencer (owner: %llu)",
9027                                 bytenr, nodesize, root_id);
9028                 else
9029                         error(
9030                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9031                                 bytenr, nodesize, root_id, level);
9032         }
9033
9034         return err;
9035 }
9036
9037 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9038                            struct btrfs_root *root, int overwrite)
9039 {
9040         struct extent_buffer *c;
9041         struct extent_buffer *old = root->node;
9042         int level;
9043         int ret;
9044         struct btrfs_disk_key disk_key = {0,0,0};
9045
9046         level = 0;
9047
9048         if (overwrite) {
9049                 c = old;
9050                 extent_buffer_get(c);
9051                 goto init;
9052         }
9053         c = btrfs_alloc_free_block(trans, root,
9054                                    root->nodesize,
9055                                    root->root_key.objectid,
9056                                    &disk_key, level, 0, 0);
9057         if (IS_ERR(c)) {
9058                 c = old;
9059                 extent_buffer_get(c);
9060                 overwrite = 1;
9061         }
9062 init:
9063         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9064         btrfs_set_header_level(c, level);
9065         btrfs_set_header_bytenr(c, c->start);
9066         btrfs_set_header_generation(c, trans->transid);
9067         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9068         btrfs_set_header_owner(c, root->root_key.objectid);
9069
9070         write_extent_buffer(c, root->fs_info->fsid,
9071                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
9072
9073         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9074                             btrfs_header_chunk_tree_uuid(c),
9075                             BTRFS_UUID_SIZE);
9076
9077         btrfs_mark_buffer_dirty(c);
9078         /*
9079          * this case can happen in the following case:
9080          *
9081          * 1.overwrite previous root.
9082          *
9083          * 2.reinit reloc data root, this is because we skip pin
9084          * down reloc data tree before which means we can allocate
9085          * same block bytenr here.
9086          */
9087         if (old->start == c->start) {
9088                 btrfs_set_root_generation(&root->root_item,
9089                                           trans->transid);
9090                 root->root_item.level = btrfs_header_level(root->node);
9091                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9092                                         &root->root_key, &root->root_item);
9093                 if (ret) {
9094                         free_extent_buffer(c);
9095                         return ret;
9096                 }
9097         }
9098         free_extent_buffer(old);
9099         root->node = c;
9100         add_root_to_dirty_list(root);
9101         return 0;
9102 }
9103
9104 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9105                                 struct extent_buffer *eb, int tree_root)
9106 {
9107         struct extent_buffer *tmp;
9108         struct btrfs_root_item *ri;
9109         struct btrfs_key key;
9110         u64 bytenr;
9111         u32 nodesize;
9112         int level = btrfs_header_level(eb);
9113         int nritems;
9114         int ret;
9115         int i;
9116
9117         /*
9118          * If we have pinned this block before, don't pin it again.
9119          * This can not only avoid forever loop with broken filesystem
9120          * but also give us some speedups.
9121          */
9122         if (test_range_bit(&fs_info->pinned_extents, eb->start,
9123                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9124                 return 0;
9125
9126         btrfs_pin_extent(fs_info, eb->start, eb->len);
9127
9128         nodesize = btrfs_super_nodesize(fs_info->super_copy);
9129         nritems = btrfs_header_nritems(eb);
9130         for (i = 0; i < nritems; i++) {
9131                 if (level == 0) {
9132                         btrfs_item_key_to_cpu(eb, &key, i);
9133                         if (key.type != BTRFS_ROOT_ITEM_KEY)
9134                                 continue;
9135                         /* Skip the extent root and reloc roots */
9136                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9137                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9138                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9139                                 continue;
9140                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9141                         bytenr = btrfs_disk_root_bytenr(eb, ri);
9142
9143                         /*
9144                          * If at any point we start needing the real root we
9145                          * will have to build a stump root for the root we are
9146                          * in, but for now this doesn't actually use the root so
9147                          * just pass in extent_root.
9148                          */
9149                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9150                                               nodesize, 0);
9151                         if (!extent_buffer_uptodate(tmp)) {
9152                                 fprintf(stderr, "Error reading root block\n");
9153                                 return -EIO;
9154                         }
9155                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
9156                         free_extent_buffer(tmp);
9157                         if (ret)
9158                                 return ret;
9159                 } else {
9160                         bytenr = btrfs_node_blockptr(eb, i);
9161
9162                         /* If we aren't the tree root don't read the block */
9163                         if (level == 1 && !tree_root) {
9164                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
9165                                 continue;
9166                         }
9167
9168                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9169                                               nodesize, 0);
9170                         if (!extent_buffer_uptodate(tmp)) {
9171                                 fprintf(stderr, "Error reading tree block\n");
9172                                 return -EIO;
9173                         }
9174                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9175                         free_extent_buffer(tmp);
9176                         if (ret)
9177                                 return ret;
9178                 }
9179         }
9180
9181         return 0;
9182 }
9183
9184 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9185 {
9186         int ret;
9187
9188         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9189         if (ret)
9190                 return ret;
9191
9192         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
9193 }
9194
9195 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9196 {
9197         struct btrfs_block_group_cache *cache;
9198         struct btrfs_path *path;
9199         struct extent_buffer *leaf;
9200         struct btrfs_chunk *chunk;
9201         struct btrfs_key key;
9202         int ret;
9203         u64 start;
9204
9205         path = btrfs_alloc_path();
9206         if (!path)
9207                 return -ENOMEM;
9208
9209         key.objectid = 0;
9210         key.type = BTRFS_CHUNK_ITEM_KEY;
9211         key.offset = 0;
9212
9213         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9214         if (ret < 0) {
9215                 btrfs_free_path(path);
9216                 return ret;
9217         }
9218
9219         /*
9220          * We do this in case the block groups were screwed up and had alloc
9221          * bits that aren't actually set on the chunks.  This happens with
9222          * restored images every time and could happen in real life I guess.
9223          */
9224         fs_info->avail_data_alloc_bits = 0;
9225         fs_info->avail_metadata_alloc_bits = 0;
9226         fs_info->avail_system_alloc_bits = 0;
9227
9228         /* First we need to create the in-memory block groups */
9229         while (1) {
9230                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9231                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
9232                         if (ret < 0) {
9233                                 btrfs_free_path(path);
9234                                 return ret;
9235                         }
9236                         if (ret) {
9237                                 ret = 0;
9238                                 break;
9239                         }
9240                 }
9241                 leaf = path->nodes[0];
9242                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9243                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9244                         path->slots[0]++;
9245                         continue;
9246                 }
9247
9248                 chunk = btrfs_item_ptr(leaf, path->slots[0],
9249                                        struct btrfs_chunk);
9250                 btrfs_add_block_group(fs_info, 0,
9251                                       btrfs_chunk_type(leaf, chunk),
9252                                       key.objectid, key.offset,
9253                                       btrfs_chunk_length(leaf, chunk));
9254                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9255                                  key.offset + btrfs_chunk_length(leaf, chunk),
9256                                  GFP_NOFS);
9257                 path->slots[0]++;
9258         }
9259         start = 0;
9260         while (1) {
9261                 cache = btrfs_lookup_first_block_group(fs_info, start);
9262                 if (!cache)
9263                         break;
9264                 cache->cached = 1;
9265                 start = cache->key.objectid + cache->key.offset;
9266         }
9267
9268         btrfs_free_path(path);
9269         return 0;
9270 }
9271
9272 static int reset_balance(struct btrfs_trans_handle *trans,
9273                          struct btrfs_fs_info *fs_info)
9274 {
9275         struct btrfs_root *root = fs_info->tree_root;
9276         struct btrfs_path *path;
9277         struct extent_buffer *leaf;
9278         struct btrfs_key key;
9279         int del_slot, del_nr = 0;
9280         int ret;
9281         int found = 0;
9282
9283         path = btrfs_alloc_path();
9284         if (!path)
9285                 return -ENOMEM;
9286
9287         key.objectid = BTRFS_BALANCE_OBJECTID;
9288         key.type = BTRFS_BALANCE_ITEM_KEY;
9289         key.offset = 0;
9290
9291         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9292         if (ret) {
9293                 if (ret > 0)
9294                         ret = 0;
9295                 if (!ret)
9296                         goto reinit_data_reloc;
9297                 else
9298                         goto out;
9299         }
9300
9301         ret = btrfs_del_item(trans, root, path);
9302         if (ret)
9303                 goto out;
9304         btrfs_release_path(path);
9305
9306         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9307         key.type = BTRFS_ROOT_ITEM_KEY;
9308         key.offset = 0;
9309
9310         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9311         if (ret < 0)
9312                 goto out;
9313         while (1) {
9314                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9315                         if (!found)
9316                                 break;
9317
9318                         if (del_nr) {
9319                                 ret = btrfs_del_items(trans, root, path,
9320                                                       del_slot, del_nr);
9321                                 del_nr = 0;
9322                                 if (ret)
9323                                         goto out;
9324                         }
9325                         key.offset++;
9326                         btrfs_release_path(path);
9327
9328                         found = 0;
9329                         ret = btrfs_search_slot(trans, root, &key, path,
9330                                                 -1, 1);
9331                         if (ret < 0)
9332                                 goto out;
9333                         continue;
9334                 }
9335                 found = 1;
9336                 leaf = path->nodes[0];
9337                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9338                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9339                         break;
9340                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9341                         path->slots[0]++;
9342                         continue;
9343                 }
9344                 if (!del_nr) {
9345                         del_slot = path->slots[0];
9346                         del_nr = 1;
9347                 } else {
9348                         del_nr++;
9349                 }
9350                 path->slots[0]++;
9351         }
9352
9353         if (del_nr) {
9354                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9355                 if (ret)
9356                         goto out;
9357         }
9358         btrfs_release_path(path);
9359
9360 reinit_data_reloc:
9361         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9362         key.type = BTRFS_ROOT_ITEM_KEY;
9363         key.offset = (u64)-1;
9364         root = btrfs_read_fs_root(fs_info, &key);
9365         if (IS_ERR(root)) {
9366                 fprintf(stderr, "Error reading data reloc tree\n");
9367                 ret = PTR_ERR(root);
9368                 goto out;
9369         }
9370         record_root_in_trans(trans, root);
9371         ret = btrfs_fsck_reinit_root(trans, root, 0);
9372         if (ret)
9373                 goto out;
9374         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9375 out:
9376         btrfs_free_path(path);
9377         return ret;
9378 }
9379
9380 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9381                               struct btrfs_fs_info *fs_info)
9382 {
9383         u64 start = 0;
9384         int ret;
9385
9386         /*
9387          * The only reason we don't do this is because right now we're just
9388          * walking the trees we find and pinning down their bytes, we don't look
9389          * at any of the leaves.  In order to do mixed groups we'd have to check
9390          * the leaves of any fs roots and pin down the bytes for any file
9391          * extents we find.  Not hard but why do it if we don't have to?
9392          */
9393         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9394                 fprintf(stderr, "We don't support re-initing the extent tree "
9395                         "for mixed block groups yet, please notify a btrfs "
9396                         "developer you want to do this so they can add this "
9397                         "functionality.\n");
9398                 return -EINVAL;
9399         }
9400
9401         /*
9402          * first we need to walk all of the trees except the extent tree and pin
9403          * down the bytes that are in use so we don't overwrite any existing
9404          * metadata.
9405          */
9406         ret = pin_metadata_blocks(fs_info);
9407         if (ret) {
9408                 fprintf(stderr, "error pinning down used bytes\n");
9409                 return ret;
9410         }
9411
9412         /*
9413          * Need to drop all the block groups since we're going to recreate all
9414          * of them again.
9415          */
9416         btrfs_free_block_groups(fs_info);
9417         ret = reset_block_groups(fs_info);
9418         if (ret) {
9419                 fprintf(stderr, "error resetting the block groups\n");
9420                 return ret;
9421         }
9422
9423         /* Ok we can allocate now, reinit the extent root */
9424         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9425         if (ret) {
9426                 fprintf(stderr, "extent root initialization failed\n");
9427                 /*
9428                  * When the transaction code is updated we should end the
9429                  * transaction, but for now progs only knows about commit so
9430                  * just return an error.
9431                  */
9432                 return ret;
9433         }
9434
9435         /*
9436          * Now we have all the in-memory block groups setup so we can make
9437          * allocations properly, and the metadata we care about is safe since we
9438          * pinned all of it above.
9439          */
9440         while (1) {
9441                 struct btrfs_block_group_cache *cache;
9442
9443                 cache = btrfs_lookup_first_block_group(fs_info, start);
9444                 if (!cache)
9445                         break;
9446                 start = cache->key.objectid + cache->key.offset;
9447                 ret = btrfs_insert_item(trans, fs_info->extent_root,
9448                                         &cache->key, &cache->item,
9449                                         sizeof(cache->item));
9450                 if (ret) {
9451                         fprintf(stderr, "Error adding block group\n");
9452                         return ret;
9453                 }
9454                 btrfs_extent_post_op(trans, fs_info->extent_root);
9455         }
9456
9457         ret = reset_balance(trans, fs_info);
9458         if (ret)
9459                 fprintf(stderr, "error resetting the pending balance\n");
9460
9461         return ret;
9462 }
9463
9464 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9465 {
9466         struct btrfs_path *path;
9467         struct btrfs_trans_handle *trans;
9468         struct btrfs_key key;
9469         int ret;
9470
9471         printf("Recowing metadata block %llu\n", eb->start);
9472         key.objectid = btrfs_header_owner(eb);
9473         key.type = BTRFS_ROOT_ITEM_KEY;
9474         key.offset = (u64)-1;
9475
9476         root = btrfs_read_fs_root(root->fs_info, &key);
9477         if (IS_ERR(root)) {
9478                 fprintf(stderr, "Couldn't find owner root %llu\n",
9479                         key.objectid);
9480                 return PTR_ERR(root);
9481         }
9482
9483         path = btrfs_alloc_path();
9484         if (!path)
9485                 return -ENOMEM;
9486
9487         trans = btrfs_start_transaction(root, 1);
9488         if (IS_ERR(trans)) {
9489                 btrfs_free_path(path);
9490                 return PTR_ERR(trans);
9491         }
9492
9493         path->lowest_level = btrfs_header_level(eb);
9494         if (path->lowest_level)
9495                 btrfs_node_key_to_cpu(eb, &key, 0);
9496         else
9497                 btrfs_item_key_to_cpu(eb, &key, 0);
9498
9499         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9500         btrfs_commit_transaction(trans, root);
9501         btrfs_free_path(path);
9502         return ret;
9503 }
9504
9505 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9506 {
9507         struct btrfs_path *path;
9508         struct btrfs_trans_handle *trans;
9509         struct btrfs_key key;
9510         int ret;
9511
9512         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9513                bad->key.type, bad->key.offset);
9514         key.objectid = bad->root_id;
9515         key.type = BTRFS_ROOT_ITEM_KEY;
9516         key.offset = (u64)-1;
9517
9518         root = btrfs_read_fs_root(root->fs_info, &key);
9519         if (IS_ERR(root)) {
9520                 fprintf(stderr, "Couldn't find owner root %llu\n",
9521                         key.objectid);
9522                 return PTR_ERR(root);
9523         }
9524
9525         path = btrfs_alloc_path();
9526         if (!path)
9527                 return -ENOMEM;
9528
9529         trans = btrfs_start_transaction(root, 1);
9530         if (IS_ERR(trans)) {
9531                 btrfs_free_path(path);
9532                 return PTR_ERR(trans);
9533         }
9534
9535         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9536         if (ret) {
9537                 if (ret > 0)
9538                         ret = 0;
9539                 goto out;
9540         }
9541         ret = btrfs_del_item(trans, root, path);
9542 out:
9543         btrfs_commit_transaction(trans, root);
9544         btrfs_free_path(path);
9545         return ret;
9546 }
9547
9548 static int zero_log_tree(struct btrfs_root *root)
9549 {
9550         struct btrfs_trans_handle *trans;
9551         int ret;
9552
9553         trans = btrfs_start_transaction(root, 1);
9554         if (IS_ERR(trans)) {
9555                 ret = PTR_ERR(trans);
9556                 return ret;
9557         }
9558         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9559         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9560         ret = btrfs_commit_transaction(trans, root);
9561         return ret;
9562 }
9563
9564 static int populate_csum(struct btrfs_trans_handle *trans,
9565                          struct btrfs_root *csum_root, char *buf, u64 start,
9566                          u64 len)
9567 {
9568         u64 offset = 0;
9569         u64 sectorsize;
9570         int ret = 0;
9571
9572         while (offset < len) {
9573                 sectorsize = csum_root->sectorsize;
9574                 ret = read_extent_data(csum_root, buf, start + offset,
9575                                        &sectorsize, 0);
9576                 if (ret)
9577                         break;
9578                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9579                                             start + offset, buf, sectorsize);
9580                 if (ret)
9581                         break;
9582                 offset += sectorsize;
9583         }
9584         return ret;
9585 }
9586
9587 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9588                                       struct btrfs_root *csum_root,
9589                                       struct btrfs_root *cur_root)
9590 {
9591         struct btrfs_path *path;
9592         struct btrfs_key key;
9593         struct extent_buffer *node;
9594         struct btrfs_file_extent_item *fi;
9595         char *buf = NULL;
9596         u64 start = 0;
9597         u64 len = 0;
9598         int slot = 0;
9599         int ret = 0;
9600
9601         path = btrfs_alloc_path();
9602         if (!path)
9603                 return -ENOMEM;
9604         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9605         if (!buf) {
9606                 ret = -ENOMEM;
9607                 goto out;
9608         }
9609
9610         key.objectid = 0;
9611         key.offset = 0;
9612         key.type = 0;
9613
9614         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9615         if (ret < 0)
9616                 goto out;
9617         /* Iterate all regular file extents and fill its csum */
9618         while (1) {
9619                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9620
9621                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9622                         goto next;
9623                 node = path->nodes[0];
9624                 slot = path->slots[0];
9625                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9626                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9627                         goto next;
9628                 start = btrfs_file_extent_disk_bytenr(node, fi);
9629                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9630
9631                 ret = populate_csum(trans, csum_root, buf, start, len);
9632                 if (ret == -EEXIST)
9633                         ret = 0;
9634                 if (ret < 0)
9635                         goto out;
9636 next:
9637                 /*
9638                  * TODO: if next leaf is corrupted, jump to nearest next valid
9639                  * leaf.
9640                  */
9641                 ret = btrfs_next_item(cur_root, path);
9642                 if (ret < 0)
9643                         goto out;
9644                 if (ret > 0) {
9645                         ret = 0;
9646                         goto out;
9647                 }
9648         }
9649
9650 out:
9651         btrfs_free_path(path);
9652         free(buf);
9653         return ret;
9654 }
9655
9656 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9657                                   struct btrfs_root *csum_root)
9658 {
9659         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9660         struct btrfs_path *path;
9661         struct btrfs_root *tree_root = fs_info->tree_root;
9662         struct btrfs_root *cur_root;
9663         struct extent_buffer *node;
9664         struct btrfs_key key;
9665         int slot = 0;
9666         int ret = 0;
9667
9668         path = btrfs_alloc_path();
9669         if (!path)
9670                 return -ENOMEM;
9671
9672         key.objectid = BTRFS_FS_TREE_OBJECTID;
9673         key.offset = 0;
9674         key.type = BTRFS_ROOT_ITEM_KEY;
9675
9676         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9677         if (ret < 0)
9678                 goto out;
9679         if (ret > 0) {
9680                 ret = -ENOENT;
9681                 goto out;
9682         }
9683
9684         while (1) {
9685                 node = path->nodes[0];
9686                 slot = path->slots[0];
9687                 btrfs_item_key_to_cpu(node, &key, slot);
9688                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9689                         goto out;
9690                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9691                         goto next;
9692                 if (!is_fstree(key.objectid))
9693                         goto next;
9694                 key.offset = (u64)-1;
9695
9696                 cur_root = btrfs_read_fs_root(fs_info, &key);
9697                 if (IS_ERR(cur_root) || !cur_root) {
9698                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9699                                 key.objectid);
9700                         goto out;
9701                 }
9702                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9703                                 cur_root);
9704                 if (ret < 0)
9705                         goto out;
9706 next:
9707                 ret = btrfs_next_item(tree_root, path);
9708                 if (ret > 0) {
9709                         ret = 0;
9710                         goto out;
9711                 }
9712                 if (ret < 0)
9713                         goto out;
9714         }
9715
9716 out:
9717         btrfs_free_path(path);
9718         return ret;
9719 }
9720
9721 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9722                                       struct btrfs_root *csum_root)
9723 {
9724         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9725         struct btrfs_path *path;
9726         struct btrfs_extent_item *ei;
9727         struct extent_buffer *leaf;
9728         char *buf;
9729         struct btrfs_key key;
9730         int ret;
9731
9732         path = btrfs_alloc_path();
9733         if (!path)
9734                 return -ENOMEM;
9735
9736         key.objectid = 0;
9737         key.type = BTRFS_EXTENT_ITEM_KEY;
9738         key.offset = 0;
9739
9740         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9741         if (ret < 0) {
9742                 btrfs_free_path(path);
9743                 return ret;
9744         }
9745
9746         buf = malloc(csum_root->sectorsize);
9747         if (!buf) {
9748                 btrfs_free_path(path);
9749                 return -ENOMEM;
9750         }
9751
9752         while (1) {
9753                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9754                         ret = btrfs_next_leaf(extent_root, path);
9755                         if (ret < 0)
9756                                 break;
9757                         if (ret) {
9758                                 ret = 0;
9759                                 break;
9760                         }
9761                 }
9762                 leaf = path->nodes[0];
9763
9764                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9765                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9766                         path->slots[0]++;
9767                         continue;
9768                 }
9769
9770                 ei = btrfs_item_ptr(leaf, path->slots[0],
9771                                     struct btrfs_extent_item);
9772                 if (!(btrfs_extent_flags(leaf, ei) &
9773                       BTRFS_EXTENT_FLAG_DATA)) {
9774                         path->slots[0]++;
9775                         continue;
9776                 }
9777
9778                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9779                                     key.offset);
9780                 if (ret)
9781                         break;
9782                 path->slots[0]++;
9783         }
9784
9785         btrfs_free_path(path);
9786         free(buf);
9787         return ret;
9788 }
9789
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent tree no longer holds any extent
 * information, so it cannot be used as the source.  In that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
9806
9807 static void free_roots_info_cache(void)
9808 {
9809         if (!roots_info_cache)
9810                 return;
9811
9812         while (!cache_tree_empty(roots_info_cache)) {
9813                 struct cache_extent *entry;
9814                 struct root_item_info *rii;
9815
9816                 entry = first_cache_extent(roots_info_cache);
9817                 if (!entry)
9818                         break;
9819                 remove_cache_extent(roots_info_cache, entry);
9820                 rii = container_of(entry, struct root_item_info, cache_extent);
9821                 free(rii);
9822         }
9823
9824         free(roots_info_cache);
9825         roots_info_cache = NULL;
9826 }
9827
9828 static int build_roots_info_cache(struct btrfs_fs_info *info)
9829 {
9830         int ret = 0;
9831         struct btrfs_key key;
9832         struct extent_buffer *leaf;
9833         struct btrfs_path *path;
9834
9835         if (!roots_info_cache) {
9836                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9837                 if (!roots_info_cache)
9838                         return -ENOMEM;
9839                 cache_tree_init(roots_info_cache);
9840         }
9841
9842         path = btrfs_alloc_path();
9843         if (!path)
9844                 return -ENOMEM;
9845
9846         key.objectid = 0;
9847         key.type = BTRFS_EXTENT_ITEM_KEY;
9848         key.offset = 0;
9849
9850         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9851         if (ret < 0)
9852                 goto out;
9853         leaf = path->nodes[0];
9854
9855         while (1) {
9856                 struct btrfs_key found_key;
9857                 struct btrfs_extent_item *ei;
9858                 struct btrfs_extent_inline_ref *iref;
9859                 int slot = path->slots[0];
9860                 int type;
9861                 u64 flags;
9862                 u64 root_id;
9863                 u8 level;
9864                 struct cache_extent *entry;
9865                 struct root_item_info *rii;
9866
9867                 if (slot >= btrfs_header_nritems(leaf)) {
9868                         ret = btrfs_next_leaf(info->extent_root, path);
9869                         if (ret < 0) {
9870                                 break;
9871                         } else if (ret) {
9872                                 ret = 0;
9873                                 break;
9874                         }
9875                         leaf = path->nodes[0];
9876                         slot = path->slots[0];
9877                 }
9878
9879                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9880
9881                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9882                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9883                         goto next;
9884
9885                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9886                 flags = btrfs_extent_flags(leaf, ei);
9887
9888                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9889                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9890                         goto next;
9891
9892                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9893                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9894                         level = found_key.offset;
9895                 } else {
9896                         struct btrfs_tree_block_info *binfo;
9897
9898                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9899                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9900                         level = btrfs_tree_block_level(leaf, binfo);
9901                 }
9902
9903                 /*
9904                  * For a root extent, it must be of the following type and the
9905                  * first (and only one) iref in the item.
9906                  */
9907                 type = btrfs_extent_inline_ref_type(leaf, iref);
9908                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9909                         goto next;
9910
9911                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9912                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9913                 if (!entry) {
9914                         rii = malloc(sizeof(struct root_item_info));
9915                         if (!rii) {
9916                                 ret = -ENOMEM;
9917                                 goto out;
9918                         }
9919                         rii->cache_extent.start = root_id;
9920                         rii->cache_extent.size = 1;
9921                         rii->level = (u8)-1;
9922                         entry = &rii->cache_extent;
9923                         ret = insert_cache_extent(roots_info_cache, entry);
9924                         ASSERT(ret == 0);
9925                 } else {
9926                         rii = container_of(entry, struct root_item_info,
9927                                            cache_extent);
9928                 }
9929
9930                 ASSERT(rii->cache_extent.start == root_id);
9931                 ASSERT(rii->cache_extent.size == 1);
9932
9933                 if (level > rii->level || rii->level == (u8)-1) {
9934                         rii->level = level;
9935                         rii->bytenr = found_key.objectid;
9936                         rii->gen = btrfs_extent_generation(leaf, ei);
9937                         rii->node_count = 1;
9938                 } else if (level == rii->level) {
9939                         rii->node_count++;
9940                 }
9941 next:
9942                 path->slots[0]++;
9943         }
9944
9945 out:
9946         btrfs_free_path(path);
9947
9948         return ret;
9949 }
9950
9951 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9952                                   struct btrfs_path *path,
9953                                   const struct btrfs_key *root_key,
9954                                   const int read_only_mode)
9955 {
9956         const u64 root_id = root_key->objectid;
9957         struct cache_extent *entry;
9958         struct root_item_info *rii;
9959         struct btrfs_root_item ri;
9960         unsigned long offset;
9961
9962         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9963         if (!entry) {
9964                 fprintf(stderr,
9965                         "Error: could not find extent items for root %llu\n",
9966                         root_key->objectid);
9967                 return -ENOENT;
9968         }
9969
9970         rii = container_of(entry, struct root_item_info, cache_extent);
9971         ASSERT(rii->cache_extent.start == root_id);
9972         ASSERT(rii->cache_extent.size == 1);
9973
9974         if (rii->node_count != 1) {
9975                 fprintf(stderr,
9976                         "Error: could not find btree root extent for root %llu\n",
9977                         root_id);
9978                 return -ENOENT;
9979         }
9980
9981         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9982         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9983
9984         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9985             btrfs_root_level(&ri) != rii->level ||
9986             btrfs_root_generation(&ri) != rii->gen) {
9987
9988                 /*
9989                  * If we're in repair mode but our caller told us to not update
9990                  * the root item, i.e. just check if it needs to be updated, don't
9991                  * print this message, since the caller will call us again shortly
9992                  * for the same root item without read only mode (the caller will
9993                  * open a transaction first).
9994                  */
9995                 if (!(read_only_mode && repair))
9996                         fprintf(stderr,
9997                                 "%sroot item for root %llu,"
9998                                 " current bytenr %llu, current gen %llu, current level %u,"
9999                                 " new bytenr %llu, new gen %llu, new level %u\n",
10000                                 (read_only_mode ? "" : "fixing "),
10001                                 root_id,
10002                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10003                                 btrfs_root_level(&ri),
10004                                 rii->bytenr, rii->gen, rii->level);
10005
10006                 if (btrfs_root_generation(&ri) > rii->gen) {
10007                         fprintf(stderr,
10008                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10009                                 root_id, btrfs_root_generation(&ri), rii->gen);
10010                         return -EINVAL;
10011                 }
10012
10013                 if (!read_only_mode) {
10014                         btrfs_set_root_bytenr(&ri, rii->bytenr);
10015                         btrfs_set_root_level(&ri, rii->level);
10016                         btrfs_set_root_generation(&ri, rii->gen);
10017                         write_extent_buffer(path->nodes[0], &ri,
10018                                             offset, sizeof(ri));
10019                 }
10020
10021                 return 1;
10022         }
10023
10024         return 0;
10025 }
10026
10027 /*
10028  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10029  * caused read-only snapshots to be corrupted if they were created at a moment
10030  * when the source subvolume/snapshot had orphan items. The issue was that the
10031  * on-disk root items became incorrect, referring to the pre orphan cleanup root
10032  * node instead of the post orphan cleanup root node.
10033  * So this function, and its callees, just detects and fixes those cases. Even
10034  * though the regression was for read-only snapshots, this function applies to
10035  * any snapshot/subvolume root.
10036  * This must be run before any other repair code - not doing it so, makes other
10037  * repair code delete or modify backrefs in the extent tree for example, which
10038  * will result in an inconsistent fs after repairing the root items.
10039  */
10040 static int repair_root_items(struct btrfs_fs_info *info)
10041 {
10042         struct btrfs_path *path = NULL;
10043         struct btrfs_key key;
10044         struct extent_buffer *leaf;
10045         struct btrfs_trans_handle *trans = NULL;
10046         int ret = 0;
10047         int bad_roots = 0;
10048         int need_trans = 0;
10049
10050         ret = build_roots_info_cache(info);
10051         if (ret)
10052                 goto out;
10053
10054         path = btrfs_alloc_path();
10055         if (!path) {
10056                 ret = -ENOMEM;
10057                 goto out;
10058         }
10059
10060         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10061         key.type = BTRFS_ROOT_ITEM_KEY;
10062         key.offset = 0;
10063
10064 again:
10065         /*
10066          * Avoid opening and committing transactions if a leaf doesn't have
10067          * any root items that need to be fixed, so that we avoid rotating
10068          * backup roots unnecessarily.
10069          */
10070         if (need_trans) {
10071                 trans = btrfs_start_transaction(info->tree_root, 1);
10072                 if (IS_ERR(trans)) {
10073                         ret = PTR_ERR(trans);
10074                         goto out;
10075                 }
10076         }
10077
10078         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10079                                 0, trans ? 1 : 0);
10080         if (ret < 0)
10081                 goto out;
10082         leaf = path->nodes[0];
10083
10084         while (1) {
10085                 struct btrfs_key found_key;
10086
10087                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10088                         int no_more_keys = find_next_key(path, &key);
10089
10090                         btrfs_release_path(path);
10091                         if (trans) {
10092                                 ret = btrfs_commit_transaction(trans,
10093                                                                info->tree_root);
10094                                 trans = NULL;
10095                                 if (ret < 0)
10096                                         goto out;
10097                         }
10098                         need_trans = 0;
10099                         if (no_more_keys)
10100                                 break;
10101                         goto again;
10102                 }
10103
10104                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10105
10106                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10107                         goto next;
10108                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10109                         goto next;
10110
10111                 ret = maybe_repair_root_item(info, path, &found_key,
10112                                              trans ? 0 : 1);
10113                 if (ret < 0)
10114                         goto out;
10115                 if (ret) {
10116                         if (!trans && repair) {
10117                                 need_trans = 1;
10118                                 key = found_key;
10119                                 btrfs_release_path(path);
10120                                 goto again;
10121                         }
10122                         bad_roots++;
10123                 }
10124 next:
10125                 path->slots[0]++;
10126         }
10127         ret = 0;
10128 out:
10129         free_roots_info_cache();
10130         btrfs_free_path(path);
10131         if (trans)
10132                 btrfs_commit_transaction(trans, info->tree_root);
10133         if (ret < 0)
10134                 return ret;
10135
10136         return bad_roots;
10137 }
10138
/*
 * Help text for "btrfs check": a NULL-terminated list of strings that
 * cmd_check() hands to usage().
 * NOTE(review): the first entries look like synopsis, one-line summary and
 * long description, followed by an empty separator and the option lines -
 * confirm against the usage() implementation.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found.",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
10162
10163 int cmd_check(int argc, char **argv)
10164 {
10165         struct cache_tree root_cache;
10166         struct btrfs_root *root;
10167         struct btrfs_fs_info *info;
10168         u64 bytenr = 0;
10169         u64 subvolid = 0;
10170         u64 tree_root_bytenr = 0;
10171         u64 chunk_root_bytenr = 0;
10172         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10173         int ret;
10174         u64 num;
10175         int init_csum_tree = 0;
10176         int readonly = 0;
10177         int qgroup_report = 0;
10178         int qgroups_repaired = 0;
10179         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10180
10181         while(1) {
10182                 int c;
10183                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10184                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10185                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10186                 static const struct option long_options[] = {
10187                         { "super", required_argument, NULL, 's' },
10188                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10189                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10190                         { "init-csum-tree", no_argument, NULL,
10191                                 GETOPT_VAL_INIT_CSUM },
10192                         { "init-extent-tree", no_argument, NULL,
10193                                 GETOPT_VAL_INIT_EXTENT },
10194                         { "check-data-csum", no_argument, NULL,
10195                                 GETOPT_VAL_CHECK_CSUM },
10196                         { "backup", no_argument, NULL, 'b' },
10197                         { "subvol-extents", required_argument, NULL, 'E' },
10198                         { "qgroup-report", no_argument, NULL, 'Q' },
10199                         { "tree-root", required_argument, NULL, 'r' },
10200                         { "chunk-root", required_argument, NULL,
10201                                 GETOPT_VAL_CHUNK_TREE },
10202                         { "progress", no_argument, NULL, 'p' },
10203                         { NULL, 0, NULL, 0}
10204                 };
10205
10206                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10207                 if (c < 0)
10208                         break;
10209                 switch(c) {
10210                         case 'a': /* ignored */ break;
10211                         case 'b':
10212                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10213                                 break;
10214                         case 's':
10215                                 num = arg_strtou64(optarg);
10216                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10217                                         fprintf(stderr,
10218                                                 "ERROR: super mirror should be less than: %d\n",
10219                                                 BTRFS_SUPER_MIRROR_MAX);
10220                                         exit(1);
10221                                 }
10222                                 bytenr = btrfs_sb_offset(((int)num));
10223                                 printf("using SB copy %llu, bytenr %llu\n", num,
10224                                        (unsigned long long)bytenr);
10225                                 break;
10226                         case 'Q':
10227                                 qgroup_report = 1;
10228                                 break;
10229                         case 'E':
10230                                 subvolid = arg_strtou64(optarg);
10231                                 break;
10232                         case 'r':
10233                                 tree_root_bytenr = arg_strtou64(optarg);
10234                                 break;
10235                         case GETOPT_VAL_CHUNK_TREE:
10236                                 chunk_root_bytenr = arg_strtou64(optarg);
10237                                 break;
10238                         case 'p':
10239                                 ctx.progress_enabled = true;
10240                                 break;
10241                         case '?':
10242                         case 'h':
10243                                 usage(cmd_check_usage);
10244                         case GETOPT_VAL_REPAIR:
10245                                 printf("enabling repair mode\n");
10246                                 repair = 1;
10247                                 ctree_flags |= OPEN_CTREE_WRITES;
10248                                 break;
10249                         case GETOPT_VAL_READONLY:
10250                                 readonly = 1;
10251                                 break;
10252                         case GETOPT_VAL_INIT_CSUM:
10253                                 printf("Creating a new CRC tree\n");
10254                                 init_csum_tree = 1;
10255                                 repair = 1;
10256                                 ctree_flags |= OPEN_CTREE_WRITES;
10257                                 break;
10258                         case GETOPT_VAL_INIT_EXTENT:
10259                                 init_extent_tree = 1;
10260                                 ctree_flags |= (OPEN_CTREE_WRITES |
10261                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
10262                                 repair = 1;
10263                                 break;
10264                         case GETOPT_VAL_CHECK_CSUM:
10265                                 check_data_csum = 1;
10266                                 break;
10267                 }
10268         }
10269
10270         if (check_argc_exact(argc - optind, 1))
10271                 usage(cmd_check_usage);
10272
10273         if (ctx.progress_enabled) {
10274                 ctx.tp = TASK_NOTHING;
10275                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10276         }
10277
10278         /* This check is the only reason for --readonly to exist */
10279         if (readonly && repair) {
10280                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10281                 exit(1);
10282         }
10283
10284         radix_tree_init();
10285         cache_tree_init(&root_cache);
10286
10287         if((ret = check_mounted(argv[optind])) < 0) {
10288                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10289                 goto err_out;
10290         } else if(ret) {
10291                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10292                 ret = -EBUSY;
10293                 goto err_out;
10294         }
10295
10296         /* only allow partial opening under repair mode */
10297         if (repair)
10298                 ctree_flags |= OPEN_CTREE_PARTIAL;
10299
10300         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10301                                   chunk_root_bytenr, ctree_flags);
10302         if (!info) {
10303                 fprintf(stderr, "Couldn't open file system\n");
10304                 ret = -EIO;
10305                 goto err_out;
10306         }
10307
10308         global_info = info;
10309         root = info->fs_root;
10310
10311         /*
10312          * repair mode will force us to commit transaction which
10313          * will make us fail to load log tree when mounting.
10314          */
10315         if (repair && btrfs_super_log_root(info->super_copy)) {
10316                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10317                 if (!ret) {
10318                         ret = 1;
10319                         goto close_out;
10320                 }
10321                 ret = zero_log_tree(root);
10322                 if (ret) {
10323                         fprintf(stderr, "fail to zero log tree\n");
10324                         goto close_out;
10325                 }
10326         }
10327
10328         uuid_unparse(info->super_copy->fsid, uuidbuf);
10329         if (qgroup_report) {
10330                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10331                        uuidbuf);
10332                 ret = qgroup_verify_all(info);
10333                 if (ret == 0)
10334                         report_qgroups(1);
10335                 goto close_out;
10336         }
10337         if (subvolid) {
10338                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10339                        subvolid, argv[optind], uuidbuf);
10340                 ret = print_extent_state(info, subvolid);
10341                 goto close_out;
10342         }
10343         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10344
10345         if (!extent_buffer_uptodate(info->tree_root->node) ||
10346             !extent_buffer_uptodate(info->dev_root->node) ||
10347             !extent_buffer_uptodate(info->chunk_root->node)) {
10348                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10349                 ret = -EIO;
10350                 goto close_out;
10351         }
10352
10353         if (init_extent_tree || init_csum_tree) {
10354                 struct btrfs_trans_handle *trans;
10355
10356                 trans = btrfs_start_transaction(info->extent_root, 0);
10357                 if (IS_ERR(trans)) {
10358                         fprintf(stderr, "Error starting transaction\n");
10359                         ret = PTR_ERR(trans);
10360                         goto close_out;
10361                 }
10362
10363                 if (init_extent_tree) {
10364                         printf("Creating a new extent tree\n");
10365                         ret = reinit_extent_tree(trans, info);
10366                         if (ret)
10367                                 goto close_out;
10368                 }
10369
10370                 if (init_csum_tree) {
10371                         fprintf(stderr, "Reinit crc root\n");
10372                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10373                         if (ret) {
10374                                 fprintf(stderr, "crc root initialization failed\n");
10375                                 ret = -EIO;
10376                                 goto close_out;
10377                         }
10378
10379                         ret = fill_csum_tree(trans, info->csum_root,
10380                                              init_extent_tree);
10381                         if (ret) {
10382                                 fprintf(stderr, "crc refilling failed\n");
10383                                 return -EIO;
10384                         }
10385                 }
10386                 /*
10387                  * Ok now we commit and run the normal fsck, which will add
10388                  * extent entries for all of the items it finds.
10389                  */
10390                 ret = btrfs_commit_transaction(trans, info->extent_root);
10391                 if (ret)
10392                         goto close_out;
10393         }
10394         if (!extent_buffer_uptodate(info->extent_root->node)) {
10395                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10396                 ret = -EIO;
10397                 goto close_out;
10398         }
10399         if (!extent_buffer_uptodate(info->csum_root->node)) {
10400                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10401                 ret = -EIO;
10402                 goto close_out;
10403         }
10404
10405         if (!ctx.progress_enabled)
10406                 fprintf(stderr, "checking extents\n");
10407         ret = check_chunks_and_extents(root);
10408         if (ret)
10409                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10410
10411         ret = repair_root_items(info);
10412         if (ret < 0)
10413                 goto close_out;
10414         if (repair) {
10415                 fprintf(stderr, "Fixed %d roots.\n", ret);
10416                 ret = 0;
10417         } else if (ret > 0) {
10418                 fprintf(stderr,
10419                        "Found %d roots with an outdated root item.\n",
10420                        ret);
10421                 fprintf(stderr,
10422                         "Please run a filesystem check with the option --repair to fix them.\n");
10423                 ret = 1;
10424                 goto close_out;
10425         }
10426
10427         if (!ctx.progress_enabled) {
10428                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10429                         fprintf(stderr, "checking free space tree\n");
10430                 else
10431                         fprintf(stderr, "checking free space cache\n");
10432         }
10433         ret = check_space_cache(root);
10434         if (ret)
10435                 goto out;
10436
10437         /*
10438          * We used to have to have these hole extents in between our real
10439          * extents so if we don't have this flag set we need to make sure there
10440          * are no gaps in the file extents for inodes, otherwise we can just
10441          * ignore it when this happens.
10442          */
10443         no_holes = btrfs_fs_incompat(root->fs_info,
10444                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10445         if (!ctx.progress_enabled)
10446                 fprintf(stderr, "checking fs roots\n");
10447         ret = check_fs_roots(root, &root_cache);
10448         if (ret)
10449                 goto out;
10450
10451         fprintf(stderr, "checking csums\n");
10452         ret = check_csums(root);
10453         if (ret)
10454                 goto out;
10455
10456         fprintf(stderr, "checking root refs\n");
10457         ret = check_root_refs(root, &root_cache);
10458         if (ret)
10459                 goto out;
10460
10461         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10462                 struct extent_buffer *eb;
10463
10464                 eb = list_first_entry(&root->fs_info->recow_ebs,
10465                                       struct extent_buffer, recow);
10466                 list_del_init(&eb->recow);
10467                 ret = recow_extent_buffer(root, eb);
10468                 if (ret)
10469                         break;
10470         }
10471
10472         while (!list_empty(&delete_items)) {
10473                 struct bad_item *bad;
10474
10475                 bad = list_first_entry(&delete_items, struct bad_item, list);
10476                 list_del_init(&bad->list);
10477                 if (repair)
10478                         ret = delete_bad_item(root, bad);
10479                 free(bad);
10480         }
10481
10482         if (info->quota_enabled) {
10483                 int err;
10484                 fprintf(stderr, "checking quota groups\n");
10485                 err = qgroup_verify_all(info);
10486                 if (err)
10487                         goto out;
10488                 report_qgroups(0);
10489                 err = repair_qgroups(info, &qgroups_repaired);
10490                 if (err)
10491                         goto out;
10492         }
10493
10494         if (!list_empty(&root->fs_info->recow_ebs)) {
10495                 fprintf(stderr, "Transid errors in file system\n");
10496                 ret = 1;
10497         }
10498 out:
10499         /* Don't override original ret */
10500         if (!ret && qgroups_repaired)
10501                 ret = qgroups_repaired;
10502
10503         if (found_old_backref) { /*
10504                  * there was a disk format change when mixed
10505                  * backref was in testing tree. The old format
10506                  * existed about one week.
10507                  */
10508                 printf("\n * Found old mixed backref format. "
10509                        "The old format is not supported! *"
10510                        "\n * Please mount the FS in readonly mode, "
10511                        "backup data and re-format the FS. *\n\n");
10512                 ret = 1;
10513         }
10514         printf("found %llu bytes used err is %d\n",
10515                (unsigned long long)bytes_used, ret);
10516         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10517         printf("total tree bytes: %llu\n",
10518                (unsigned long long)total_btree_bytes);
10519         printf("total fs tree bytes: %llu\n",
10520                (unsigned long long)total_fs_tree_bytes);
10521         printf("total extent tree bytes: %llu\n",
10522                (unsigned long long)total_extent_tree_bytes);
10523         printf("btree space waste bytes: %llu\n",
10524                (unsigned long long)btree_space_waste);
10525         printf("file data blocks allocated: %llu\n referenced %llu\n",
10526                 (unsigned long long)data_bytes_allocated,
10527                 (unsigned long long)data_bytes_referenced);
10528
10529         free_qgroup_counts();
10530         free_root_recs_tree(&root_cache);
10531 close_out:
10532         close_ctree(root);
10533 err_out:
10534         if (ctx.progress_enabled)
10535                 task_deinit(ctx.info);
10536
10537         return ret;
10538 }