btrfs-progs: check: introduce function to check tree block backref in extent tree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
77 struct extent_backref {
78         struct rb_node node;
79         unsigned int is_data:1;
80         unsigned int found_extent_tree:1;
81         unsigned int full_backref:1;
82         unsigned int found_ref:1;
83         unsigned int broken:1;
84 };
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
91 struct data_backref {
92         struct extent_backref node;
93         union {
94                 u64 parent;
95                 u64 root;
96         };
97         u64 owner;
98         u64 offset;
99         u64 disk_bytenr;
100         u64 bytes;
101         u64 ram_bytes;
102         u32 num_refs;
103         u32 found_ref;
104 };
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
161 /*
162  * Much like data_backref, just removed the undetermined members
163  * and change it to use list_head.
164  * During extent scan, it is stored in root->orphan_data_extent.
165  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
166  */
167 struct orphan_data_extent {
168         struct list_head list;
169         u64 root;
170         u64 objectid;
171         u64 offset;
172         u64 disk_bytenr;
173         u64 disk_len;
174 };
175
176 struct tree_backref {
177         struct extent_backref node;
178         union {
179                 u64 parent;
180                 u64 root;
181         };
182 };
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
232
233 struct extent_record {
234         struct list_head backrefs;
235         struct list_head dups;
236         struct rb_root backref_tree;
237         struct list_head list;
238         struct cache_extent cache;
239         struct btrfs_disk_key parent_key;
240         u64 start;
241         u64 max_size;
242         u64 nr;
243         u64 refs;
244         u64 extent_item_refs;
245         u64 generation;
246         u64 parent_generation;
247         u64 info_objectid;
248         u32 num_duplicates;
249         u8 info_level;
250         unsigned int flag_block_full_backref:2;
251         unsigned int found_rec:1;
252         unsigned int content_checked:1;
253         unsigned int owner_ref_checked:1;
254         unsigned int is_root:1;
255         unsigned int metadata:1;
256         unsigned int bad_full_backref:1;
257         unsigned int crossing_stripes:1;
258         unsigned int wrong_chunk_type:1;
259 };
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
266 struct inode_backref {
267         struct list_head list;
268         unsigned int found_dir_item:1;
269         unsigned int found_dir_index:1;
270         unsigned int found_inode_ref:1;
271         unsigned int filetype:8;
272         int errors;
273         unsigned int ref_type;
274         u64 dir;
275         u64 index;
276         u16 namelen;
277         char name[0];
278 };
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
285 struct root_item_record {
286         struct list_head list;
287         u64 objectid;
288         u64 bytenr;
289         u64 last_snapshot;
290         u8 level;
291         u8 drop_level;
292         int level_size;
293         struct btrfs_key drop_key;
294 };
295
296 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
298 #define REF_ERR_NO_INODE_REF            (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
301 #define REF_ERR_DUP_INODE_REF           (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF             (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
309
310 struct file_extent_hole {
311         struct rb_node node;
312         u64 start;
313         u64 len;
314 };
315
316 struct inode_record {
317         struct list_head backrefs;
318         unsigned int checked:1;
319         unsigned int merging:1;
320         unsigned int found_inode_item:1;
321         unsigned int found_dir_item:1;
322         unsigned int found_file_extent:1;
323         unsigned int found_csum_item:1;
324         unsigned int some_csum_missing:1;
325         unsigned int nodatasum:1;
326         int errors;
327
328         u64 ino;
329         u32 nlink;
330         u32 imode;
331         u64 isize;
332         u64 nbytes;
333
334         u32 found_link;
335         u64 found_size;
336         u64 extent_start;
337         u64 extent_end;
338         struct rb_root holes;
339         struct list_head orphan_extents;
340
341         u32 refs;
342 };
343
344 #define I_ERR_NO_INODE_ITEM             (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
359
360 struct root_backref {
361         struct list_head list;
362         unsigned int found_dir_item:1;
363         unsigned int found_dir_index:1;
364         unsigned int found_back_ref:1;
365         unsigned int found_forward_ref:1;
366         unsigned int reachable:1;
367         int errors;
368         u64 ref_root;
369         u64 dir;
370         u64 index;
371         u16 namelen;
372         char name[0];
373 };
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
380 struct root_record {
381         struct list_head backrefs;
382         struct cache_extent cache;
383         unsigned int found_root_item:1;
384         u64 objectid;
385         u32 found_ref;
386 };
387
388 struct ptr_node {
389         struct cache_extent cache;
390         void *data;
391 };
392
393 struct shared_node {
394         struct cache_extent cache;
395         struct cache_tree root_cache;
396         struct cache_tree inode_cache;
397         struct inode_record *current;
398         u32 refs;
399 };
400
401 struct block_info {
402         u64 start;
403         u32 size;
404 };
405
406 struct walk_control {
407         struct cache_tree shared;
408         struct shared_node *nodes[BTRFS_MAX_LEVEL];
409         int active_node;
410         int root_level;
411 };
412
413 struct bad_item {
414         struct btrfs_key key;
415         u64 root_id;
416         struct list_head list;
417 };
418
419 struct extent_entry {
420         u64 bytenr;
421         u64 bytes;
422         int count;
423         int broken;
424         struct list_head list;
425 };
426
427 struct root_item_info {
428         /* level of the root */
429         u8 level;
430         /* number of nodes at this level, must be 1 for a root */
431         int node_count;
432         u64 bytenr;
433         u64 gen;
434         struct cache_extent cache_extent;
435 };
436
437 /*
438  * Error bit for low memory mode check.
439  *
440  * Currently no caller cares about it yet.  Just internal use for error
441  * classification.
442  */
443 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
444 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
445
446 static void *print_status_check(void *p)
447 {
448         struct task_ctx *priv = p;
449         const char work_indicator[] = { '.', 'o', 'O', 'o' };
450         uint32_t count = 0;
451         static char *task_position_string[] = {
452                 "checking extents",
453                 "checking free space cache",
454                 "checking fs roots",
455         };
456
457         task_period_start(priv->info, 1000 /* 1s */);
458
459         if (priv->tp == TASK_NOTHING)
460                 return NULL;
461
462         while (1) {
463                 printf("%s [%c]\r", task_position_string[priv->tp],
464                                 work_indicator[count % 4]);
465                 count++;
466                 fflush(stdout);
467                 task_period_wait(priv->info);
468         }
469         return NULL;
470 }
471
472 static int print_status_return(void *p)
473 {
474         printf("\n");
475         fflush(stdout);
476
477         return 0;
478 }
479
480 /* Compatible function to allow reuse of old codes */
481 static u64 first_extent_gap(struct rb_root *holes)
482 {
483         struct file_extent_hole *hole;
484
485         if (RB_EMPTY_ROOT(holes))
486                 return (u64)-1;
487
488         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
489         return hole->start;
490 }
491
492 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
493 {
494         struct file_extent_hole *hole1;
495         struct file_extent_hole *hole2;
496
497         hole1 = rb_entry(node1, struct file_extent_hole, node);
498         hole2 = rb_entry(node2, struct file_extent_hole, node);
499
500         if (hole1->start > hole2->start)
501                 return -1;
502         if (hole1->start < hole2->start)
503                 return 1;
504         /* Now hole1->start == hole2->start */
505         if (hole1->len >= hole2->len)
506                 /*
507                  * Hole 1 will be merge center
508                  * Same hole will be merged later
509                  */
510                 return -1;
511         /* Hole 2 will be merge center */
512         return 1;
513 }
514
515 /*
516  * Add a hole to the record
517  *
518  * This will do hole merge for copy_file_extent_holes(),
519  * which will ensure there won't be continuous holes.
520  */
521 static int add_file_extent_hole(struct rb_root *holes,
522                                 u64 start, u64 len)
523 {
524         struct file_extent_hole *hole;
525         struct file_extent_hole *prev = NULL;
526         struct file_extent_hole *next = NULL;
527
528         hole = malloc(sizeof(*hole));
529         if (!hole)
530                 return -ENOMEM;
531         hole->start = start;
532         hole->len = len;
533         /* Since compare will not return 0, no -EEXIST will happen */
534         rb_insert(holes, &hole->node, compare_hole);
535
536         /* simple merge with previous hole */
537         if (rb_prev(&hole->node))
538                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
539                                 node);
540         if (prev && prev->start + prev->len >= hole->start) {
541                 hole->len = hole->start + hole->len - prev->start;
542                 hole->start = prev->start;
543                 rb_erase(&prev->node, holes);
544                 free(prev);
545                 prev = NULL;
546         }
547
548         /* iterate merge with next holes */
549         while (1) {
550                 if (!rb_next(&hole->node))
551                         break;
552                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
553                                         node);
554                 if (hole->start + hole->len >= next->start) {
555                         if (hole->start + hole->len <= next->start + next->len)
556                                 hole->len = next->start + next->len -
557                                             hole->start;
558                         rb_erase(&next->node, holes);
559                         free(next);
560                         next = NULL;
561                 } else
562                         break;
563         }
564         return 0;
565 }
566
567 static int compare_hole_range(struct rb_node *node, void *data)
568 {
569         struct file_extent_hole *hole;
570         u64 start;
571
572         hole = (struct file_extent_hole *)data;
573         start = hole->start;
574
575         hole = rb_entry(node, struct file_extent_hole, node);
576         if (start < hole->start)
577                 return -1;
578         if (start >= hole->start && start < hole->start + hole->len)
579                 return 0;
580         return 1;
581 }
582
583 /*
584  * Delete a hole in the record
585  *
586  * This will do the hole split and is much restrict than add.
587  */
588 static int del_file_extent_hole(struct rb_root *holes,
589                                 u64 start, u64 len)
590 {
591         struct file_extent_hole *hole;
592         struct file_extent_hole tmp;
593         u64 prev_start = 0;
594         u64 prev_len = 0;
595         u64 next_start = 0;
596         u64 next_len = 0;
597         struct rb_node *node;
598         int have_prev = 0;
599         int have_next = 0;
600         int ret = 0;
601
602         tmp.start = start;
603         tmp.len = len;
604         node = rb_search(holes, &tmp, compare_hole_range, NULL);
605         if (!node)
606                 return -EEXIST;
607         hole = rb_entry(node, struct file_extent_hole, node);
608         if (start + len > hole->start + hole->len)
609                 return -EEXIST;
610
611         /*
612          * Now there will be no overlap, delete the hole and re-add the
613          * split(s) if they exists.
614          */
615         if (start > hole->start) {
616                 prev_start = hole->start;
617                 prev_len = start - hole->start;
618                 have_prev = 1;
619         }
620         if (hole->start + hole->len > start + len) {
621                 next_start = start + len;
622                 next_len = hole->start + hole->len - start - len;
623                 have_next = 1;
624         }
625         rb_erase(node, holes);
626         free(hole);
627         if (have_prev) {
628                 ret = add_file_extent_hole(holes, prev_start, prev_len);
629                 if (ret < 0)
630                         return ret;
631         }
632         if (have_next) {
633                 ret = add_file_extent_hole(holes, next_start, next_len);
634                 if (ret < 0)
635                         return ret;
636         }
637         return 0;
638 }
639
640 static int copy_file_extent_holes(struct rb_root *dst,
641                                   struct rb_root *src)
642 {
643         struct file_extent_hole *hole;
644         struct rb_node *node;
645         int ret = 0;
646
647         node = rb_first(src);
648         while (node) {
649                 hole = rb_entry(node, struct file_extent_hole, node);
650                 ret = add_file_extent_hole(dst, hole->start, hole->len);
651                 if (ret)
652                         break;
653                 node = rb_next(node);
654         }
655         return ret;
656 }
657
658 static void free_file_extent_holes(struct rb_root *holes)
659 {
660         struct rb_node *node;
661         struct file_extent_hole *hole;
662
663         node = rb_first(holes);
664         while (node) {
665                 hole = rb_entry(node, struct file_extent_hole, node);
666                 rb_erase(node, holes);
667                 free(hole);
668                 node = rb_first(holes);
669         }
670 }
671
672 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
673
674 static void record_root_in_trans(struct btrfs_trans_handle *trans,
675                                  struct btrfs_root *root)
676 {
677         if (root->last_trans != trans->transid) {
678                 root->track_dirty = 1;
679                 root->last_trans = trans->transid;
680                 root->commit_root = root->node;
681                 extent_buffer_get(root->node);
682         }
683 }
684
685 static u8 imode_to_type(u32 imode)
686 {
687 #define S_SHIFT 12
688         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
689                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
690                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
691                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
692                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
693                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
694                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
695                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
696         };
697
698         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
699 #undef S_SHIFT
700 }
701
702 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
703 {
704         struct device_record *rec1;
705         struct device_record *rec2;
706
707         rec1 = rb_entry(node1, struct device_record, node);
708         rec2 = rb_entry(node2, struct device_record, node);
709         if (rec1->devid > rec2->devid)
710                 return -1;
711         else if (rec1->devid < rec2->devid)
712                 return 1;
713         else
714                 return 0;
715 }
716
717 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
718 {
719         struct inode_record *rec;
720         struct inode_backref *backref;
721         struct inode_backref *orig;
722         struct inode_backref *tmp;
723         struct orphan_data_extent *src_orphan;
724         struct orphan_data_extent *dst_orphan;
725         size_t size;
726         int ret;
727
728         rec = malloc(sizeof(*rec));
729         if (!rec)
730                 return ERR_PTR(-ENOMEM);
731         memcpy(rec, orig_rec, sizeof(*rec));
732         rec->refs = 1;
733         INIT_LIST_HEAD(&rec->backrefs);
734         INIT_LIST_HEAD(&rec->orphan_extents);
735         rec->holes = RB_ROOT;
736
737         list_for_each_entry(orig, &orig_rec->backrefs, list) {
738                 size = sizeof(*orig) + orig->namelen + 1;
739                 backref = malloc(size);
740                 if (!backref) {
741                         ret = -ENOMEM;
742                         goto cleanup;
743                 }
744                 memcpy(backref, orig, size);
745                 list_add_tail(&backref->list, &rec->backrefs);
746         }
747         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
748                 dst_orphan = malloc(sizeof(*dst_orphan));
749                 if (!dst_orphan) {
750                         ret = -ENOMEM;
751                         goto cleanup;
752                 }
753                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
754                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
755         }
756         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
757         BUG_ON(ret < 0);
758
759         return rec;
760
761 cleanup:
762         if (!list_empty(&rec->backrefs))
763                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
764                         list_del(&orig->list);
765                         free(orig);
766                 }
767
768         if (!list_empty(&rec->orphan_extents))
769                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
770                         list_del(&orig->list);
771                         free(orig);
772                 }
773
774         free(rec);
775
776         return ERR_PTR(ret);
777 }
778
779 static void print_orphan_data_extents(struct list_head *orphan_extents,
780                                       u64 objectid)
781 {
782         struct orphan_data_extent *orphan;
783
784         if (list_empty(orphan_extents))
785                 return;
786         printf("The following data extent is lost in tree %llu:\n",
787                objectid);
788         list_for_each_entry(orphan, orphan_extents, list) {
789                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
790                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
791                        orphan->disk_len);
792         }
793 }
794
795 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
796 {
797         u64 root_objectid = root->root_key.objectid;
798         int errors = rec->errors;
799
800         if (!errors)
801                 return;
802         /* reloc root errors, we print its corresponding fs root objectid*/
803         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
804                 root_objectid = root->root_key.offset;
805                 fprintf(stderr, "reloc");
806         }
807         fprintf(stderr, "root %llu inode %llu errors %x",
808                 (unsigned long long) root_objectid,
809                 (unsigned long long) rec->ino, rec->errors);
810
811         if (errors & I_ERR_NO_INODE_ITEM)
812                 fprintf(stderr, ", no inode item");
813         if (errors & I_ERR_NO_ORPHAN_ITEM)
814                 fprintf(stderr, ", no orphan item");
815         if (errors & I_ERR_DUP_INODE_ITEM)
816                 fprintf(stderr, ", dup inode item");
817         if (errors & I_ERR_DUP_DIR_INDEX)
818                 fprintf(stderr, ", dup dir index");
819         if (errors & I_ERR_ODD_DIR_ITEM)
820                 fprintf(stderr, ", odd dir item");
821         if (errors & I_ERR_ODD_FILE_EXTENT)
822                 fprintf(stderr, ", odd file extent");
823         if (errors & I_ERR_BAD_FILE_EXTENT)
824                 fprintf(stderr, ", bad file extent");
825         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
826                 fprintf(stderr, ", file extent overlap");
827         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
828                 fprintf(stderr, ", file extent discount");
829         if (errors & I_ERR_DIR_ISIZE_WRONG)
830                 fprintf(stderr, ", dir isize wrong");
831         if (errors & I_ERR_FILE_NBYTES_WRONG)
832                 fprintf(stderr, ", nbytes wrong");
833         if (errors & I_ERR_ODD_CSUM_ITEM)
834                 fprintf(stderr, ", odd csum item");
835         if (errors & I_ERR_SOME_CSUM_MISSING)
836                 fprintf(stderr, ", some csum missing");
837         if (errors & I_ERR_LINK_COUNT_WRONG)
838                 fprintf(stderr, ", link count wrong");
839         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
840                 fprintf(stderr, ", orphan file extent");
841         fprintf(stderr, "\n");
842         /* Print the orphan extents if needed */
843         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
844                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
845
846         /* Print the holes if needed */
847         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
848                 struct file_extent_hole *hole;
849                 struct rb_node *node;
850                 int found = 0;
851
852                 node = rb_first(&rec->holes);
853                 fprintf(stderr, "Found file extent holes:\n");
854                 while (node) {
855                         found = 1;
856                         hole = rb_entry(node, struct file_extent_hole, node);
857                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
858                                 hole->start, hole->len);
859                         node = rb_next(node);
860                 }
861                 if (!found)
862                         fprintf(stderr, "\tstart: 0, len: %llu\n",
863                                 round_up(rec->isize, root->sectorsize));
864         }
865 }
866
867 static void print_ref_error(int errors)
868 {
869         if (errors & REF_ERR_NO_DIR_ITEM)
870                 fprintf(stderr, ", no dir item");
871         if (errors & REF_ERR_NO_DIR_INDEX)
872                 fprintf(stderr, ", no dir index");
873         if (errors & REF_ERR_NO_INODE_REF)
874                 fprintf(stderr, ", no inode ref");
875         if (errors & REF_ERR_DUP_DIR_ITEM)
876                 fprintf(stderr, ", dup dir item");
877         if (errors & REF_ERR_DUP_DIR_INDEX)
878                 fprintf(stderr, ", dup dir index");
879         if (errors & REF_ERR_DUP_INODE_REF)
880                 fprintf(stderr, ", dup inode ref");
881         if (errors & REF_ERR_INDEX_UNMATCH)
882                 fprintf(stderr, ", index mismatch");
883         if (errors & REF_ERR_FILETYPE_UNMATCH)
884                 fprintf(stderr, ", filetype mismatch");
885         if (errors & REF_ERR_NAME_TOO_LONG)
886                 fprintf(stderr, ", name too long");
887         if (errors & REF_ERR_NO_ROOT_REF)
888                 fprintf(stderr, ", no root ref");
889         if (errors & REF_ERR_NO_ROOT_BACKREF)
890                 fprintf(stderr, ", no root backref");
891         if (errors & REF_ERR_DUP_ROOT_REF)
892                 fprintf(stderr, ", dup root ref");
893         if (errors & REF_ERR_DUP_ROOT_BACKREF)
894                 fprintf(stderr, ", dup root backref");
895         fprintf(stderr, "\n");
896 }
897
898 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
899                                           u64 ino, int mod)
900 {
901         struct ptr_node *node;
902         struct cache_extent *cache;
903         struct inode_record *rec = NULL;
904         int ret;
905
906         cache = lookup_cache_extent(inode_cache, ino, 1);
907         if (cache) {
908                 node = container_of(cache, struct ptr_node, cache);
909                 rec = node->data;
910                 if (mod && rec->refs > 1) {
911                         node->data = clone_inode_rec(rec);
912                         if (IS_ERR(node->data))
913                                 return node->data;
914                         rec->refs--;
915                         rec = node->data;
916                 }
917         } else if (mod) {
918                 rec = calloc(1, sizeof(*rec));
919                 if (!rec)
920                         return ERR_PTR(-ENOMEM);
921                 rec->ino = ino;
922                 rec->extent_start = (u64)-1;
923                 rec->refs = 1;
924                 INIT_LIST_HEAD(&rec->backrefs);
925                 INIT_LIST_HEAD(&rec->orphan_extents);
926                 rec->holes = RB_ROOT;
927
928                 node = malloc(sizeof(*node));
929                 if (!node) {
930                         free(rec);
931                         return ERR_PTR(-ENOMEM);
932                 }
933                 node->cache.start = ino;
934                 node->cache.size = 1;
935                 node->data = rec;
936
937                 if (ino == BTRFS_FREE_INO_OBJECTID)
938                         rec->found_link = 1;
939
940                 ret = insert_cache_extent(inode_cache, &node->cache);
941                 if (ret)
942                         return ERR_PTR(-EEXIST);
943         }
944         return rec;
945 }
946
947 static void free_orphan_data_extents(struct list_head *orphan_extents)
948 {
949         struct orphan_data_extent *orphan;
950
951         while (!list_empty(orphan_extents)) {
952                 orphan = list_entry(orphan_extents->next,
953                                     struct orphan_data_extent, list);
954                 list_del(&orphan->list);
955                 free(orphan);
956         }
957 }
958
959 static void free_inode_rec(struct inode_record *rec)
960 {
961         struct inode_backref *backref;
962
963         if (--rec->refs > 0)
964                 return;
965
966         while (!list_empty(&rec->backrefs)) {
967                 backref = to_inode_backref(rec->backrefs.next);
968                 list_del(&backref->list);
969                 free(backref);
970         }
971         free_orphan_data_extents(&rec->orphan_extents);
972         free_file_extent_holes(&rec->holes);
973         free(rec);
974 }
975
976 static int can_free_inode_rec(struct inode_record *rec)
977 {
978         if (!rec->errors && rec->checked && rec->found_inode_item &&
979             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
980                 return 1;
981         return 0;
982 }
983
984 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
985                                  struct inode_record *rec)
986 {
987         struct cache_extent *cache;
988         struct inode_backref *tmp, *backref;
989         struct ptr_node *node;
990         unsigned char filetype;
991
992         if (!rec->found_inode_item)
993                 return;
994
995         filetype = imode_to_type(rec->imode);
996         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
997                 if (backref->found_dir_item && backref->found_dir_index) {
998                         if (backref->filetype != filetype)
999                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1000                         if (!backref->errors && backref->found_inode_ref &&
1001                             rec->nlink == rec->found_link) {
1002                                 list_del(&backref->list);
1003                                 free(backref);
1004                         }
1005                 }
1006         }
1007
1008         if (!rec->checked || rec->merging)
1009                 return;
1010
1011         if (S_ISDIR(rec->imode)) {
1012                 if (rec->found_size != rec->isize)
1013                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1014                 if (rec->found_file_extent)
1015                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1016         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1017                 if (rec->found_dir_item)
1018                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1019                 if (rec->found_size != rec->nbytes)
1020                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1021                 if (rec->nlink > 0 && !no_holes &&
1022                     (rec->extent_end < rec->isize ||
1023                      first_extent_gap(&rec->holes) < rec->isize))
1024                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1025         }
1026
1027         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1028                 if (rec->found_csum_item && rec->nodatasum)
1029                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1030                 if (rec->some_csum_missing && !rec->nodatasum)
1031                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1032         }
1033
1034         BUG_ON(rec->refs != 1);
1035         if (can_free_inode_rec(rec)) {
1036                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1037                 node = container_of(cache, struct ptr_node, cache);
1038                 BUG_ON(node->data != rec);
1039                 remove_cache_extent(inode_cache, &node->cache);
1040                 free(node);
1041                 free_inode_rec(rec);
1042         }
1043 }
1044
1045 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1046 {
1047         struct btrfs_path path;
1048         struct btrfs_key key;
1049         int ret;
1050
1051         key.objectid = BTRFS_ORPHAN_OBJECTID;
1052         key.type = BTRFS_ORPHAN_ITEM_KEY;
1053         key.offset = ino;
1054
1055         btrfs_init_path(&path);
1056         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1057         btrfs_release_path(&path);
1058         if (ret > 0)
1059                 ret = -ENOENT;
1060         return ret;
1061 }
1062
1063 static int process_inode_item(struct extent_buffer *eb,
1064                               int slot, struct btrfs_key *key,
1065                               struct shared_node *active_node)
1066 {
1067         struct inode_record *rec;
1068         struct btrfs_inode_item *item;
1069
1070         rec = active_node->current;
1071         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1072         if (rec->found_inode_item) {
1073                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1074                 return 1;
1075         }
1076         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1077         rec->nlink = btrfs_inode_nlink(eb, item);
1078         rec->isize = btrfs_inode_size(eb, item);
1079         rec->nbytes = btrfs_inode_nbytes(eb, item);
1080         rec->imode = btrfs_inode_mode(eb, item);
1081         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1082                 rec->nodatasum = 1;
1083         rec->found_inode_item = 1;
1084         if (rec->nlink == 0)
1085                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1086         maybe_free_inode_rec(&active_node->inode_cache, rec);
1087         return 0;
1088 }
1089
1090 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1091                                                 const char *name,
1092                                                 int namelen, u64 dir)
1093 {
1094         struct inode_backref *backref;
1095
1096         list_for_each_entry(backref, &rec->backrefs, list) {
1097                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1098                         break;
1099                 if (backref->dir != dir || backref->namelen != namelen)
1100                         continue;
1101                 if (memcmp(name, backref->name, namelen))
1102                         continue;
1103                 return backref;
1104         }
1105
1106         backref = malloc(sizeof(*backref) + namelen + 1);
1107         if (!backref)
1108                 return NULL;
1109         memset(backref, 0, sizeof(*backref));
1110         backref->dir = dir;
1111         backref->namelen = namelen;
1112         memcpy(backref->name, name, namelen);
1113         backref->name[namelen] = '\0';
1114         list_add_tail(&backref->list, &rec->backrefs);
1115         return backref;
1116 }
1117
1118 static int add_inode_backref(struct cache_tree *inode_cache,
1119                              u64 ino, u64 dir, u64 index,
1120                              const char *name, int namelen,
1121                              int filetype, int itemtype, int errors)
1122 {
1123         struct inode_record *rec;
1124         struct inode_backref *backref;
1125
1126         rec = get_inode_rec(inode_cache, ino, 1);
1127         BUG_ON(IS_ERR(rec));
1128         backref = get_inode_backref(rec, name, namelen, dir);
1129         BUG_ON(!backref);
1130         if (errors)
1131                 backref->errors |= errors;
1132         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1133                 if (backref->found_dir_index)
1134                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1135                 if (backref->found_inode_ref && backref->index != index)
1136                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1137                 if (backref->found_dir_item && backref->filetype != filetype)
1138                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1139
1140                 backref->index = index;
1141                 backref->filetype = filetype;
1142                 backref->found_dir_index = 1;
1143         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1144                 rec->found_link++;
1145                 if (backref->found_dir_item)
1146                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1147                 if (backref->found_dir_index && backref->filetype != filetype)
1148                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1149
1150                 backref->filetype = filetype;
1151                 backref->found_dir_item = 1;
1152         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1153                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1154                 if (backref->found_inode_ref)
1155                         backref->errors |= REF_ERR_DUP_INODE_REF;
1156                 if (backref->found_dir_index && backref->index != index)
1157                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1158                 else
1159                         backref->index = index;
1160
1161                 backref->ref_type = itemtype;
1162                 backref->found_inode_ref = 1;
1163         } else {
1164                 BUG_ON(1);
1165         }
1166
1167         maybe_free_inode_rec(inode_cache, rec);
1168         return 0;
1169 }
1170
1171 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1172                             struct cache_tree *dst_cache)
1173 {
1174         struct inode_backref *backref;
1175         u32 dir_count = 0;
1176         int ret = 0;
1177
1178         dst->merging = 1;
1179         list_for_each_entry(backref, &src->backrefs, list) {
1180                 if (backref->found_dir_index) {
1181                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1182                                         backref->index, backref->name,
1183                                         backref->namelen, backref->filetype,
1184                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1185                 }
1186                 if (backref->found_dir_item) {
1187                         dir_count++;
1188                         add_inode_backref(dst_cache, dst->ino,
1189                                         backref->dir, 0, backref->name,
1190                                         backref->namelen, backref->filetype,
1191                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1192                 }
1193                 if (backref->found_inode_ref) {
1194                         add_inode_backref(dst_cache, dst->ino,
1195                                         backref->dir, backref->index,
1196                                         backref->name, backref->namelen, 0,
1197                                         backref->ref_type, backref->errors);
1198                 }
1199         }
1200
1201         if (src->found_dir_item)
1202                 dst->found_dir_item = 1;
1203         if (src->found_file_extent)
1204                 dst->found_file_extent = 1;
1205         if (src->found_csum_item)
1206                 dst->found_csum_item = 1;
1207         if (src->some_csum_missing)
1208                 dst->some_csum_missing = 1;
1209         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1210                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1211                 if (ret < 0)
1212                         return ret;
1213         }
1214
1215         BUG_ON(src->found_link < dir_count);
1216         dst->found_link += src->found_link - dir_count;
1217         dst->found_size += src->found_size;
1218         if (src->extent_start != (u64)-1) {
1219                 if (dst->extent_start == (u64)-1) {
1220                         dst->extent_start = src->extent_start;
1221                         dst->extent_end = src->extent_end;
1222                 } else {
1223                         if (dst->extent_end > src->extent_start)
1224                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1225                         else if (dst->extent_end < src->extent_start) {
1226                                 ret = add_file_extent_hole(&dst->holes,
1227                                         dst->extent_end,
1228                                         src->extent_start - dst->extent_end);
1229                         }
1230                         if (dst->extent_end < src->extent_end)
1231                                 dst->extent_end = src->extent_end;
1232                 }
1233         }
1234
1235         dst->errors |= src->errors;
1236         if (src->found_inode_item) {
1237                 if (!dst->found_inode_item) {
1238                         dst->nlink = src->nlink;
1239                         dst->isize = src->isize;
1240                         dst->nbytes = src->nbytes;
1241                         dst->imode = src->imode;
1242                         dst->nodatasum = src->nodatasum;
1243                         dst->found_inode_item = 1;
1244                 } else {
1245                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1246                 }
1247         }
1248         dst->merging = 0;
1249
1250         return 0;
1251 }
1252
1253 static int splice_shared_node(struct shared_node *src_node,
1254                               struct shared_node *dst_node)
1255 {
1256         struct cache_extent *cache;
1257         struct ptr_node *node, *ins;
1258         struct cache_tree *src, *dst;
1259         struct inode_record *rec, *conflict;
1260         u64 current_ino = 0;
1261         int splice = 0;
1262         int ret;
1263
1264         if (--src_node->refs == 0)
1265                 splice = 1;
1266         if (src_node->current)
1267                 current_ino = src_node->current->ino;
1268
1269         src = &src_node->root_cache;
1270         dst = &dst_node->root_cache;
1271 again:
1272         cache = search_cache_extent(src, 0);
1273         while (cache) {
1274                 node = container_of(cache, struct ptr_node, cache);
1275                 rec = node->data;
1276                 cache = next_cache_extent(cache);
1277
1278                 if (splice) {
1279                         remove_cache_extent(src, &node->cache);
1280                         ins = node;
1281                 } else {
1282                         ins = malloc(sizeof(*ins));
1283                         BUG_ON(!ins);
1284                         ins->cache.start = node->cache.start;
1285                         ins->cache.size = node->cache.size;
1286                         ins->data = rec;
1287                         rec->refs++;
1288                 }
1289                 ret = insert_cache_extent(dst, &ins->cache);
1290                 if (ret == -EEXIST) {
1291                         conflict = get_inode_rec(dst, rec->ino, 1);
1292                         BUG_ON(IS_ERR(conflict));
1293                         merge_inode_recs(rec, conflict, dst);
1294                         if (rec->checked) {
1295                                 conflict->checked = 1;
1296                                 if (dst_node->current == conflict)
1297                                         dst_node->current = NULL;
1298                         }
1299                         maybe_free_inode_rec(dst, conflict);
1300                         free_inode_rec(rec);
1301                         free(ins);
1302                 } else {
1303                         BUG_ON(ret);
1304                 }
1305         }
1306
1307         if (src == &src_node->root_cache) {
1308                 src = &src_node->inode_cache;
1309                 dst = &dst_node->inode_cache;
1310                 goto again;
1311         }
1312
1313         if (current_ino > 0 && (!dst_node->current ||
1314             current_ino > dst_node->current->ino)) {
1315                 if (dst_node->current) {
1316                         dst_node->current->checked = 1;
1317                         maybe_free_inode_rec(dst, dst_node->current);
1318                 }
1319                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1320                 BUG_ON(IS_ERR(dst_node->current));
1321         }
1322         return 0;
1323 }
1324
1325 static void free_inode_ptr(struct cache_extent *cache)
1326 {
1327         struct ptr_node *node;
1328         struct inode_record *rec;
1329
1330         node = container_of(cache, struct ptr_node, cache);
1331         rec = node->data;
1332         free_inode_rec(rec);
1333         free(node);
1334 }
1335
1336 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1337
1338 static struct shared_node *find_shared_node(struct cache_tree *shared,
1339                                             u64 bytenr)
1340 {
1341         struct cache_extent *cache;
1342         struct shared_node *node;
1343
1344         cache = lookup_cache_extent(shared, bytenr, 1);
1345         if (cache) {
1346                 node = container_of(cache, struct shared_node, cache);
1347                 return node;
1348         }
1349         return NULL;
1350 }
1351
1352 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1353 {
1354         int ret;
1355         struct shared_node *node;
1356
1357         node = calloc(1, sizeof(*node));
1358         if (!node)
1359                 return -ENOMEM;
1360         node->cache.start = bytenr;
1361         node->cache.size = 1;
1362         cache_tree_init(&node->root_cache);
1363         cache_tree_init(&node->inode_cache);
1364         node->refs = refs;
1365
1366         ret = insert_cache_extent(shared, &node->cache);
1367
1368         return ret;
1369 }
1370
1371 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1372                              struct walk_control *wc, int level)
1373 {
1374         struct shared_node *node;
1375         struct shared_node *dest;
1376         int ret;
1377
1378         if (level == wc->active_node)
1379                 return 0;
1380
1381         BUG_ON(wc->active_node <= level);
1382         node = find_shared_node(&wc->shared, bytenr);
1383         if (!node) {
1384                 ret = add_shared_node(&wc->shared, bytenr, refs);
1385                 BUG_ON(ret);
1386                 node = find_shared_node(&wc->shared, bytenr);
1387                 wc->nodes[level] = node;
1388                 wc->active_node = level;
1389                 return 0;
1390         }
1391
1392         if (wc->root_level == wc->active_node &&
1393             btrfs_root_refs(&root->root_item) == 0) {
1394                 if (--node->refs == 0) {
1395                         free_inode_recs_tree(&node->root_cache);
1396                         free_inode_recs_tree(&node->inode_cache);
1397                         remove_cache_extent(&wc->shared, &node->cache);
1398                         free(node);
1399                 }
1400                 return 1;
1401         }
1402
1403         dest = wc->nodes[wc->active_node];
1404         splice_shared_node(node, dest);
1405         if (node->refs == 0) {
1406                 remove_cache_extent(&wc->shared, &node->cache);
1407                 free(node);
1408         }
1409         return 1;
1410 }
1411
1412 static int leave_shared_node(struct btrfs_root *root,
1413                              struct walk_control *wc, int level)
1414 {
1415         struct shared_node *node;
1416         struct shared_node *dest;
1417         int i;
1418
1419         if (level == wc->root_level)
1420                 return 0;
1421
1422         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1423                 if (wc->nodes[i])
1424                         break;
1425         }
1426         BUG_ON(i >= BTRFS_MAX_LEVEL);
1427
1428         node = wc->nodes[wc->active_node];
1429         wc->nodes[wc->active_node] = NULL;
1430         wc->active_node = i;
1431
1432         dest = wc->nodes[wc->active_node];
1433         if (wc->active_node < wc->root_level ||
1434             btrfs_root_refs(&root->root_item) > 0) {
1435                 BUG_ON(node->refs <= 1);
1436                 splice_shared_node(node, dest);
1437         } else {
1438                 BUG_ON(node->refs < 2);
1439                 node->refs--;
1440         }
1441         return 0;
1442 }
1443
1444 /*
1445  * Returns:
1446  * < 0 - on error
1447  * 1   - if the root with id child_root_id is a child of root parent_root_id
1448  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1449  *       has other root(s) as parent(s)
1450  * 2   - if the root child_root_id doesn't have any parent roots
1451  */
1452 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1453                          u64 child_root_id)
1454 {
1455         struct btrfs_path path;
1456         struct btrfs_key key;
1457         struct extent_buffer *leaf;
1458         int has_parent = 0;
1459         int ret;
1460
1461         btrfs_init_path(&path);
1462
1463         key.objectid = parent_root_id;
1464         key.type = BTRFS_ROOT_REF_KEY;
1465         key.offset = child_root_id;
1466         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1467                                 0, 0);
1468         if (ret < 0)
1469                 return ret;
1470         btrfs_release_path(&path);
1471         if (!ret)
1472                 return 1;
1473
1474         key.objectid = child_root_id;
1475         key.type = BTRFS_ROOT_BACKREF_KEY;
1476         key.offset = 0;
1477         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1478                                 0, 0);
1479         if (ret < 0)
1480                 goto out;
1481
1482         while (1) {
1483                 leaf = path.nodes[0];
1484                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1485                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1486                         if (ret)
1487                                 break;
1488                         leaf = path.nodes[0];
1489                 }
1490
1491                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1492                 if (key.objectid != child_root_id ||
1493                     key.type != BTRFS_ROOT_BACKREF_KEY)
1494                         break;
1495
1496                 has_parent = 1;
1497
1498                 if (key.offset == parent_root_id) {
1499                         btrfs_release_path(&path);
1500                         return 1;
1501                 }
1502
1503                 path.slots[0]++;
1504         }
1505 out:
1506         btrfs_release_path(&path);
1507         if (ret < 0)
1508                 return ret;
1509         return has_parent ? 0 : 2;
1510 }
1511
1512 static int process_dir_item(struct btrfs_root *root,
1513                             struct extent_buffer *eb,
1514                             int slot, struct btrfs_key *key,
1515                             struct shared_node *active_node)
1516 {
1517         u32 total;
1518         u32 cur = 0;
1519         u32 len;
1520         u32 name_len;
1521         u32 data_len;
1522         int error;
1523         int nritems = 0;
1524         int filetype;
1525         struct btrfs_dir_item *di;
1526         struct inode_record *rec;
1527         struct cache_tree *root_cache;
1528         struct cache_tree *inode_cache;
1529         struct btrfs_key location;
1530         char namebuf[BTRFS_NAME_LEN];
1531
1532         root_cache = &active_node->root_cache;
1533         inode_cache = &active_node->inode_cache;
1534         rec = active_node->current;
1535         rec->found_dir_item = 1;
1536
1537         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1538         total = btrfs_item_size_nr(eb, slot);
1539         while (cur < total) {
1540                 nritems++;
1541                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1542                 name_len = btrfs_dir_name_len(eb, di);
1543                 data_len = btrfs_dir_data_len(eb, di);
1544                 filetype = btrfs_dir_type(eb, di);
1545
1546                 rec->found_size += name_len;
1547                 if (name_len <= BTRFS_NAME_LEN) {
1548                         len = name_len;
1549                         error = 0;
1550                 } else {
1551                         len = BTRFS_NAME_LEN;
1552                         error = REF_ERR_NAME_TOO_LONG;
1553                 }
1554                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1555
1556                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1557                         add_inode_backref(inode_cache, location.objectid,
1558                                           key->objectid, key->offset, namebuf,
1559                                           len, filetype, key->type, error);
1560                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1561                         add_inode_backref(root_cache, location.objectid,
1562                                           key->objectid, key->offset,
1563                                           namebuf, len, filetype,
1564                                           key->type, error);
1565                 } else {
1566                         fprintf(stderr, "invalid location in dir item %u\n",
1567                                 location.type);
1568                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1569                                           key->objectid, key->offset, namebuf,
1570                                           len, filetype, key->type, error);
1571                 }
1572
1573                 len = sizeof(*di) + name_len + data_len;
1574                 di = (struct btrfs_dir_item *)((char *)di + len);
1575                 cur += len;
1576         }
1577         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1578                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1579
1580         return 0;
1581 }
1582
1583 static int process_inode_ref(struct extent_buffer *eb,
1584                              int slot, struct btrfs_key *key,
1585                              struct shared_node *active_node)
1586 {
1587         u32 total;
1588         u32 cur = 0;
1589         u32 len;
1590         u32 name_len;
1591         u64 index;
1592         int error;
1593         struct cache_tree *inode_cache;
1594         struct btrfs_inode_ref *ref;
1595         char namebuf[BTRFS_NAME_LEN];
1596
1597         inode_cache = &active_node->inode_cache;
1598
1599         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1600         total = btrfs_item_size_nr(eb, slot);
1601         while (cur < total) {
1602                 name_len = btrfs_inode_ref_name_len(eb, ref);
1603                 index = btrfs_inode_ref_index(eb, ref);
1604                 if (name_len <= BTRFS_NAME_LEN) {
1605                         len = name_len;
1606                         error = 0;
1607                 } else {
1608                         len = BTRFS_NAME_LEN;
1609                         error = REF_ERR_NAME_TOO_LONG;
1610                 }
1611                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1612                 add_inode_backref(inode_cache, key->objectid, key->offset,
1613                                   index, namebuf, len, 0, key->type, error);
1614
1615                 len = sizeof(*ref) + name_len;
1616                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1617                 cur += len;
1618         }
1619         return 0;
1620 }
1621
1622 static int process_inode_extref(struct extent_buffer *eb,
1623                                 int slot, struct btrfs_key *key,
1624                                 struct shared_node *active_node)
1625 {
1626         u32 total;
1627         u32 cur = 0;
1628         u32 len;
1629         u32 name_len;
1630         u64 index;
1631         u64 parent;
1632         int error;
1633         struct cache_tree *inode_cache;
1634         struct btrfs_inode_extref *extref;
1635         char namebuf[BTRFS_NAME_LEN];
1636
1637         inode_cache = &active_node->inode_cache;
1638
1639         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1640         total = btrfs_item_size_nr(eb, slot);
1641         while (cur < total) {
1642                 name_len = btrfs_inode_extref_name_len(eb, extref);
1643                 index = btrfs_inode_extref_index(eb, extref);
1644                 parent = btrfs_inode_extref_parent(eb, extref);
1645                 if (name_len <= BTRFS_NAME_LEN) {
1646                         len = name_len;
1647                         error = 0;
1648                 } else {
1649                         len = BTRFS_NAME_LEN;
1650                         error = REF_ERR_NAME_TOO_LONG;
1651                 }
1652                 read_extent_buffer(eb, namebuf,
1653                                    (unsigned long)(extref + 1), len);
1654                 add_inode_backref(inode_cache, key->objectid, parent,
1655                                   index, namebuf, len, 0, key->type, error);
1656
1657                 len = sizeof(*extref) + name_len;
1658                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1659                 cur += len;
1660         }
1661         return 0;
1662
1663 }
1664
1665 static int count_csum_range(struct btrfs_root *root, u64 start,
1666                             u64 len, u64 *found)
1667 {
1668         struct btrfs_key key;
1669         struct btrfs_path path;
1670         struct extent_buffer *leaf;
1671         int ret;
1672         size_t size;
1673         *found = 0;
1674         u64 csum_end;
1675         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1676
1677         btrfs_init_path(&path);
1678
1679         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1680         key.offset = start;
1681         key.type = BTRFS_EXTENT_CSUM_KEY;
1682
1683         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1684                                 &key, &path, 0, 0);
1685         if (ret < 0)
1686                 goto out;
1687         if (ret > 0 && path.slots[0] > 0) {
1688                 leaf = path.nodes[0];
1689                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1690                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1691                     key.type == BTRFS_EXTENT_CSUM_KEY)
1692                         path.slots[0]--;
1693         }
1694
1695         while (len > 0) {
1696                 leaf = path.nodes[0];
1697                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1698                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1699                         if (ret > 0)
1700                                 break;
1701                         else if (ret < 0)
1702                                 goto out;
1703                         leaf = path.nodes[0];
1704                 }
1705
1706                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1707                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1708                     key.type != BTRFS_EXTENT_CSUM_KEY)
1709                         break;
1710
1711                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1712                 if (key.offset >= start + len)
1713                         break;
1714
1715                 if (key.offset > start)
1716                         start = key.offset;
1717
1718                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1719                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1720                 if (csum_end > start) {
1721                         size = min(csum_end - start, len);
1722                         len -= size;
1723                         start += size;
1724                         *found += size;
1725                 }
1726
1727                 path.slots[0]++;
1728         }
1729 out:
1730         btrfs_release_path(&path);
1731         if (ret < 0)
1732                 return ret;
1733         return 0;
1734 }
1735
1736 static int process_file_extent(struct btrfs_root *root,
1737                                 struct extent_buffer *eb,
1738                                 int slot, struct btrfs_key *key,
1739                                 struct shared_node *active_node)
1740 {
1741         struct inode_record *rec;
1742         struct btrfs_file_extent_item *fi;
1743         u64 num_bytes = 0;
1744         u64 disk_bytenr = 0;
1745         u64 extent_offset = 0;
1746         u64 mask = root->sectorsize - 1;
1747         int extent_type;
1748         int ret;
1749
1750         rec = active_node->current;
1751         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1752         rec->found_file_extent = 1;
1753
1754         if (rec->extent_start == (u64)-1) {
1755                 rec->extent_start = key->offset;
1756                 rec->extent_end = key->offset;
1757         }
1758
1759         if (rec->extent_end > key->offset)
1760                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1761         else if (rec->extent_end < key->offset) {
1762                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1763                                            key->offset - rec->extent_end);
1764                 if (ret < 0)
1765                         return ret;
1766         }
1767
1768         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1769         extent_type = btrfs_file_extent_type(eb, fi);
1770
1771         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1772                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1773                 if (num_bytes == 0)
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 rec->found_size += num_bytes;
1776                 num_bytes = (num_bytes + mask) & ~mask;
1777         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1778                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1779                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1780                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1781                 extent_offset = btrfs_file_extent_offset(eb, fi);
1782                 if (num_bytes == 0 || (num_bytes & mask))
1783                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784                 if (num_bytes + extent_offset >
1785                     btrfs_file_extent_ram_bytes(eb, fi))
1786                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1788                     (btrfs_file_extent_compression(eb, fi) ||
1789                      btrfs_file_extent_encryption(eb, fi) ||
1790                      btrfs_file_extent_other_encoding(eb, fi)))
1791                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1792                 if (disk_bytenr > 0)
1793                         rec->found_size += num_bytes;
1794         } else {
1795                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1796         }
1797         rec->extent_end = key->offset + num_bytes;
1798
1799         /*
1800          * The data reloc tree will copy full extents into its inode and then
1801          * copy the corresponding csums.  Because the extent it copied could be
1802          * a preallocated extent that hasn't been written to yet there may be no
1803          * csums to copy, ergo we won't have csums for our file extent.  This is
1804          * ok so just don't bother checking csums if the inode belongs to the
1805          * data reloc tree.
1806          */
1807         if (disk_bytenr > 0 &&
1808             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1809                 u64 found;
1810                 if (btrfs_file_extent_compression(eb, fi))
1811                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1812                 else
1813                         disk_bytenr += extent_offset;
1814
1815                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1816                 if (ret < 0)
1817                         return ret;
1818                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1819                         if (found > 0)
1820                                 rec->found_csum_item = 1;
1821                         if (found < num_bytes)
1822                                 rec->some_csum_missing = 1;
1823                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1824                         if (found > 0)
1825                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1826                 }
1827         }
1828         return 0;
1829 }
1830
1831 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1832                             struct walk_control *wc)
1833 {
1834         struct btrfs_key key;
1835         u32 nritems;
1836         int i;
1837         int ret = 0;
1838         struct cache_tree *inode_cache;
1839         struct shared_node *active_node;
1840
1841         if (wc->root_level == wc->active_node &&
1842             btrfs_root_refs(&root->root_item) == 0)
1843                 return 0;
1844
1845         active_node = wc->nodes[wc->active_node];
1846         inode_cache = &active_node->inode_cache;
1847         nritems = btrfs_header_nritems(eb);
1848         for (i = 0; i < nritems; i++) {
1849                 btrfs_item_key_to_cpu(eb, &key, i);
1850
1851                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1852                         continue;
1853                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1854                         continue;
1855
1856                 if (active_node->current == NULL ||
1857                     active_node->current->ino < key.objectid) {
1858                         if (active_node->current) {
1859                                 active_node->current->checked = 1;
1860                                 maybe_free_inode_rec(inode_cache,
1861                                                      active_node->current);
1862                         }
1863                         active_node->current = get_inode_rec(inode_cache,
1864                                                              key.objectid, 1);
1865                         BUG_ON(IS_ERR(active_node->current));
1866                 }
1867                 switch (key.type) {
1868                 case BTRFS_DIR_ITEM_KEY:
1869                 case BTRFS_DIR_INDEX_KEY:
1870                         ret = process_dir_item(root, eb, i, &key, active_node);
1871                         break;
1872                 case BTRFS_INODE_REF_KEY:
1873                         ret = process_inode_ref(eb, i, &key, active_node);
1874                         break;
1875                 case BTRFS_INODE_EXTREF_KEY:
1876                         ret = process_inode_extref(eb, i, &key, active_node);
1877                         break;
1878                 case BTRFS_INODE_ITEM_KEY:
1879                         ret = process_inode_item(eb, i, &key, active_node);
1880                         break;
1881                 case BTRFS_EXTENT_DATA_KEY:
1882                         ret = process_file_extent(root, eb, i, &key,
1883                                                   active_node);
1884                         break;
1885                 default:
1886                         break;
1887                 };
1888         }
1889         return ret;
1890 }
1891
1892 static void reada_walk_down(struct btrfs_root *root,
1893                             struct extent_buffer *node, int slot)
1894 {
1895         u64 bytenr;
1896         u64 ptr_gen;
1897         u32 nritems;
1898         u32 blocksize;
1899         int i;
1900         int level;
1901
1902         level = btrfs_header_level(node);
1903         if (level != 1)
1904                 return;
1905
1906         nritems = btrfs_header_nritems(node);
1907         blocksize = root->nodesize;
1908         for (i = slot; i < nritems; i++) {
1909                 bytenr = btrfs_node_blockptr(node, i);
1910                 ptr_gen = btrfs_node_ptr_generation(node, i);
1911                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1912         }
1913 }
1914
1915 /*
1916  * Check the child node/leaf by the following condition:
1917  * 1. the first item key of the node/leaf should be the same with the one
1918  *    in parent.
1919  * 2. block in parent node should match the child node/leaf.
1920  * 3. generation of parent node and child's header should be consistent.
1921  *
1922  * Or the child node/leaf pointed by the key in parent is not valid.
1923  *
1924  * We hope to check leaf owner too, but since subvol may share leaves,
1925  * which makes leaf owner check not so strong, key check should be
1926  * sufficient enough for that case.
1927  */
1928 static int check_child_node(struct btrfs_root *root,
1929                             struct extent_buffer *parent, int slot,
1930                             struct extent_buffer *child)
1931 {
1932         struct btrfs_key parent_key;
1933         struct btrfs_key child_key;
1934         int ret = 0;
1935
1936         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1937         if (btrfs_header_level(child) == 0)
1938                 btrfs_item_key_to_cpu(child, &child_key, 0);
1939         else
1940                 btrfs_node_key_to_cpu(child, &child_key, 0);
1941
1942         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1943                 ret = -EINVAL;
1944                 fprintf(stderr,
1945                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1946                         parent_key.objectid, parent_key.type, parent_key.offset,
1947                         child_key.objectid, child_key.type, child_key.offset);
1948         }
1949         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1950                 ret = -EINVAL;
1951                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1952                         btrfs_node_blockptr(parent, slot),
1953                         btrfs_header_bytenr(child));
1954         }
1955         if (btrfs_node_ptr_generation(parent, slot) !=
1956             btrfs_header_generation(child)) {
1957                 ret = -EINVAL;
1958                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1959                         btrfs_header_generation(child),
1960                         btrfs_node_ptr_generation(parent, slot));
1961         }
1962         return ret;
1963 }
1964
1965 struct node_refs {
1966         u64 bytenr[BTRFS_MAX_LEVEL];
1967         u64 refs[BTRFS_MAX_LEVEL];
1968 };
1969
1970 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1971                           struct walk_control *wc, int *level,
1972                           struct node_refs *nrefs)
1973 {
1974         enum btrfs_tree_block_status status;
1975         u64 bytenr;
1976         u64 ptr_gen;
1977         struct extent_buffer *next;
1978         struct extent_buffer *cur;
1979         u32 blocksize;
1980         int ret, err = 0;
1981         u64 refs;
1982
1983         WARN_ON(*level < 0);
1984         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1985
1986         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1987                 refs = nrefs->refs[*level];
1988                 ret = 0;
1989         } else {
1990                 ret = btrfs_lookup_extent_info(NULL, root,
1991                                        path->nodes[*level]->start,
1992                                        *level, 1, &refs, NULL);
1993                 if (ret < 0) {
1994                         err = ret;
1995                         goto out;
1996                 }
1997                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1998                 nrefs->refs[*level] = refs;
1999         }
2000
2001         if (refs > 1) {
2002                 ret = enter_shared_node(root, path->nodes[*level]->start,
2003                                         refs, wc, *level);
2004                 if (ret > 0) {
2005                         err = ret;
2006                         goto out;
2007                 }
2008         }
2009
2010         while (*level >= 0) {
2011                 WARN_ON(*level < 0);
2012                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2013                 cur = path->nodes[*level];
2014
2015                 if (btrfs_header_level(cur) != *level)
2016                         WARN_ON(1);
2017
2018                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2019                         break;
2020                 if (*level == 0) {
2021                         ret = process_one_leaf(root, cur, wc);
2022                         if (ret < 0)
2023                                 err = ret;
2024                         break;
2025                 }
2026                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2027                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2028                 blocksize = root->nodesize;
2029
2030                 if (bytenr == nrefs->bytenr[*level - 1]) {
2031                         refs = nrefs->refs[*level - 1];
2032                 } else {
2033                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034                                         *level - 1, 1, &refs, NULL);
2035                         if (ret < 0) {
2036                                 refs = 0;
2037                         } else {
2038                                 nrefs->bytenr[*level - 1] = bytenr;
2039                                 nrefs->refs[*level - 1] = refs;
2040                         }
2041                 }
2042
2043                 if (refs > 1) {
2044                         ret = enter_shared_node(root, bytenr, refs,
2045                                                 wc, *level - 1);
2046                         if (ret > 0) {
2047                                 path->slots[*level]++;
2048                                 continue;
2049                         }
2050                 }
2051
2052                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2053                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054                         free_extent_buffer(next);
2055                         reada_walk_down(root, cur, path->slots[*level]);
2056                         next = read_tree_block(root, bytenr, blocksize,
2057                                                ptr_gen);
2058                         if (!extent_buffer_uptodate(next)) {
2059                                 struct btrfs_key node_key;
2060
2061                                 btrfs_node_key_to_cpu(path->nodes[*level],
2062                                                       &node_key,
2063                                                       path->slots[*level]);
2064                                 btrfs_add_corrupt_extent_record(root->fs_info,
2065                                                 &node_key,
2066                                                 path->nodes[*level]->start,
2067                                                 root->nodesize, *level);
2068                                 err = -EIO;
2069                                 goto out;
2070                         }
2071                 }
2072
2073                 ret = check_child_node(root, cur, path->slots[*level], next);
2074                 if (ret) {
2075                         err = ret;
2076                         goto out;
2077                 }
2078
2079                 if (btrfs_is_leaf(next))
2080                         status = btrfs_check_leaf(root, NULL, next);
2081                 else
2082                         status = btrfs_check_node(root, NULL, next);
2083                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2084                         free_extent_buffer(next);
2085                         err = -EIO;
2086                         goto out;
2087                 }
2088
2089                 *level = *level - 1;
2090                 free_extent_buffer(path->nodes[*level]);
2091                 path->nodes[*level] = next;
2092                 path->slots[*level] = 0;
2093         }
2094 out:
2095         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2096         return err;
2097 }
2098
2099 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2100                         struct walk_control *wc, int *level)
2101 {
2102         int i;
2103         struct extent_buffer *leaf;
2104
2105         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2106                 leaf = path->nodes[i];
2107                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2108                         path->slots[i]++;
2109                         *level = i;
2110                         return 0;
2111                 } else {
2112                         free_extent_buffer(path->nodes[*level]);
2113                         path->nodes[*level] = NULL;
2114                         BUG_ON(*level > wc->active_node);
2115                         if (*level == wc->active_node)
2116                                 leave_shared_node(root, wc, *level);
2117                         *level = i + 1;
2118                 }
2119         }
2120         return 1;
2121 }
2122
2123 static int check_root_dir(struct inode_record *rec)
2124 {
2125         struct inode_backref *backref;
2126         int ret = -1;
2127
2128         if (!rec->found_inode_item || rec->errors)
2129                 goto out;
2130         if (rec->nlink != 1 || rec->found_link != 0)
2131                 goto out;
2132         if (list_empty(&rec->backrefs))
2133                 goto out;
2134         backref = to_inode_backref(rec->backrefs.next);
2135         if (!backref->found_inode_ref)
2136                 goto out;
2137         if (backref->index != 0 || backref->namelen != 2 ||
2138             memcmp(backref->name, "..", 2))
2139                 goto out;
2140         if (backref->found_dir_index || backref->found_dir_item)
2141                 goto out;
2142         ret = 0;
2143 out:
2144         return ret;
2145 }
2146
2147 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2148                               struct btrfs_root *root, struct btrfs_path *path,
2149                               struct inode_record *rec)
2150 {
2151         struct btrfs_inode_item *ei;
2152         struct btrfs_key key;
2153         int ret;
2154
2155         key.objectid = rec->ino;
2156         key.type = BTRFS_INODE_ITEM_KEY;
2157         key.offset = (u64)-1;
2158
2159         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2160         if (ret < 0)
2161                 goto out;
2162         if (ret) {
2163                 if (!path->slots[0]) {
2164                         ret = -ENOENT;
2165                         goto out;
2166                 }
2167                 path->slots[0]--;
2168                 ret = 0;
2169         }
2170         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2171         if (key.objectid != rec->ino) {
2172                 ret = -ENOENT;
2173                 goto out;
2174         }
2175
2176         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2177                             struct btrfs_inode_item);
2178         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2179         btrfs_mark_buffer_dirty(path->nodes[0]);
2180         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2181         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2182                root->root_key.objectid);
2183 out:
2184         btrfs_release_path(path);
2185         return ret;
2186 }
2187
2188 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2189                                     struct btrfs_root *root,
2190                                     struct btrfs_path *path,
2191                                     struct inode_record *rec)
2192 {
2193         int ret;
2194
2195         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2196         btrfs_release_path(path);
2197         if (!ret)
2198                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2199         return ret;
2200 }
2201
2202 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2203                                struct btrfs_root *root,
2204                                struct btrfs_path *path,
2205                                struct inode_record *rec)
2206 {
2207         struct btrfs_inode_item *ei;
2208         struct btrfs_key key;
2209         int ret = 0;
2210
2211         key.objectid = rec->ino;
2212         key.type = BTRFS_INODE_ITEM_KEY;
2213         key.offset = 0;
2214
2215         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2216         if (ret) {
2217                 if (ret > 0)
2218                         ret = -ENOENT;
2219                 goto out;
2220         }
2221
2222         /* Since ret == 0, no need to check anything */
2223         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2224                             struct btrfs_inode_item);
2225         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2226         btrfs_mark_buffer_dirty(path->nodes[0]);
2227         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2228         printf("reset nbytes for ino %llu root %llu\n",
2229                rec->ino, root->root_key.objectid);
2230 out:
2231         btrfs_release_path(path);
2232         return ret;
2233 }
2234
2235 static int add_missing_dir_index(struct btrfs_root *root,
2236                                  struct cache_tree *inode_cache,
2237                                  struct inode_record *rec,
2238                                  struct inode_backref *backref)
2239 {
2240         struct btrfs_path *path;
2241         struct btrfs_trans_handle *trans;
2242         struct btrfs_dir_item *dir_item;
2243         struct extent_buffer *leaf;
2244         struct btrfs_key key;
2245         struct btrfs_disk_key disk_key;
2246         struct inode_record *dir_rec;
2247         unsigned long name_ptr;
2248         u32 data_size = sizeof(*dir_item) + backref->namelen;
2249         int ret;
2250
2251         path = btrfs_alloc_path();
2252         if (!path)
2253                 return -ENOMEM;
2254
2255         trans = btrfs_start_transaction(root, 1);
2256         if (IS_ERR(trans)) {
2257                 btrfs_free_path(path);
2258                 return PTR_ERR(trans);
2259         }
2260
2261         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2262                 (unsigned long long)rec->ino);
2263         key.objectid = backref->dir;
2264         key.type = BTRFS_DIR_INDEX_KEY;
2265         key.offset = backref->index;
2266
2267         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2268         BUG_ON(ret);
2269
2270         leaf = path->nodes[0];
2271         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2272
2273         disk_key.objectid = cpu_to_le64(rec->ino);
2274         disk_key.type = BTRFS_INODE_ITEM_KEY;
2275         disk_key.offset = 0;
2276
2277         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2278         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2279         btrfs_set_dir_data_len(leaf, dir_item, 0);
2280         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2281         name_ptr = (unsigned long)(dir_item + 1);
2282         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2283         btrfs_mark_buffer_dirty(leaf);
2284         btrfs_free_path(path);
2285         btrfs_commit_transaction(trans, root);
2286
2287         backref->found_dir_index = 1;
2288         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2289         BUG_ON(IS_ERR(dir_rec));
2290         if (!dir_rec)
2291                 return 0;
2292         dir_rec->found_size += backref->namelen;
2293         if (dir_rec->found_size == dir_rec->isize &&
2294             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2295                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2296         if (dir_rec->found_size != dir_rec->isize)
2297                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2298
2299         return 0;
2300 }
2301
2302 static int delete_dir_index(struct btrfs_root *root,
2303                             struct cache_tree *inode_cache,
2304                             struct inode_record *rec,
2305                             struct inode_backref *backref)
2306 {
2307         struct btrfs_trans_handle *trans;
2308         struct btrfs_dir_item *di;
2309         struct btrfs_path *path;
2310         int ret = 0;
2311
2312         path = btrfs_alloc_path();
2313         if (!path)
2314                 return -ENOMEM;
2315
2316         trans = btrfs_start_transaction(root, 1);
2317         if (IS_ERR(trans)) {
2318                 btrfs_free_path(path);
2319                 return PTR_ERR(trans);
2320         }
2321
2322
2323         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2324                 (unsigned long long)backref->dir,
2325                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2326                 (unsigned long long)root->objectid);
2327
2328         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2329                                     backref->name, backref->namelen,
2330                                     backref->index, -1);
2331         if (IS_ERR(di)) {
2332                 ret = PTR_ERR(di);
2333                 btrfs_free_path(path);
2334                 btrfs_commit_transaction(trans, root);
2335                 if (ret == -ENOENT)
2336                         return 0;
2337                 return ret;
2338         }
2339
2340         if (!di)
2341                 ret = btrfs_del_item(trans, root, path);
2342         else
2343                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2344         BUG_ON(ret);
2345         btrfs_free_path(path);
2346         btrfs_commit_transaction(trans, root);
2347         return ret;
2348 }
2349
2350 static int create_inode_item(struct btrfs_root *root,
2351                              struct inode_record *rec,
2352                              struct inode_backref *backref, int root_dir)
2353 {
2354         struct btrfs_trans_handle *trans;
2355         struct btrfs_inode_item inode_item;
2356         time_t now = time(NULL);
2357         int ret;
2358
2359         trans = btrfs_start_transaction(root, 1);
2360         if (IS_ERR(trans)) {
2361                 ret = PTR_ERR(trans);
2362                 return ret;
2363         }
2364
2365         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2366                 "be incomplete, please check permissions and content after "
2367                 "the fsck completes.\n", (unsigned long long)root->objectid,
2368                 (unsigned long long)rec->ino);
2369
2370         memset(&inode_item, 0, sizeof(inode_item));
2371         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2372         if (root_dir)
2373                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2374         else
2375                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2376         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2377         if (rec->found_dir_item) {
2378                 if (rec->found_file_extent)
2379                         fprintf(stderr, "root %llu inode %llu has both a dir "
2380                                 "item and extents, unsure if it is a dir or a "
2381                                 "regular file so setting it as a directory\n",
2382                                 (unsigned long long)root->objectid,
2383                                 (unsigned long long)rec->ino);
2384                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2385                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2386         } else if (!rec->found_dir_item) {
2387                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2388                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2389         }
2390         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2391         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2392         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2393         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2394         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2395         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2396         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2397         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2398
2399         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2400         BUG_ON(ret);
2401         btrfs_commit_transaction(trans, root);
2402         return 0;
2403 }
2404
2405 static int repair_inode_backrefs(struct btrfs_root *root,
2406                                  struct inode_record *rec,
2407                                  struct cache_tree *inode_cache,
2408                                  int delete)
2409 {
2410         struct inode_backref *tmp, *backref;
2411         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2412         int ret = 0;
2413         int repaired = 0;
2414
2415         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2416                 if (!delete && rec->ino == root_dirid) {
2417                         if (!rec->found_inode_item) {
2418                                 ret = create_inode_item(root, rec, backref, 1);
2419                                 if (ret)
2420                                         break;
2421                                 repaired++;
2422                         }
2423                 }
2424
2425                 /* Index 0 for root dir's are special, don't mess with it */
2426                 if (rec->ino == root_dirid && backref->index == 0)
2427                         continue;
2428
2429                 if (delete &&
2430                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2431                      (backref->found_dir_index && backref->found_inode_ref &&
2432                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2433                         ret = delete_dir_index(root, inode_cache, rec, backref);
2434                         if (ret)
2435                                 break;
2436                         repaired++;
2437                         list_del(&backref->list);
2438                         free(backref);
2439                 }
2440
2441                 if (!delete && !backref->found_dir_index &&
2442                     backref->found_dir_item && backref->found_inode_ref) {
2443                         ret = add_missing_dir_index(root, inode_cache, rec,
2444                                                     backref);
2445                         if (ret)
2446                                 break;
2447                         repaired++;
2448                         if (backref->found_dir_item &&
2449                             backref->found_dir_index &&
2450                             backref->found_dir_index) {
2451                                 if (!backref->errors &&
2452                                     backref->found_inode_ref) {
2453                                         list_del(&backref->list);
2454                                         free(backref);
2455                                 }
2456                         }
2457                 }
2458
2459                 if (!delete && (!backref->found_dir_index &&
2460                                 !backref->found_dir_item &&
2461                                 backref->found_inode_ref)) {
2462                         struct btrfs_trans_handle *trans;
2463                         struct btrfs_key location;
2464
2465                         ret = check_dir_conflict(root, backref->name,
2466                                                  backref->namelen,
2467                                                  backref->dir,
2468                                                  backref->index);
2469                         if (ret) {
2470                                 /*
2471                                  * let nlink fixing routine to handle it,
2472                                  * which can do it better.
2473                                  */
2474                                 ret = 0;
2475                                 break;
2476                         }
2477                         location.objectid = rec->ino;
2478                         location.type = BTRFS_INODE_ITEM_KEY;
2479                         location.offset = 0;
2480
2481                         trans = btrfs_start_transaction(root, 1);
2482                         if (IS_ERR(trans)) {
2483                                 ret = PTR_ERR(trans);
2484                                 break;
2485                         }
2486                         fprintf(stderr, "adding missing dir index/item pair "
2487                                 "for inode %llu\n",
2488                                 (unsigned long long)rec->ino);
2489                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2490                                                     backref->namelen,
2491                                                     backref->dir, &location,
2492                                                     imode_to_type(rec->imode),
2493                                                     backref->index);
2494                         BUG_ON(ret);
2495                         btrfs_commit_transaction(trans, root);
2496                         repaired++;
2497                 }
2498
2499                 if (!delete && (backref->found_inode_ref &&
2500                                 backref->found_dir_index &&
2501                                 backref->found_dir_item &&
2502                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2503                                 !rec->found_inode_item)) {
2504                         ret = create_inode_item(root, rec, backref, 0);
2505                         if (ret)
2506                                 break;
2507                         repaired++;
2508                 }
2509
2510         }
2511         return ret ? ret : repaired;
2512 }
2513
2514 /*
2515  * To determine the file type for nlink/inode_item repair
2516  *
2517  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2518  * Return -ENOENT if file type is not found.
2519  */
2520 static int find_file_type(struct inode_record *rec, u8 *type)
2521 {
2522         struct inode_backref *backref;
2523
2524         /* For inode item recovered case */
2525         if (rec->found_inode_item) {
2526                 *type = imode_to_type(rec->imode);
2527                 return 0;
2528         }
2529
2530         list_for_each_entry(backref, &rec->backrefs, list) {
2531                 if (backref->found_dir_index || backref->found_dir_item) {
2532                         *type = backref->filetype;
2533                         return 0;
2534                 }
2535         }
2536         return -ENOENT;
2537 }
2538
2539 /*
2540  * To determine the file name for nlink repair
2541  *
2542  * Return 0 if file name is found, set name and namelen.
2543  * Return -ENOENT if file name is not found.
2544  */
2545 static int find_file_name(struct inode_record *rec,
2546                           char *name, int *namelen)
2547 {
2548         struct inode_backref *backref;
2549
2550         list_for_each_entry(backref, &rec->backrefs, list) {
2551                 if (backref->found_dir_index || backref->found_dir_item ||
2552                     backref->found_inode_ref) {
2553                         memcpy(name, backref->name, backref->namelen);
2554                         *namelen = backref->namelen;
2555                         return 0;
2556                 }
2557         }
2558         return -ENOENT;
2559 }
2560
2561 /* Reset the nlink of the inode to the correct one */
2562 static int reset_nlink(struct btrfs_trans_handle *trans,
2563                        struct btrfs_root *root,
2564                        struct btrfs_path *path,
2565                        struct inode_record *rec)
2566 {
2567         struct inode_backref *backref;
2568         struct inode_backref *tmp;
2569         struct btrfs_key key;
2570         struct btrfs_inode_item *inode_item;
2571         int ret = 0;
2572
2573         /* We don't believe this either, reset it and iterate backref */
2574         rec->found_link = 0;
2575
2576         /* Remove all backref including the valid ones */
2577         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2578                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2579                                    backref->index, backref->name,
2580                                    backref->namelen, 0);
2581                 if (ret < 0)
2582                         goto out;
2583
2584                 /* remove invalid backref, so it won't be added back */
2585                 if (!(backref->found_dir_index &&
2586                       backref->found_dir_item &&
2587                       backref->found_inode_ref)) {
2588                         list_del(&backref->list);
2589                         free(backref);
2590                 } else {
2591                         rec->found_link++;
2592                 }
2593         }
2594
2595         /* Set nlink to 0 */
2596         key.objectid = rec->ino;
2597         key.type = BTRFS_INODE_ITEM_KEY;
2598         key.offset = 0;
2599         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2600         if (ret < 0)
2601                 goto out;
2602         if (ret > 0) {
2603                 ret = -ENOENT;
2604                 goto out;
2605         }
2606         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2607                                     struct btrfs_inode_item);
2608         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2609         btrfs_mark_buffer_dirty(path->nodes[0]);
2610         btrfs_release_path(path);
2611
2612         /*
2613          * Add back valid inode_ref/dir_item/dir_index,
2614          * add_link() will handle the nlink inc, so new nlink must be correct
2615          */
2616         list_for_each_entry(backref, &rec->backrefs, list) {
2617                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2618                                      backref->name, backref->namelen,
2619                                      backref->filetype, &backref->index, 1);
2620                 if (ret < 0)
2621                         goto out;
2622         }
2623 out:
2624         btrfs_release_path(path);
2625         return ret;
2626 }
2627
2628 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2629                                struct btrfs_root *root,
2630                                struct btrfs_path *path,
2631                                struct inode_record *rec)
2632 {
2633         char *dir_name = "lost+found";
2634         char namebuf[BTRFS_NAME_LEN] = {0};
2635         u64 lost_found_ino;
2636         u32 mode = 0700;
2637         u8 type = 0;
2638         int namelen = 0;
2639         int name_recovered = 0;
2640         int type_recovered = 0;
2641         int ret = 0;
2642
2643         /*
2644          * Get file name and type first before these invalid inode ref
2645          * are deleted by remove_all_invalid_backref()
2646          */
2647         name_recovered = !find_file_name(rec, namebuf, &namelen);
2648         type_recovered = !find_file_type(rec, &type);
2649
2650         if (!name_recovered) {
2651                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2652                        rec->ino, rec->ino);
2653                 namelen = count_digits(rec->ino);
2654                 sprintf(namebuf, "%llu", rec->ino);
2655                 name_recovered = 1;
2656         }
2657         if (!type_recovered) {
2658                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2659                        rec->ino);
2660                 type = BTRFS_FT_REG_FILE;
2661                 type_recovered = 1;
2662         }
2663
2664         ret = reset_nlink(trans, root, path, rec);
2665         if (ret < 0) {
2666                 fprintf(stderr,
2667                         "Failed to reset nlink for inode %llu: %s\n",
2668                         rec->ino, strerror(-ret));
2669                 goto out;
2670         }
2671
2672         if (rec->found_link == 0) {
2673                 lost_found_ino = root->highest_inode;
2674                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2675                         ret = -EOVERFLOW;
2676                         goto out;
2677                 }
2678                 lost_found_ino++;
2679                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2680                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2681                                   mode);
2682                 if (ret < 0) {
2683                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2684                                 dir_name, strerror(-ret));
2685                         goto out;
2686                 }
2687                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2688                                      namebuf, namelen, type, NULL, 1);
2689                 /*
2690                  * Add ".INO" suffix several times to handle case where
2691                  * "FILENAME.INO" is already taken by another file.
2692                  */
2693                 while (ret == -EEXIST) {
2694                         /*
2695                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2696                          */
2697                         if (namelen + count_digits(rec->ino) + 1 >
2698                             BTRFS_NAME_LEN) {
2699                                 ret = -EFBIG;
2700                                 goto out;
2701                         }
2702                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2703                                  ".%llu", rec->ino);
2704                         namelen += count_digits(rec->ino) + 1;
2705                         ret = btrfs_add_link(trans, root, rec->ino,
2706                                              lost_found_ino, namebuf,
2707                                              namelen, type, NULL, 1);
2708                 }
2709                 if (ret < 0) {
2710                         fprintf(stderr,
2711                                 "Failed to link the inode %llu to %s dir: %s\n",
2712                                 rec->ino, dir_name, strerror(-ret));
2713                         goto out;
2714                 }
2715                 /*
2716                  * Just increase the found_link, don't actually add the
2717                  * backref. This will make things easier and this inode
2718                  * record will be freed after the repair is done.
2719                  * So fsck will not report problem about this inode.
2720                  */
2721                 rec->found_link++;
2722                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2723                        namelen, namebuf, dir_name);
2724         }
2725         printf("Fixed the nlink of inode %llu\n", rec->ino);
2726 out:
2727         /*
2728          * Clear the flag anyway, or we will loop forever for the same inode
2729          * as it will not be removed from the bad inode list and the dead loop
2730          * happens.
2731          */
2732         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2733         btrfs_release_path(path);
2734         return ret;
2735 }
2736
2737 /*
2738  * Check if there is any normal(reg or prealloc) file extent for given
2739  * ino.
2740  * This is used to determine the file type when neither its dir_index/item or
2741  * inode_item exists.
2742  *
2743  * This will *NOT* report error, if any error happens, just consider it does
2744  * not have any normal file extent.
2745  */
2746 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2747 {
2748         struct btrfs_path *path;
2749         struct btrfs_key key;
2750         struct btrfs_key found_key;
2751         struct btrfs_file_extent_item *fi;
2752         u8 type;
2753         int ret = 0;
2754
2755         path = btrfs_alloc_path();
2756         if (!path)
2757                 goto out;
2758         key.objectid = ino;
2759         key.type = BTRFS_EXTENT_DATA_KEY;
2760         key.offset = 0;
2761
2762         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2763         if (ret < 0) {
2764                 ret = 0;
2765                 goto out;
2766         }
2767         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2768                 ret = btrfs_next_leaf(root, path);
2769                 if (ret) {
2770                         ret = 0;
2771                         goto out;
2772                 }
2773         }
2774         while (1) {
2775                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2776                                       path->slots[0]);
2777                 if (found_key.objectid != ino ||
2778                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2779                         break;
2780                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2781                                     struct btrfs_file_extent_item);
2782                 type = btrfs_file_extent_type(path->nodes[0], fi);
2783                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2784                         ret = 1;
2785                         goto out;
2786                 }
2787         }
2788 out:
2789         btrfs_free_path(path);
2790         return ret;
2791 }
2792
2793 static u32 btrfs_type_to_imode(u8 type)
2794 {
2795         static u32 imode_by_btrfs_type[] = {
2796                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2797                 [BTRFS_FT_DIR]          = S_IFDIR,
2798                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2799                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2800                 [BTRFS_FT_FIFO]         = S_IFIFO,
2801                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2802                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2803         };
2804
2805         return imode_by_btrfs_type[(type)];
2806 }
2807
2808 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2809                                 struct btrfs_root *root,
2810                                 struct btrfs_path *path,
2811                                 struct inode_record *rec)
2812 {
2813         u8 filetype;
2814         u32 mode = 0700;
2815         int type_recovered = 0;
2816         int ret = 0;
2817
2818         printf("Trying to rebuild inode:%llu\n", rec->ino);
2819
2820         type_recovered = !find_file_type(rec, &filetype);
2821
2822         /*
2823          * Try to determine inode type if type not found.
2824          *
2825          * For found regular file extent, it must be FILE.
2826          * For found dir_item/index, it must be DIR.
2827          *
2828          * For undetermined one, use FILE as fallback.
2829          *
2830          * TODO:
2831          * 1. If found backref(inode_index/item is already handled) to it,
2832          *    it must be DIR.
2833          *    Need new inode-inode ref structure to allow search for that.
2834          */
2835         if (!type_recovered) {
2836                 if (rec->found_file_extent &&
2837                     find_normal_file_extent(root, rec->ino)) {
2838                         type_recovered = 1;
2839                         filetype = BTRFS_FT_REG_FILE;
2840                 } else if (rec->found_dir_item) {
2841                         type_recovered = 1;
2842                         filetype = BTRFS_FT_DIR;
2843                 } else if (!list_empty(&rec->orphan_extents)) {
2844                         type_recovered = 1;
2845                         filetype = BTRFS_FT_REG_FILE;
2846                 } else{
2847                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2848                                rec->ino);
2849                         type_recovered = 1;
2850                         filetype = BTRFS_FT_REG_FILE;
2851                 }
2852         }
2853
2854         ret = btrfs_new_inode(trans, root, rec->ino,
2855                               mode | btrfs_type_to_imode(filetype));
2856         if (ret < 0)
2857                 goto out;
2858
2859         /*
2860          * Here inode rebuild is done, we only rebuild the inode item,
2861          * don't repair the nlink(like move to lost+found).
2862          * That is the job of nlink repair.
2863          *
2864          * We just fill the record and return
2865          */
2866         rec->found_dir_item = 1;
2867         rec->imode = mode | btrfs_type_to_imode(filetype);
2868         rec->nlink = 0;
2869         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2870         /* Ensure the inode_nlinks repair function will be called */
2871         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2872 out:
2873         return ret;
2874 }
2875
2876 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2877                                       struct btrfs_root *root,
2878                                       struct btrfs_path *path,
2879                                       struct inode_record *rec)
2880 {
2881         struct orphan_data_extent *orphan;
2882         struct orphan_data_extent *tmp;
2883         int ret = 0;
2884
2885         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2886                 /*
2887                  * Check for conflicting file extents
2888                  *
2889                  * Here we don't know whether the extents is compressed or not,
2890                  * so we can only assume it not compressed nor data offset,
2891                  * and use its disk_len as extent length.
2892                  */
2893                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2894                                        orphan->offset, orphan->disk_len, 0);
2895                 btrfs_release_path(path);
2896                 if (ret < 0)
2897                         goto out;
2898                 if (!ret) {
2899                         fprintf(stderr,
2900                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2901                                 orphan->disk_bytenr, orphan->disk_len);
2902                         ret = btrfs_free_extent(trans,
2903                                         root->fs_info->extent_root,
2904                                         orphan->disk_bytenr, orphan->disk_len,
2905                                         0, root->objectid, orphan->objectid,
2906                                         orphan->offset);
2907                         if (ret < 0)
2908                                 goto out;
2909                 }
2910                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2911                                 orphan->offset, orphan->disk_bytenr,
2912                                 orphan->disk_len, orphan->disk_len);
2913                 if (ret < 0)
2914                         goto out;
2915
2916                 /* Update file size info */
2917                 rec->found_size += orphan->disk_len;
2918                 if (rec->found_size == rec->nbytes)
2919                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2920
2921                 /* Update the file extent hole info too */
2922                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2923                                            orphan->disk_len);
2924                 if (ret < 0)
2925                         goto out;
2926                 if (RB_EMPTY_ROOT(&rec->holes))
2927                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2928
2929                 list_del(&orphan->list);
2930                 free(orphan);
2931         }
2932         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2933 out:
2934         return ret;
2935 }
2936
2937 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2938                                         struct btrfs_root *root,
2939                                         struct btrfs_path *path,
2940                                         struct inode_record *rec)
2941 {
2942         struct rb_node *node;
2943         struct file_extent_hole *hole;
2944         int found = 0;
2945         int ret = 0;
2946
2947         node = rb_first(&rec->holes);
2948
2949         while (node) {
2950                 found = 1;
2951                 hole = rb_entry(node, struct file_extent_hole, node);
2952                 ret = btrfs_punch_hole(trans, root, rec->ino,
2953                                        hole->start, hole->len);
2954                 if (ret < 0)
2955                         goto out;
2956                 ret = del_file_extent_hole(&rec->holes, hole->start,
2957                                            hole->len);
2958                 if (ret < 0)
2959                         goto out;
2960                 if (RB_EMPTY_ROOT(&rec->holes))
2961                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2962                 node = rb_first(&rec->holes);
2963         }
2964         /* special case for a file losing all its file extent */
2965         if (!found) {
2966                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2967                                        round_up(rec->isize, root->sectorsize));
2968                 if (ret < 0)
2969                         goto out;
2970         }
2971         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2972                rec->ino, root->objectid);
2973 out:
2974         return ret;
2975 }
2976
2977 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2978 {
2979         struct btrfs_trans_handle *trans;
2980         struct btrfs_path *path;
2981         int ret = 0;
2982
2983         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2984                              I_ERR_NO_ORPHAN_ITEM |
2985                              I_ERR_LINK_COUNT_WRONG |
2986                              I_ERR_NO_INODE_ITEM |
2987                              I_ERR_FILE_EXTENT_ORPHAN |
2988                              I_ERR_FILE_EXTENT_DISCOUNT|
2989                              I_ERR_FILE_NBYTES_WRONG)))
2990                 return rec->errors;
2991
2992         path = btrfs_alloc_path();
2993         if (!path)
2994                 return -ENOMEM;
2995
2996         /*
2997          * For nlink repair, it may create a dir and add link, so
2998          * 2 for parent(256)'s dir_index and dir_item
2999          * 2 for lost+found dir's inode_item and inode_ref
3000          * 1 for the new inode_ref of the file
3001          * 2 for lost+found dir's dir_index and dir_item for the file
3002          */
3003         trans = btrfs_start_transaction(root, 7);
3004         if (IS_ERR(trans)) {
3005                 btrfs_free_path(path);
3006                 return PTR_ERR(trans);
3007         }
3008
3009         if (rec->errors & I_ERR_NO_INODE_ITEM)
3010                 ret = repair_inode_no_item(trans, root, path, rec);
3011         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3012                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3013         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3014                 ret = repair_inode_discount_extent(trans, root, path, rec);
3015         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3016                 ret = repair_inode_isize(trans, root, path, rec);
3017         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3018                 ret = repair_inode_orphan_item(trans, root, path, rec);
3019         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3020                 ret = repair_inode_nlinks(trans, root, path, rec);
3021         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3022                 ret = repair_inode_nbytes(trans, root, path, rec);
3023         btrfs_commit_transaction(trans, root);
3024         btrfs_free_path(path);
3025         return ret;
3026 }
3027
3028 static int check_inode_recs(struct btrfs_root *root,
3029                             struct cache_tree *inode_cache)
3030 {
3031         struct cache_extent *cache;
3032         struct ptr_node *node;
3033         struct inode_record *rec;
3034         struct inode_backref *backref;
3035         int stage = 0;
3036         int ret = 0;
3037         int err = 0;
3038         u64 error = 0;
3039         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3040
3041         if (btrfs_root_refs(&root->root_item) == 0) {
3042                 if (!cache_tree_empty(inode_cache))
3043                         fprintf(stderr, "warning line %d\n", __LINE__);
3044                 return 0;
3045         }
3046
3047         /*
3048          * We need to record the highest inode number for later 'lost+found'
3049          * dir creation.
3050          * We must select an ino not used/referred by any existing inode, or
3051          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3052          * this may cause 'lost+found' dir has wrong nlinks.
3053          */
3054         cache = last_cache_extent(inode_cache);
3055         if (cache) {
3056                 node = container_of(cache, struct ptr_node, cache);
3057                 rec = node->data;
3058                 if (rec->ino > root->highest_inode)
3059                         root->highest_inode = rec->ino;
3060         }
3061
3062         /*
3063          * We need to repair backrefs first because we could change some of the
3064          * errors in the inode recs.
3065          *
3066          * We also need to go through and delete invalid backrefs first and then
3067          * add the correct ones second.  We do this because we may get EEXIST
3068          * when adding back the correct index because we hadn't yet deleted the
3069          * invalid index.
3070          *
3071          * For example, if we were missing a dir index then the directories
3072          * isize would be wrong, so if we fixed the isize to what we thought it
3073          * would be and then fixed the backref we'd still have a invalid fs, so
3074          * we need to add back the dir index and then check to see if the isize
3075          * is still wrong.
3076          */
3077         while (stage < 3) {
3078                 stage++;
3079                 if (stage == 3 && !err)
3080                         break;
3081
3082                 cache = search_cache_extent(inode_cache, 0);
3083                 while (repair && cache) {
3084                         node = container_of(cache, struct ptr_node, cache);
3085                         rec = node->data;
3086                         cache = next_cache_extent(cache);
3087
3088                         /* Need to free everything up and rescan */
3089                         if (stage == 3) {
3090                                 remove_cache_extent(inode_cache, &node->cache);
3091                                 free(node);
3092                                 free_inode_rec(rec);
3093                                 continue;
3094                         }
3095
3096                         if (list_empty(&rec->backrefs))
3097                                 continue;
3098
3099                         ret = repair_inode_backrefs(root, rec, inode_cache,
3100                                                     stage == 1);
3101                         if (ret < 0) {
3102                                 err = ret;
3103                                 stage = 2;
3104                                 break;
3105                         } if (ret > 0) {
3106                                 err = -EAGAIN;
3107                         }
3108                 }
3109         }
3110         if (err)
3111                 return err;
3112
3113         rec = get_inode_rec(inode_cache, root_dirid, 0);
3114         BUG_ON(IS_ERR(rec));
3115         if (rec) {
3116                 ret = check_root_dir(rec);
3117                 if (ret) {
3118                         fprintf(stderr, "root %llu root dir %llu error\n",
3119                                 (unsigned long long)root->root_key.objectid,
3120                                 (unsigned long long)root_dirid);
3121                         print_inode_error(root, rec);
3122                         error++;
3123                 }
3124         } else {
3125                 if (repair) {
3126                         struct btrfs_trans_handle *trans;
3127
3128                         trans = btrfs_start_transaction(root, 1);
3129                         if (IS_ERR(trans)) {
3130                                 err = PTR_ERR(trans);
3131                                 return err;
3132                         }
3133
3134                         fprintf(stderr,
3135                                 "root %llu missing its root dir, recreating\n",
3136                                 (unsigned long long)root->objectid);
3137
3138                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3139                         BUG_ON(ret);
3140
3141                         btrfs_commit_transaction(trans, root);
3142                         return -EAGAIN;
3143                 }
3144
3145                 fprintf(stderr, "root %llu root dir %llu not found\n",
3146                         (unsigned long long)root->root_key.objectid,
3147                         (unsigned long long)root_dirid);
3148         }
3149
3150         while (1) {
3151                 cache = search_cache_extent(inode_cache, 0);
3152                 if (!cache)
3153                         break;
3154                 node = container_of(cache, struct ptr_node, cache);
3155                 rec = node->data;
3156                 remove_cache_extent(inode_cache, &node->cache);
3157                 free(node);
3158                 if (rec->ino == root_dirid ||
3159                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3160                         free_inode_rec(rec);
3161                         continue;
3162                 }
3163
3164                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3165                         ret = check_orphan_item(root, rec->ino);
3166                         if (ret == 0)
3167                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3168                         if (can_free_inode_rec(rec)) {
3169                                 free_inode_rec(rec);
3170                                 continue;
3171                         }
3172                 }
3173
3174                 if (!rec->found_inode_item)
3175                         rec->errors |= I_ERR_NO_INODE_ITEM;
3176                 if (rec->found_link != rec->nlink)
3177                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3178                 if (repair) {
3179                         ret = try_repair_inode(root, rec);
3180                         if (ret == 0 && can_free_inode_rec(rec)) {
3181                                 free_inode_rec(rec);
3182                                 continue;
3183                         }
3184                         ret = 0;
3185                 }
3186
3187                 if (!(repair && ret == 0))
3188                         error++;
3189                 print_inode_error(root, rec);
3190                 list_for_each_entry(backref, &rec->backrefs, list) {
3191                         if (!backref->found_dir_item)
3192                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3193                         if (!backref->found_dir_index)
3194                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3195                         if (!backref->found_inode_ref)
3196                                 backref->errors |= REF_ERR_NO_INODE_REF;
3197                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3198                                 " namelen %u name %s filetype %d errors %x",
3199                                 (unsigned long long)backref->dir,
3200                                 (unsigned long long)backref->index,
3201                                 backref->namelen, backref->name,
3202                                 backref->filetype, backref->errors);
3203                         print_ref_error(backref->errors);
3204                 }
3205                 free_inode_rec(rec);
3206         }
3207         return (error > 0) ? -1 : 0;
3208 }
3209
3210 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3211                                         u64 objectid)
3212 {
3213         struct cache_extent *cache;
3214         struct root_record *rec = NULL;
3215         int ret;
3216
3217         cache = lookup_cache_extent(root_cache, objectid, 1);
3218         if (cache) {
3219                 rec = container_of(cache, struct root_record, cache);
3220         } else {
3221                 rec = calloc(1, sizeof(*rec));
3222                 if (!rec)
3223                         return ERR_PTR(-ENOMEM);
3224                 rec->objectid = objectid;
3225                 INIT_LIST_HEAD(&rec->backrefs);
3226                 rec->cache.start = objectid;
3227                 rec->cache.size = 1;
3228
3229                 ret = insert_cache_extent(root_cache, &rec->cache);
3230                 if (ret)
3231                         return ERR_PTR(-EEXIST);
3232         }
3233         return rec;
3234 }
3235
3236 static struct root_backref *get_root_backref(struct root_record *rec,
3237                                              u64 ref_root, u64 dir, u64 index,
3238                                              const char *name, int namelen)
3239 {
3240         struct root_backref *backref;
3241
3242         list_for_each_entry(backref, &rec->backrefs, list) {
3243                 if (backref->ref_root != ref_root || backref->dir != dir ||
3244                     backref->namelen != namelen)
3245                         continue;
3246                 if (memcmp(name, backref->name, namelen))
3247                         continue;
3248                 return backref;
3249         }
3250
3251         backref = calloc(1, sizeof(*backref) + namelen + 1);
3252         if (!backref)
3253                 return NULL;
3254         backref->ref_root = ref_root;
3255         backref->dir = dir;
3256         backref->index = index;
3257         backref->namelen = namelen;
3258         memcpy(backref->name, name, namelen);
3259         backref->name[namelen] = '\0';
3260         list_add_tail(&backref->list, &rec->backrefs);
3261         return backref;
3262 }
3263
3264 static void free_root_record(struct cache_extent *cache)
3265 {
3266         struct root_record *rec;
3267         struct root_backref *backref;
3268
3269         rec = container_of(cache, struct root_record, cache);
3270         while (!list_empty(&rec->backrefs)) {
3271                 backref = to_root_backref(rec->backrefs.next);
3272                 list_del(&backref->list);
3273                 free(backref);
3274         }
3275
3276         kfree(rec);
3277 }
3278
3279 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3280
3281 static int add_root_backref(struct cache_tree *root_cache,
3282                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3283                             const char *name, int namelen,
3284                             int item_type, int errors)
3285 {
3286         struct root_record *rec;
3287         struct root_backref *backref;
3288
3289         rec = get_root_rec(root_cache, root_id);
3290         BUG_ON(IS_ERR(rec));
3291         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3292         BUG_ON(!backref);
3293
3294         backref->errors |= errors;
3295
3296         if (item_type != BTRFS_DIR_ITEM_KEY) {
3297                 if (backref->found_dir_index || backref->found_back_ref ||
3298                     backref->found_forward_ref) {
3299                         if (backref->index != index)
3300                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3301                 } else {
3302                         backref->index = index;
3303                 }
3304         }
3305
3306         if (item_type == BTRFS_DIR_ITEM_KEY) {
3307                 if (backref->found_forward_ref)
3308                         rec->found_ref++;
3309                 backref->found_dir_item = 1;
3310         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3311                 backref->found_dir_index = 1;
3312         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3313                 if (backref->found_forward_ref)
3314                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3315                 else if (backref->found_dir_item)
3316                         rec->found_ref++;
3317                 backref->found_forward_ref = 1;
3318         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3319                 if (backref->found_back_ref)
3320                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3321                 backref->found_back_ref = 1;
3322         } else {
3323                 BUG_ON(1);
3324         }
3325
3326         if (backref->found_forward_ref && backref->found_dir_item)
3327                 backref->reachable = 1;
3328         return 0;
3329 }
3330
3331 static int merge_root_recs(struct btrfs_root *root,
3332                            struct cache_tree *src_cache,
3333                            struct cache_tree *dst_cache)
3334 {
3335         struct cache_extent *cache;
3336         struct ptr_node *node;
3337         struct inode_record *rec;
3338         struct inode_backref *backref;
3339         int ret = 0;
3340
3341         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3342                 free_inode_recs_tree(src_cache);
3343                 return 0;
3344         }
3345
3346         while (1) {
3347                 cache = search_cache_extent(src_cache, 0);
3348                 if (!cache)
3349                         break;
3350                 node = container_of(cache, struct ptr_node, cache);
3351                 rec = node->data;
3352                 remove_cache_extent(src_cache, &node->cache);
3353                 free(node);
3354
3355                 ret = is_child_root(root, root->objectid, rec->ino);
3356                 if (ret < 0)
3357                         break;
3358                 else if (ret == 0)
3359                         goto skip;
3360
3361                 list_for_each_entry(backref, &rec->backrefs, list) {
3362                         BUG_ON(backref->found_inode_ref);
3363                         if (backref->found_dir_item)
3364                                 add_root_backref(dst_cache, rec->ino,
3365                                         root->root_key.objectid, backref->dir,
3366                                         backref->index, backref->name,
3367                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3368                                         backref->errors);
3369                         if (backref->found_dir_index)
3370                                 add_root_backref(dst_cache, rec->ino,
3371                                         root->root_key.objectid, backref->dir,
3372                                         backref->index, backref->name,
3373                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3374                                         backref->errors);
3375                 }
3376 skip:
3377                 free_inode_rec(rec);
3378         }
3379         if (ret < 0)
3380                 return ret;
3381         return 0;
3382 }
3383
3384 static int check_root_refs(struct btrfs_root *root,
3385                            struct cache_tree *root_cache)
3386 {
3387         struct root_record *rec;
3388         struct root_record *ref_root;
3389         struct root_backref *backref;
3390         struct cache_extent *cache;
3391         int loop = 1;
3392         int ret;
3393         int error;
3394         int errors = 0;
3395
3396         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3397         BUG_ON(IS_ERR(rec));
3398         rec->found_ref = 1;
3399
3400         /* fixme: this can not detect circular references */
3401         while (loop) {
3402                 loop = 0;
3403                 cache = search_cache_extent(root_cache, 0);
3404                 while (1) {
3405                         if (!cache)
3406                                 break;
3407                         rec = container_of(cache, struct root_record, cache);
3408                         cache = next_cache_extent(cache);
3409
3410                         if (rec->found_ref == 0)
3411                                 continue;
3412
3413                         list_for_each_entry(backref, &rec->backrefs, list) {
3414                                 if (!backref->reachable)
3415                                         continue;
3416
3417                                 ref_root = get_root_rec(root_cache,
3418                                                         backref->ref_root);
3419                                 BUG_ON(IS_ERR(ref_root));
3420                                 if (ref_root->found_ref > 0)
3421                                         continue;
3422
3423                                 backref->reachable = 0;
3424                                 rec->found_ref--;
3425                                 if (rec->found_ref == 0)
3426                                         loop = 1;
3427                         }
3428                 }
3429         }
3430
3431         cache = search_cache_extent(root_cache, 0);
3432         while (1) {
3433                 if (!cache)
3434                         break;
3435                 rec = container_of(cache, struct root_record, cache);
3436                 cache = next_cache_extent(cache);
3437
3438                 if (rec->found_ref == 0 &&
3439                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3440                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3441                         ret = check_orphan_item(root->fs_info->tree_root,
3442                                                 rec->objectid);
3443                         if (ret == 0)
3444                                 continue;
3445
3446                         /*
3447                          * If we don't have a root item then we likely just have
3448                          * a dir item in a snapshot for this root but no actual
3449                          * ref key or anything so it's meaningless.
3450                          */
3451                         if (!rec->found_root_item)
3452                                 continue;
3453                         errors++;
3454                         fprintf(stderr, "fs tree %llu not referenced\n",
3455                                 (unsigned long long)rec->objectid);
3456                 }
3457
3458                 error = 0;
3459                 if (rec->found_ref > 0 && !rec->found_root_item)
3460                         error = 1;
3461                 list_for_each_entry(backref, &rec->backrefs, list) {
3462                         if (!backref->found_dir_item)
3463                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3464                         if (!backref->found_dir_index)
3465                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3466                         if (!backref->found_back_ref)
3467                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3468                         if (!backref->found_forward_ref)
3469                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3470                         if (backref->reachable && backref->errors)
3471                                 error = 1;
3472                 }
3473                 if (!error)
3474                         continue;
3475
3476                 errors++;
3477                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3478                         (unsigned long long)rec->objectid, rec->found_ref,
3479                          rec->found_root_item ? "" : "not found");
3480
3481                 list_for_each_entry(backref, &rec->backrefs, list) {
3482                         if (!backref->reachable)
3483                                 continue;
3484                         if (!backref->errors && rec->found_root_item)
3485                                 continue;
3486                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3487                                 " index %llu namelen %u name %s errors %x\n",
3488                                 (unsigned long long)backref->ref_root,
3489                                 (unsigned long long)backref->dir,
3490                                 (unsigned long long)backref->index,
3491                                 backref->namelen, backref->name,
3492                                 backref->errors);
3493                         print_ref_error(backref->errors);
3494                 }
3495         }
3496         return errors > 0 ? 1 : 0;
3497 }
3498
3499 static int process_root_ref(struct extent_buffer *eb, int slot,
3500                             struct btrfs_key *key,
3501                             struct cache_tree *root_cache)
3502 {
3503         u64 dirid;
3504         u64 index;
3505         u32 len;
3506         u32 name_len;
3507         struct btrfs_root_ref *ref;
3508         char namebuf[BTRFS_NAME_LEN];
3509         int error;
3510
3511         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3512
3513         dirid = btrfs_root_ref_dirid(eb, ref);
3514         index = btrfs_root_ref_sequence(eb, ref);
3515         name_len = btrfs_root_ref_name_len(eb, ref);
3516
3517         if (name_len <= BTRFS_NAME_LEN) {
3518                 len = name_len;
3519                 error = 0;
3520         } else {
3521                 len = BTRFS_NAME_LEN;
3522                 error = REF_ERR_NAME_TOO_LONG;
3523         }
3524         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3525
3526         if (key->type == BTRFS_ROOT_REF_KEY) {
3527                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3528                                  index, namebuf, len, key->type, error);
3529         } else {
3530                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3531                                  index, namebuf, len, key->type, error);
3532         }
3533         return 0;
3534 }
3535
3536 static void free_corrupt_block(struct cache_extent *cache)
3537 {
3538         struct btrfs_corrupt_block *corrupt;
3539
3540         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3541         free(corrupt);
3542 }
3543
3544 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3545
3546 /*
3547  * Repair the btree of the given root.
3548  *
3549  * The fix is to remove the node key in corrupt_blocks cache_tree.
3550  * and rebalance the tree.
3551  * After the fix, the btree should be writeable.
3552  */
3553 static int repair_btree(struct btrfs_root *root,
3554                         struct cache_tree *corrupt_blocks)
3555 {
3556         struct btrfs_trans_handle *trans;
3557         struct btrfs_path *path;
3558         struct btrfs_corrupt_block *corrupt;
3559         struct cache_extent *cache;
3560         struct btrfs_key key;
3561         u64 offset;
3562         int level;
3563         int ret = 0;
3564
3565         if (cache_tree_empty(corrupt_blocks))
3566                 return 0;
3567
3568         path = btrfs_alloc_path();
3569         if (!path)
3570                 return -ENOMEM;
3571
3572         trans = btrfs_start_transaction(root, 1);
3573         if (IS_ERR(trans)) {
3574                 ret = PTR_ERR(trans);
3575                 fprintf(stderr, "Error starting transaction: %s\n",
3576                         strerror(-ret));
3577                 goto out_free_path;
3578         }
3579         cache = first_cache_extent(corrupt_blocks);
3580         while (cache) {
3581                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3582                                        cache);
3583                 level = corrupt->level;
3584                 path->lowest_level = level;
3585                 key.objectid = corrupt->key.objectid;
3586                 key.type = corrupt->key.type;
3587                 key.offset = corrupt->key.offset;
3588
3589                 /*
3590                  * Here we don't want to do any tree balance, since it may
3591                  * cause a balance with corrupted brother leaf/node,
3592                  * so ins_len set to 0 here.
3593                  * Balance will be done after all corrupt node/leaf is deleted.
3594                  */
3595                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3596                 if (ret < 0)
3597                         goto out;
3598                 offset = btrfs_node_blockptr(path->nodes[level],
3599                                              path->slots[level]);
3600
3601                 /* Remove the ptr */
3602                 ret = btrfs_del_ptr(trans, root, path, level,
3603                                     path->slots[level]);
3604                 if (ret < 0)
3605                         goto out;
3606                 /*
3607                  * Remove the corresponding extent
3608                  * return value is not concerned.
3609                  */
3610                 btrfs_release_path(path);
3611                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3612                                         0, root->root_key.objectid,
3613                                         level - 1, 0);
3614                 cache = next_cache_extent(cache);
3615         }
3616
3617         /* Balance the btree using btrfs_search_slot() */
3618         cache = first_cache_extent(corrupt_blocks);
3619         while (cache) {
3620                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3621                                        cache);
3622                 memcpy(&key, &corrupt->key, sizeof(key));
3623                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3624                 if (ret < 0)
3625                         goto out;
3626                 /* return will always >0 since it won't find the item */
3627                 ret = 0;
3628                 btrfs_release_path(path);
3629                 cache = next_cache_extent(cache);
3630         }
3631 out:
3632         btrfs_commit_transaction(trans, root);
3633 out_free_path:
3634         btrfs_free_path(path);
3635         return ret;
3636 }
3637
3638 static int check_fs_root(struct btrfs_root *root,
3639                          struct cache_tree *root_cache,
3640                          struct walk_control *wc)
3641 {
3642         int ret = 0;
3643         int err = 0;
3644         int wret;
3645         int level;
3646         struct btrfs_path path;
3647         struct shared_node root_node;
3648         struct root_record *rec;
3649         struct btrfs_root_item *root_item = &root->root_item;
3650         struct cache_tree corrupt_blocks;
3651         struct orphan_data_extent *orphan;
3652         struct orphan_data_extent *tmp;
3653         enum btrfs_tree_block_status status;
3654         struct node_refs nrefs;
3655
3656         /*
3657          * Reuse the corrupt_block cache tree to record corrupted tree block
3658          *
3659          * Unlike the usage in extent tree check, here we do it in a per
3660          * fs/subvol tree base.
3661          */
3662         cache_tree_init(&corrupt_blocks);
3663         root->fs_info->corrupt_blocks = &corrupt_blocks;
3664
3665         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3666                 rec = get_root_rec(root_cache, root->root_key.objectid);
3667                 BUG_ON(IS_ERR(rec));
3668                 if (btrfs_root_refs(root_item) > 0)
3669                         rec->found_root_item = 1;
3670         }
3671
3672         btrfs_init_path(&path);
3673         memset(&root_node, 0, sizeof(root_node));
3674         cache_tree_init(&root_node.root_cache);
3675         cache_tree_init(&root_node.inode_cache);
3676         memset(&nrefs, 0, sizeof(nrefs));
3677
3678         /* Move the orphan extent record to corresponding inode_record */
3679         list_for_each_entry_safe(orphan, tmp,
3680                                  &root->orphan_data_extents, list) {
3681                 struct inode_record *inode;
3682
3683                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3684                                       1);
3685                 BUG_ON(IS_ERR(inode));
3686                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3687                 list_move(&orphan->list, &inode->orphan_extents);
3688         }
3689
3690         level = btrfs_header_level(root->node);
3691         memset(wc->nodes, 0, sizeof(wc->nodes));
3692         wc->nodes[level] = &root_node;
3693         wc->active_node = level;
3694         wc->root_level = level;
3695
3696         /* We may not have checked the root block, lets do that now */
3697         if (btrfs_is_leaf(root->node))
3698                 status = btrfs_check_leaf(root, NULL, root->node);
3699         else
3700                 status = btrfs_check_node(root, NULL, root->node);
3701         if (status != BTRFS_TREE_BLOCK_CLEAN)
3702                 return -EIO;
3703
3704         if (btrfs_root_refs(root_item) > 0 ||
3705             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3706                 path.nodes[level] = root->node;
3707                 extent_buffer_get(root->node);
3708                 path.slots[level] = 0;
3709         } else {
3710                 struct btrfs_key key;
3711                 struct btrfs_disk_key found_key;
3712
3713                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3714                 level = root_item->drop_level;
3715                 path.lowest_level = level;
3716                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3717                 if (wret < 0)
3718                         goto skip_walking;
3719                 btrfs_node_key(path.nodes[level], &found_key,
3720                                 path.slots[level]);
3721                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3722                                         sizeof(found_key)));
3723         }
3724
3725         while (1) {
3726                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3727                 if (wret < 0)
3728                         ret = wret;
3729                 if (wret != 0)
3730                         break;
3731
3732                 wret = walk_up_tree(root, &path, wc, &level);
3733                 if (wret < 0)
3734                         ret = wret;
3735                 if (wret != 0)
3736                         break;
3737         }
3738 skip_walking:
3739         btrfs_release_path(&path);
3740
3741         if (!cache_tree_empty(&corrupt_blocks)) {
3742                 struct cache_extent *cache;
3743                 struct btrfs_corrupt_block *corrupt;
3744
3745                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3746                        root->root_key.objectid);
3747                 cache = first_cache_extent(&corrupt_blocks);
3748                 while (cache) {
3749                         corrupt = container_of(cache,
3750                                                struct btrfs_corrupt_block,
3751                                                cache);
3752                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3753                                cache->start, corrupt->level,
3754                                corrupt->key.objectid, corrupt->key.type,
3755                                corrupt->key.offset);
3756                         cache = next_cache_extent(cache);
3757                 }
3758                 if (repair) {
3759                         printf("Try to repair the btree for root %llu\n",
3760                                root->root_key.objectid);
3761                         ret = repair_btree(root, &corrupt_blocks);
3762                         if (ret < 0)
3763                                 fprintf(stderr, "Failed to repair btree: %s\n",
3764                                         strerror(-ret));
3765                         if (!ret)
3766                                 printf("Btree for root %llu is fixed\n",
3767                                        root->root_key.objectid);
3768                 }
3769         }
3770
3771         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3772         if (err < 0)
3773                 ret = err;
3774
3775         if (root_node.current) {
3776                 root_node.current->checked = 1;
3777                 maybe_free_inode_rec(&root_node.inode_cache,
3778                                 root_node.current);
3779         }
3780
3781         err = check_inode_recs(root, &root_node.inode_cache);
3782         if (!ret)
3783                 ret = err;
3784
3785         free_corrupt_blocks_tree(&corrupt_blocks);
3786         root->fs_info->corrupt_blocks = NULL;
3787         free_orphan_data_extents(&root->orphan_data_extents);
3788         return ret;
3789 }
3790
3791 static int fs_root_objectid(u64 objectid)
3792 {
3793         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3794             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3795                 return 1;
3796         return is_fstree(objectid);
3797 }
3798
3799 static int check_fs_roots(struct btrfs_root *root,
3800                           struct cache_tree *root_cache)
3801 {
3802         struct btrfs_path path;
3803         struct btrfs_key key;
3804         struct walk_control wc;
3805         struct extent_buffer *leaf, *tree_node;
3806         struct btrfs_root *tmp_root;
3807         struct btrfs_root *tree_root = root->fs_info->tree_root;
3808         int ret;
3809         int err = 0;
3810
3811         if (ctx.progress_enabled) {
3812                 ctx.tp = TASK_FS_ROOTS;
3813                 task_start(ctx.info);
3814         }
3815
3816         /*
3817          * Just in case we made any changes to the extent tree that weren't
3818          * reflected into the free space cache yet.
3819          */
3820         if (repair)
3821                 reset_cached_block_groups(root->fs_info);
3822         memset(&wc, 0, sizeof(wc));
3823         cache_tree_init(&wc.shared);
3824         btrfs_init_path(&path);
3825
3826 again:
3827         key.offset = 0;
3828         key.objectid = 0;
3829         key.type = BTRFS_ROOT_ITEM_KEY;
3830         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3831         if (ret < 0) {
3832                 err = 1;
3833                 goto out;
3834         }
3835         tree_node = tree_root->node;
3836         while (1) {
3837                 if (tree_node != tree_root->node) {
3838                         free_root_recs_tree(root_cache);
3839                         btrfs_release_path(&path);
3840                         goto again;
3841                 }
3842                 leaf = path.nodes[0];
3843                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3844                         ret = btrfs_next_leaf(tree_root, &path);
3845                         if (ret) {
3846                                 if (ret < 0)
3847                                         err = 1;
3848                                 break;
3849                         }
3850                         leaf = path.nodes[0];
3851                 }
3852                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3853                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3854                     fs_root_objectid(key.objectid)) {
3855                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3856                                 tmp_root = btrfs_read_fs_root_no_cache(
3857                                                 root->fs_info, &key);
3858                         } else {
3859                                 key.offset = (u64)-1;
3860                                 tmp_root = btrfs_read_fs_root(
3861                                                 root->fs_info, &key);
3862                         }
3863                         if (IS_ERR(tmp_root)) {
3864                                 err = 1;
3865                                 goto next;
3866                         }
3867                         ret = check_fs_root(tmp_root, root_cache, &wc);
3868                         if (ret == -EAGAIN) {
3869                                 free_root_recs_tree(root_cache);
3870                                 btrfs_release_path(&path);
3871                                 goto again;
3872                         }
3873                         if (ret)
3874                                 err = 1;
3875                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3876                                 btrfs_free_fs_root(tmp_root);
3877                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3878                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3879                         process_root_ref(leaf, path.slots[0], &key,
3880                                          root_cache);
3881                 }
3882 next:
3883                 path.slots[0]++;
3884         }
3885 out:
3886         btrfs_release_path(&path);
3887         if (err)
3888                 free_extent_cache_tree(&wc.shared);
3889         if (!cache_tree_empty(&wc.shared))
3890                 fprintf(stderr, "warning line %d\n", __LINE__);
3891
3892         task_stop(ctx.info);
3893
3894         return err;
3895 }
3896
3897 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3898 {
3899         struct rb_node *n;
3900         struct extent_backref *back;
3901         struct tree_backref *tback;
3902         struct data_backref *dback;
3903         u64 found = 0;
3904         int err = 0;
3905
3906         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3907                 back = rb_node_to_extent_backref(n);
3908                 if (!back->found_extent_tree) {
3909                         err = 1;
3910                         if (!print_errs)
3911                                 goto out;
3912                         if (back->is_data) {
3913                                 dback = to_data_backref(back);
3914                                 fprintf(stderr, "Backref %llu %s %llu"
3915                                         " owner %llu offset %llu num_refs %lu"
3916                                         " not found in extent tree\n",
3917                                         (unsigned long long)rec->start,
3918                                         back->full_backref ?
3919                                         "parent" : "root",
3920                                         back->full_backref ?
3921                                         (unsigned long long)dback->parent:
3922                                         (unsigned long long)dback->root,
3923                                         (unsigned long long)dback->owner,
3924                                         (unsigned long long)dback->offset,
3925                                         (unsigned long)dback->num_refs);
3926                         } else {
3927                                 tback = to_tree_backref(back);
3928                                 fprintf(stderr, "Backref %llu parent %llu"
3929                                         " root %llu not found in extent tree\n",
3930                                         (unsigned long long)rec->start,
3931                                         (unsigned long long)tback->parent,
3932                                         (unsigned long long)tback->root);
3933                         }
3934                 }
3935                 if (!back->is_data && !back->found_ref) {
3936                         err = 1;
3937                         if (!print_errs)
3938                                 goto out;
3939                         tback = to_tree_backref(back);
3940                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3941                                 (unsigned long long)rec->start,
3942                                 back->full_backref ? "parent" : "root",
3943                                 back->full_backref ?
3944                                 (unsigned long long)tback->parent :
3945                                 (unsigned long long)tback->root, back);
3946                 }
3947                 if (back->is_data) {
3948                         dback = to_data_backref(back);
3949                         if (dback->found_ref != dback->num_refs) {
3950                                 err = 1;
3951                                 if (!print_errs)
3952                                         goto out;
3953                                 fprintf(stderr, "Incorrect local backref count"
3954                                         " on %llu %s %llu owner %llu"
3955                                         " offset %llu found %u wanted %u back %p\n",
3956                                         (unsigned long long)rec->start,
3957                                         back->full_backref ?
3958                                         "parent" : "root",
3959                                         back->full_backref ?
3960                                         (unsigned long long)dback->parent:
3961                                         (unsigned long long)dback->root,
3962                                         (unsigned long long)dback->owner,
3963                                         (unsigned long long)dback->offset,
3964                                         dback->found_ref, dback->num_refs, back);
3965                         }
3966                         if (dback->disk_bytenr != rec->start) {
3967                                 err = 1;
3968                                 if (!print_errs)
3969                                         goto out;
3970                                 fprintf(stderr, "Backref disk bytenr does not"
3971                                         " match extent record, bytenr=%llu, "
3972                                         "ref bytenr=%llu\n",
3973                                         (unsigned long long)rec->start,
3974                                         (unsigned long long)dback->disk_bytenr);
3975                         }
3976
3977                         if (dback->bytes != rec->nr) {
3978                                 err = 1;
3979                                 if (!print_errs)
3980                                         goto out;
3981                                 fprintf(stderr, "Backref bytes do not match "
3982                                         "extent backref, bytenr=%llu, ref "
3983                                         "bytes=%llu, backref bytes=%llu\n",
3984                                         (unsigned long long)rec->start,
3985                                         (unsigned long long)rec->nr,
3986                                         (unsigned long long)dback->bytes);
3987                         }
3988                 }
3989                 if (!back->is_data) {
3990                         found += 1;
3991                 } else {
3992                         dback = to_data_backref(back);
3993                         found += dback->found_ref;
3994                 }
3995         }
3996         if (found != rec->refs) {
3997                 err = 1;
3998                 if (!print_errs)
3999                         goto out;
4000                 fprintf(stderr, "Incorrect global backref count "
4001                         "on %llu found %llu wanted %llu\n",
4002                         (unsigned long long)rec->start,
4003                         (unsigned long long)found,
4004                         (unsigned long long)rec->refs);
4005         }
4006 out:
4007         return err;
4008 }
4009
4010 static void __free_one_backref(struct rb_node *node)
4011 {
4012         struct extent_backref *back = rb_node_to_extent_backref(node);
4013
4014         free(back);
4015 }
4016
4017 static void free_all_extent_backrefs(struct extent_record *rec)
4018 {
4019         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4020 }
4021
4022 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4023                                      struct cache_tree *extent_cache)
4024 {
4025         struct cache_extent *cache;
4026         struct extent_record *rec;
4027
4028         while (1) {
4029                 cache = first_cache_extent(extent_cache);
4030                 if (!cache)
4031                         break;
4032                 rec = container_of(cache, struct extent_record, cache);
4033                 remove_cache_extent(extent_cache, cache);
4034                 free_all_extent_backrefs(rec);
4035                 free(rec);
4036         }
4037 }
4038
4039 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4040                                  struct extent_record *rec)
4041 {
4042         if (rec->content_checked && rec->owner_ref_checked &&
4043             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4044             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4045             !rec->bad_full_backref && !rec->crossing_stripes &&
4046             !rec->wrong_chunk_type) {
4047                 remove_cache_extent(extent_cache, &rec->cache);
4048                 free_all_extent_backrefs(rec);
4049                 list_del_init(&rec->list);
4050                 free(rec);
4051         }
4052         return 0;
4053 }
4054
4055 static int check_owner_ref(struct btrfs_root *root,
4056                             struct extent_record *rec,
4057                             struct extent_buffer *buf)
4058 {
4059         struct extent_backref *node, *tmp;
4060         struct tree_backref *back;
4061         struct btrfs_root *ref_root;
4062         struct btrfs_key key;
4063         struct btrfs_path path;
4064         struct extent_buffer *parent;
4065         int level;
4066         int found = 0;
4067         int ret;
4068
4069         rbtree_postorder_for_each_entry_safe(node, tmp,
4070                                              &rec->backref_tree, node) {
4071                 if (node->is_data)
4072                         continue;
4073                 if (!node->found_ref)
4074                         continue;
4075                 if (node->full_backref)
4076                         continue;
4077                 back = to_tree_backref(node);
4078                 if (btrfs_header_owner(buf) == back->root)
4079                         return 0;
4080         }
4081         BUG_ON(rec->is_root);
4082
4083         /* try to find the block by search corresponding fs tree */
4084         key.objectid = btrfs_header_owner(buf);
4085         key.type = BTRFS_ROOT_ITEM_KEY;
4086         key.offset = (u64)-1;
4087
4088         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4089         if (IS_ERR(ref_root))
4090                 return 1;
4091
4092         level = btrfs_header_level(buf);
4093         if (level == 0)
4094                 btrfs_item_key_to_cpu(buf, &key, 0);
4095         else
4096                 btrfs_node_key_to_cpu(buf, &key, 0);
4097
4098         btrfs_init_path(&path);
4099         path.lowest_level = level + 1;
4100         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4101         if (ret < 0)
4102                 return 0;
4103
4104         parent = path.nodes[level + 1];
4105         if (parent && buf->start == btrfs_node_blockptr(parent,
4106                                                         path.slots[level + 1]))
4107                 found = 1;
4108
4109         btrfs_release_path(&path);
4110         return found ? 0 : 1;
4111 }
4112
4113 static int is_extent_tree_record(struct extent_record *rec)
4114 {
4115         struct extent_backref *ref, *tmp;
4116         struct tree_backref *back;
4117         int is_extent = 0;
4118
4119         rbtree_postorder_for_each_entry_safe(ref, tmp,
4120                                              &rec->backref_tree, node) {
4121                 if (ref->is_data)
4122                         return 0;
4123                 back = to_tree_backref(ref);
4124                 if (ref->full_backref)
4125                         return 0;
4126                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4127                         is_extent = 1;
4128         }
4129         return is_extent;
4130 }
4131
4132
4133 static int record_bad_block_io(struct btrfs_fs_info *info,
4134                                struct cache_tree *extent_cache,
4135                                u64 start, u64 len)
4136 {
4137         struct extent_record *rec;
4138         struct cache_extent *cache;
4139         struct btrfs_key key;
4140
4141         cache = lookup_cache_extent(extent_cache, start, len);
4142         if (!cache)
4143                 return 0;
4144
4145         rec = container_of(cache, struct extent_record, cache);
4146         if (!is_extent_tree_record(rec))
4147                 return 0;
4148
4149         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4150         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4151 }
4152
4153 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4154                        struct extent_buffer *buf, int slot)
4155 {
4156         if (btrfs_header_level(buf)) {
4157                 struct btrfs_key_ptr ptr1, ptr2;
4158
4159                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4160                                    sizeof(struct btrfs_key_ptr));
4161                 read_extent_buffer(buf, &ptr2,
4162                                    btrfs_node_key_ptr_offset(slot + 1),
4163                                    sizeof(struct btrfs_key_ptr));
4164                 write_extent_buffer(buf, &ptr1,
4165                                     btrfs_node_key_ptr_offset(slot + 1),
4166                                     sizeof(struct btrfs_key_ptr));
4167                 write_extent_buffer(buf, &ptr2,
4168                                     btrfs_node_key_ptr_offset(slot),
4169                                     sizeof(struct btrfs_key_ptr));
4170                 if (slot == 0) {
4171                         struct btrfs_disk_key key;
4172                         btrfs_node_key(buf, &key, 0);
4173                         btrfs_fixup_low_keys(root, path, &key,
4174                                              btrfs_header_level(buf) + 1);
4175                 }
4176         } else {
4177                 struct btrfs_item *item1, *item2;
4178                 struct btrfs_key k1, k2;
4179                 char *item1_data, *item2_data;
4180                 u32 item1_offset, item2_offset, item1_size, item2_size;
4181
4182                 item1 = btrfs_item_nr(slot);
4183                 item2 = btrfs_item_nr(slot + 1);
4184                 btrfs_item_key_to_cpu(buf, &k1, slot);
4185                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4186                 item1_offset = btrfs_item_offset(buf, item1);
4187                 item2_offset = btrfs_item_offset(buf, item2);
4188                 item1_size = btrfs_item_size(buf, item1);
4189                 item2_size = btrfs_item_size(buf, item2);
4190
4191                 item1_data = malloc(item1_size);
4192                 if (!item1_data)
4193                         return -ENOMEM;
4194                 item2_data = malloc(item2_size);
4195                 if (!item2_data) {
4196                         free(item1_data);
4197                         return -ENOMEM;
4198                 }
4199
4200                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4201                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4202
4203                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4204                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4205                 free(item1_data);
4206                 free(item2_data);
4207
4208                 btrfs_set_item_offset(buf, item1, item2_offset);
4209                 btrfs_set_item_offset(buf, item2, item1_offset);
4210                 btrfs_set_item_size(buf, item1, item2_size);
4211                 btrfs_set_item_size(buf, item2, item1_size);
4212
4213                 path->slots[0] = slot;
4214                 btrfs_set_item_key_unsafe(root, path, &k2);
4215                 path->slots[0] = slot + 1;
4216                 btrfs_set_item_key_unsafe(root, path, &k1);
4217         }
4218         return 0;
4219 }
4220
4221 static int fix_key_order(struct btrfs_trans_handle *trans,
4222                          struct btrfs_root *root,
4223                          struct btrfs_path *path)
4224 {
4225         struct extent_buffer *buf;
4226         struct btrfs_key k1, k2;
4227         int i;
4228         int level = path->lowest_level;
4229         int ret = -EIO;
4230
4231         buf = path->nodes[level];
4232         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4233                 if (level) {
4234                         btrfs_node_key_to_cpu(buf, &k1, i);
4235                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4236                 } else {
4237                         btrfs_item_key_to_cpu(buf, &k1, i);
4238                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4239                 }
4240                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4241                         continue;
4242                 ret = swap_values(root, path, buf, i);
4243                 if (ret)
4244                         break;
4245                 btrfs_mark_buffer_dirty(buf);
4246                 i = 0;
4247         }
4248         return ret;
4249 }
4250
4251 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4252                              struct btrfs_root *root,
4253                              struct btrfs_path *path,
4254                              struct extent_buffer *buf, int slot)
4255 {
4256         struct btrfs_key key;
4257         int nritems = btrfs_header_nritems(buf);
4258
4259         btrfs_item_key_to_cpu(buf, &key, slot);
4260
4261         /* These are all the keys we can deal with missing. */
4262         if (key.type != BTRFS_DIR_INDEX_KEY &&
4263             key.type != BTRFS_EXTENT_ITEM_KEY &&
4264             key.type != BTRFS_METADATA_ITEM_KEY &&
4265             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4266             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4267                 return -1;
4268
4269         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4270                (unsigned long long)key.objectid, key.type,
4271                (unsigned long long)key.offset, slot, buf->start);
4272         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4273                               btrfs_item_nr_offset(slot + 1),
4274                               sizeof(struct btrfs_item) *
4275                               (nritems - slot - 1));
4276         btrfs_set_header_nritems(buf, nritems - 1);
4277         if (slot == 0) {
4278                 struct btrfs_disk_key disk_key;
4279
4280                 btrfs_item_key(buf, &disk_key, 0);
4281                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4282         }
4283         btrfs_mark_buffer_dirty(buf);
4284         return 0;
4285 }
4286
4287 static int fix_item_offset(struct btrfs_trans_handle *trans,
4288                            struct btrfs_root *root,
4289                            struct btrfs_path *path)
4290 {
4291         struct extent_buffer *buf;
4292         int i;
4293         int ret = 0;
4294
4295         /* We should only get this for leaves */
4296         BUG_ON(path->lowest_level);
4297         buf = path->nodes[0];
4298 again:
4299         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4300                 unsigned int shift = 0, offset;
4301
4302                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4303                     BTRFS_LEAF_DATA_SIZE(root)) {
4304                         if (btrfs_item_end_nr(buf, i) >
4305                             BTRFS_LEAF_DATA_SIZE(root)) {
4306                                 ret = delete_bogus_item(trans, root, path,
4307                                                         buf, i);
4308                                 if (!ret)
4309                                         goto again;
4310                                 fprintf(stderr, "item is off the end of the "
4311                                         "leaf, can't fix\n");
4312                                 ret = -EIO;
4313                                 break;
4314                         }
4315                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4316                                 btrfs_item_end_nr(buf, i);
4317                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4318                            btrfs_item_offset_nr(buf, i - 1)) {
4319                         if (btrfs_item_end_nr(buf, i) >
4320                             btrfs_item_offset_nr(buf, i - 1)) {
4321                                 ret = delete_bogus_item(trans, root, path,
4322                                                         buf, i);
4323                                 if (!ret)
4324                                         goto again;
4325                                 fprintf(stderr, "items overlap, can't fix\n");
4326                                 ret = -EIO;
4327                                 break;
4328                         }
4329                         shift = btrfs_item_offset_nr(buf, i - 1) -
4330                                 btrfs_item_end_nr(buf, i);
4331                 }
4332                 if (!shift)
4333                         continue;
4334
4335                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4336                        i, shift, (unsigned long long)buf->start);
4337                 offset = btrfs_item_offset_nr(buf, i);
4338                 memmove_extent_buffer(buf,
4339                                       btrfs_leaf_data(buf) + offset + shift,
4340                                       btrfs_leaf_data(buf) + offset,
4341                                       btrfs_item_size_nr(buf, i));
4342                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4343                                       offset + shift);
4344                 btrfs_mark_buffer_dirty(buf);
4345         }
4346
4347         /*
4348          * We may have moved things, in which case we want to exit so we don't
4349          * write those changes out.  Once we have proper abort functionality in
4350          * progs this can be changed to something nicer.
4351          */
4352         BUG_ON(ret);
4353         return ret;
4354 }
4355
4356 /*
4357  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4358  * then just return -EIO.
4359  */
4360 static int try_to_fix_bad_block(struct btrfs_root *root,
4361                                 struct extent_buffer *buf,
4362                                 enum btrfs_tree_block_status status)
4363 {
4364         struct btrfs_trans_handle *trans;
4365         struct ulist *roots;
4366         struct ulist_node *node;
4367         struct btrfs_root *search_root;
4368         struct btrfs_path *path;
4369         struct ulist_iterator iter;
4370         struct btrfs_key root_key, key;
4371         int ret;
4372
4373         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4374             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4375                 return -EIO;
4376
4377         path = btrfs_alloc_path();
4378         if (!path)
4379                 return -EIO;
4380
4381         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4382                                    0, &roots);
4383         if (ret) {
4384                 btrfs_free_path(path);
4385                 return -EIO;
4386         }
4387
4388         ULIST_ITER_INIT(&iter);
4389         while ((node = ulist_next(roots, &iter))) {
4390                 root_key.objectid = node->val;
4391                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4392                 root_key.offset = (u64)-1;
4393
4394                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4395                 if (IS_ERR(root)) {
4396                         ret = -EIO;
4397                         break;
4398                 }
4399
4400
4401                 trans = btrfs_start_transaction(search_root, 0);
4402                 if (IS_ERR(trans)) {
4403                         ret = PTR_ERR(trans);
4404                         break;
4405                 }
4406
4407                 path->lowest_level = btrfs_header_level(buf);
4408                 path->skip_check_block = 1;
4409                 if (path->lowest_level)
4410                         btrfs_node_key_to_cpu(buf, &key, 0);
4411                 else
4412                         btrfs_item_key_to_cpu(buf, &key, 0);
4413                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4414                 if (ret) {
4415                         ret = -EIO;
4416                         btrfs_commit_transaction(trans, search_root);
4417                         break;
4418                 }
4419                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4420                         ret = fix_key_order(trans, search_root, path);
4421                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4422                         ret = fix_item_offset(trans, search_root, path);
4423                 if (ret) {
4424                         btrfs_commit_transaction(trans, search_root);
4425                         break;
4426                 }
4427                 btrfs_release_path(path);
4428                 btrfs_commit_transaction(trans, search_root);
4429         }
4430         ulist_free(roots);
4431         btrfs_free_path(path);
4432         return ret;
4433 }
4434
4435 static int check_block(struct btrfs_root *root,
4436                        struct cache_tree *extent_cache,
4437                        struct extent_buffer *buf, u64 flags)
4438 {
4439         struct extent_record *rec;
4440         struct cache_extent *cache;
4441         struct btrfs_key key;
4442         enum btrfs_tree_block_status status;
4443         int ret = 0;
4444         int level;
4445
4446         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4447         if (!cache)
4448                 return 1;
4449         rec = container_of(cache, struct extent_record, cache);
4450         rec->generation = btrfs_header_generation(buf);
4451
4452         level = btrfs_header_level(buf);
4453         if (btrfs_header_nritems(buf) > 0) {
4454
4455                 if (level == 0)
4456                         btrfs_item_key_to_cpu(buf, &key, 0);
4457                 else
4458                         btrfs_node_key_to_cpu(buf, &key, 0);
4459
4460                 rec->info_objectid = key.objectid;
4461         }
4462         rec->info_level = level;
4463
4464         if (btrfs_is_leaf(buf))
4465                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4466         else
4467                 status = btrfs_check_node(root, &rec->parent_key, buf);
4468
4469         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4470                 if (repair)
4471                         status = try_to_fix_bad_block(root, buf, status);
4472                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4473                         ret = -EIO;
4474                         fprintf(stderr, "bad block %llu\n",
4475                                 (unsigned long long)buf->start);
4476                 } else {
4477                         /*
4478                          * Signal to callers we need to start the scan over
4479                          * again since we'll have cowed blocks.
4480                          */
4481                         ret = -EAGAIN;
4482                 }
4483         } else {
4484                 rec->content_checked = 1;
4485                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4486                         rec->owner_ref_checked = 1;
4487                 else {
4488                         ret = check_owner_ref(root, rec, buf);
4489                         if (!ret)
4490                                 rec->owner_ref_checked = 1;
4491                 }
4492         }
4493         if (!ret)
4494                 maybe_free_extent_rec(extent_cache, rec);
4495         return ret;
4496 }
4497
4498
4499 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4500                                                 u64 parent, u64 root)
4501 {
4502         struct rb_node *node;
4503         struct tree_backref *back = NULL;
4504         struct tree_backref match = {
4505                 .node = {
4506                         .is_data = 0,
4507                 },
4508         };
4509
4510         if (parent) {
4511                 match.parent = parent;
4512                 match.node.full_backref = 1;
4513         } else {
4514                 match.root = root;
4515         }
4516
4517         node = rb_search(&rec->backref_tree, &match.node.node,
4518                          (rb_compare_keys)compare_extent_backref, NULL);
4519         if (node)
4520                 back = to_tree_backref(rb_node_to_extent_backref(node));
4521
4522         return back;
4523 }
4524
4525 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4526                                                 u64 parent, u64 root)
4527 {
4528         struct tree_backref *ref = malloc(sizeof(*ref));
4529
4530         if (!ref)
4531                 return NULL;
4532         memset(&ref->node, 0, sizeof(ref->node));
4533         if (parent > 0) {
4534                 ref->parent = parent;
4535                 ref->node.full_backref = 1;
4536         } else {
4537                 ref->root = root;
4538                 ref->node.full_backref = 0;
4539         }
4540         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4541
4542         return ref;
4543 }
4544
4545 static struct data_backref *find_data_backref(struct extent_record *rec,
4546                                                 u64 parent, u64 root,
4547                                                 u64 owner, u64 offset,
4548                                                 int found_ref,
4549                                                 u64 disk_bytenr, u64 bytes)
4550 {
4551         struct rb_node *node;
4552         struct data_backref *back = NULL;
4553         struct data_backref match = {
4554                 .node = {
4555                         .is_data = 1,
4556                 },
4557                 .owner = owner,
4558                 .offset = offset,
4559                 .bytes = bytes,
4560                 .found_ref = found_ref,
4561                 .disk_bytenr = disk_bytenr,
4562         };
4563
4564         if (parent) {
4565                 match.parent = parent;
4566                 match.node.full_backref = 1;
4567         } else {
4568                 match.root = root;
4569         }
4570
4571         node = rb_search(&rec->backref_tree, &match.node.node,
4572                          (rb_compare_keys)compare_extent_backref, NULL);
4573         if (node)
4574                 back = to_data_backref(rb_node_to_extent_backref(node));
4575
4576         return back;
4577 }
4578
4579 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4580                                                 u64 parent, u64 root,
4581                                                 u64 owner, u64 offset,
4582                                                 u64 max_size)
4583 {
4584         struct data_backref *ref = malloc(sizeof(*ref));
4585
4586         if (!ref)
4587                 return NULL;
4588         memset(&ref->node, 0, sizeof(ref->node));
4589         ref->node.is_data = 1;
4590
4591         if (parent > 0) {
4592                 ref->parent = parent;
4593                 ref->owner = 0;
4594                 ref->offset = 0;
4595                 ref->node.full_backref = 1;
4596         } else {
4597                 ref->root = root;
4598                 ref->owner = owner;
4599                 ref->offset = offset;
4600                 ref->node.full_backref = 0;
4601         }
4602         ref->bytes = max_size;
4603         ref->found_ref = 0;
4604         ref->num_refs = 0;
4605         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4606         if (max_size > rec->max_size)
4607                 rec->max_size = max_size;
4608         return ref;
4609 }
4610
4611 /* Check if the type of extent matches with its chunk */
4612 static void check_extent_type(struct extent_record *rec)
4613 {
4614         struct btrfs_block_group_cache *bg_cache;
4615
4616         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4617         if (!bg_cache)
4618                 return;
4619
4620         /* data extent, check chunk directly*/
4621         if (!rec->metadata) {
4622                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4623                         rec->wrong_chunk_type = 1;
4624                 return;
4625         }
4626
4627         /* metadata extent, check the obvious case first */
4628         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4629                                  BTRFS_BLOCK_GROUP_METADATA))) {
4630                 rec->wrong_chunk_type = 1;
4631                 return;
4632         }
4633
4634         /*
4635          * Check SYSTEM extent, as it's also marked as metadata, we can only
4636          * make sure it's a SYSTEM extent by its backref
4637          */
4638         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4639                 struct extent_backref *node;
4640                 struct tree_backref *tback;
4641                 u64 bg_type;
4642
4643                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4644                 if (node->is_data) {
4645                         /* tree block shouldn't have data backref */
4646                         rec->wrong_chunk_type = 1;
4647                         return;
4648                 }
4649                 tback = container_of(node, struct tree_backref, node);
4650
4651                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4652                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4653                 else
4654                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4655                 if (!(bg_cache->flags & bg_type))
4656                         rec->wrong_chunk_type = 1;
4657         }
4658 }
4659
4660 /*
4661  * Allocate a new extent record, fill default values from @tmpl and insert int
4662  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4663  * the cache, otherwise it fails.
4664  */
4665 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4666                 struct extent_record *tmpl)
4667 {
4668         struct extent_record *rec;
4669         int ret = 0;
4670
4671         rec = malloc(sizeof(*rec));
4672         if (!rec)
4673                 return -ENOMEM;
4674         rec->start = tmpl->start;
4675         rec->max_size = tmpl->max_size;
4676         rec->nr = max(tmpl->nr, tmpl->max_size);
4677         rec->found_rec = tmpl->found_rec;
4678         rec->content_checked = tmpl->content_checked;
4679         rec->owner_ref_checked = tmpl->owner_ref_checked;
4680         rec->num_duplicates = 0;
4681         rec->metadata = tmpl->metadata;
4682         rec->flag_block_full_backref = FLAG_UNSET;
4683         rec->bad_full_backref = 0;
4684         rec->crossing_stripes = 0;
4685         rec->wrong_chunk_type = 0;
4686         rec->is_root = tmpl->is_root;
4687         rec->refs = tmpl->refs;
4688         rec->extent_item_refs = tmpl->extent_item_refs;
4689         rec->parent_generation = tmpl->parent_generation;
4690         INIT_LIST_HEAD(&rec->backrefs);
4691         INIT_LIST_HEAD(&rec->dups);
4692         INIT_LIST_HEAD(&rec->list);
4693         rec->backref_tree = RB_ROOT;
4694         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4695         rec->cache.start = tmpl->start;
4696         rec->cache.size = tmpl->nr;
4697         ret = insert_cache_extent(extent_cache, &rec->cache);
4698         BUG_ON(ret);
4699         bytes_used += rec->nr;
4700
4701         if (tmpl->metadata)
4702                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4703                                 global_info->tree_root->nodesize);
4704         check_extent_type(rec);
4705         return ret;
4706 }
4707
4708 /*
4709  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4710  * some are hints:
4711  * - refs              - if found, increase refs
4712  * - is_root           - if found, set
4713  * - content_checked   - if found, set
4714  * - owner_ref_checked - if found, set
4715  *
4716  * If not found, create a new one, initialize and insert.
4717  */
4718 static int add_extent_rec(struct cache_tree *extent_cache,
4719                 struct extent_record *tmpl)
4720 {
4721         struct extent_record *rec;
4722         struct cache_extent *cache;
4723         int ret = 0;
4724         int dup = 0;
4725
4726         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4727         if (cache) {
4728                 rec = container_of(cache, struct extent_record, cache);
4729                 if (tmpl->refs)
4730                         rec->refs++;
4731                 if (rec->nr == 1)
4732                         rec->nr = max(tmpl->nr, tmpl->max_size);
4733
4734                 /*
4735                  * We need to make sure to reset nr to whatever the extent
4736                  * record says was the real size, this way we can compare it to
4737                  * the backrefs.
4738                  */
4739                 if (tmpl->found_rec) {
4740                         if (tmpl->start != rec->start || rec->found_rec) {
4741                                 struct extent_record *tmp;
4742
4743                                 dup = 1;
4744                                 if (list_empty(&rec->list))
4745                                         list_add_tail(&rec->list,
4746                                                       &duplicate_extents);
4747
4748                                 /*
4749                                  * We have to do this song and dance in case we
4750                                  * find an extent record that falls inside of
4751                                  * our current extent record but does not have
4752                                  * the same objectid.
4753                                  */
4754                                 tmp = malloc(sizeof(*tmp));
4755                                 if (!tmp)
4756                                         return -ENOMEM;
4757                                 tmp->start = tmpl->start;
4758                                 tmp->max_size = tmpl->max_size;
4759                                 tmp->nr = tmpl->nr;
4760                                 tmp->found_rec = 1;
4761                                 tmp->metadata = tmpl->metadata;
4762                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4763                                 INIT_LIST_HEAD(&tmp->list);
4764                                 list_add_tail(&tmp->list, &rec->dups);
4765                                 rec->num_duplicates++;
4766                         } else {
4767                                 rec->nr = tmpl->nr;
4768                                 rec->found_rec = 1;
4769                         }
4770                 }
4771
4772                 if (tmpl->extent_item_refs && !dup) {
4773                         if (rec->extent_item_refs) {
4774                                 fprintf(stderr, "block %llu rec "
4775                                         "extent_item_refs %llu, passed %llu\n",
4776                                         (unsigned long long)tmpl->start,
4777                                         (unsigned long long)
4778                                                         rec->extent_item_refs,
4779                                         (unsigned long long)tmpl->extent_item_refs);
4780                         }
4781                         rec->extent_item_refs = tmpl->extent_item_refs;
4782                 }
4783                 if (tmpl->is_root)
4784                         rec->is_root = 1;
4785                 if (tmpl->content_checked)
4786                         rec->content_checked = 1;
4787                 if (tmpl->owner_ref_checked)
4788                         rec->owner_ref_checked = 1;
4789                 memcpy(&rec->parent_key, &tmpl->parent_key,
4790                                 sizeof(tmpl->parent_key));
4791                 if (tmpl->parent_generation)
4792                         rec->parent_generation = tmpl->parent_generation;
4793                 if (rec->max_size < tmpl->max_size)
4794                         rec->max_size = tmpl->max_size;
4795
4796                 /*
4797                  * A metadata extent can't cross stripe_len boundary, otherwise
4798                  * kernel scrub won't be able to handle it.
4799                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4800                  * it.
4801                  */
4802                 if (tmpl->metadata)
4803                         rec->crossing_stripes = check_crossing_stripes(
4804                                 rec->start, global_info->tree_root->nodesize);
4805                 check_extent_type(rec);
4806                 maybe_free_extent_rec(extent_cache, rec);
4807                 return ret;
4808         }
4809
4810         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4811
4812         return ret;
4813 }
4814
4815 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4816                             u64 parent, u64 root, int found_ref)
4817 {
4818         struct extent_record *rec;
4819         struct tree_backref *back;
4820         struct cache_extent *cache;
4821
4822         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4823         if (!cache) {
4824                 struct extent_record tmpl;
4825
4826                 memset(&tmpl, 0, sizeof(tmpl));
4827                 tmpl.start = bytenr;
4828                 tmpl.nr = 1;
4829                 tmpl.metadata = 1;
4830
4831                 add_extent_rec_nolookup(extent_cache, &tmpl);
4832
4833                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4834                 if (!cache)
4835                         abort();
4836         }
4837
4838         rec = container_of(cache, struct extent_record, cache);
4839         if (rec->start != bytenr) {
4840                 abort();
4841         }
4842
4843         back = find_tree_backref(rec, parent, root);
4844         if (!back) {
4845                 back = alloc_tree_backref(rec, parent, root);
4846                 BUG_ON(!back);
4847         }
4848
4849         if (found_ref) {
4850                 if (back->node.found_ref) {
4851                         fprintf(stderr, "Extent back ref already exists "
4852                                 "for %llu parent %llu root %llu \n",
4853                                 (unsigned long long)bytenr,
4854                                 (unsigned long long)parent,
4855                                 (unsigned long long)root);
4856                 }
4857                 back->node.found_ref = 1;
4858         } else {
4859                 if (back->node.found_extent_tree) {
4860                         fprintf(stderr, "Extent back ref already exists "
4861                                 "for %llu parent %llu root %llu \n",
4862                                 (unsigned long long)bytenr,
4863                                 (unsigned long long)parent,
4864                                 (unsigned long long)root);
4865                 }
4866                 back->node.found_extent_tree = 1;
4867         }
4868         check_extent_type(rec);
4869         maybe_free_extent_rec(extent_cache, rec);
4870         return 0;
4871 }
4872
4873 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4874                             u64 parent, u64 root, u64 owner, u64 offset,
4875                             u32 num_refs, int found_ref, u64 max_size)
4876 {
4877         struct extent_record *rec;
4878         struct data_backref *back;
4879         struct cache_extent *cache;
4880
4881         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4882         if (!cache) {
4883                 struct extent_record tmpl;
4884
4885                 memset(&tmpl, 0, sizeof(tmpl));
4886                 tmpl.start = bytenr;
4887                 tmpl.nr = 1;
4888                 tmpl.max_size = max_size;
4889
4890                 add_extent_rec_nolookup(extent_cache, &tmpl);
4891
4892                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4893                 if (!cache)
4894                         abort();
4895         }
4896
4897         rec = container_of(cache, struct extent_record, cache);
4898         if (rec->max_size < max_size)
4899                 rec->max_size = max_size;
4900
4901         /*
4902          * If found_ref is set then max_size is the real size and must match the
4903          * existing refs.  So if we have already found a ref then we need to
4904          * make sure that this ref matches the existing one, otherwise we need
4905          * to add a new backref so we can notice that the backrefs don't match
4906          * and we need to figure out who is telling the truth.  This is to
4907          * account for that awful fsync bug I introduced where we'd end up with
4908          * a btrfs_file_extent_item that would have its length include multiple
4909          * prealloc extents or point inside of a prealloc extent.
4910          */
4911         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4912                                  bytenr, max_size);
4913         if (!back) {
4914                 back = alloc_data_backref(rec, parent, root, owner, offset,
4915                                           max_size);
4916                 BUG_ON(!back);
4917         }
4918
4919         if (found_ref) {
4920                 BUG_ON(num_refs != 1);
4921                 if (back->node.found_ref)
4922                         BUG_ON(back->bytes != max_size);
4923                 back->node.found_ref = 1;
4924                 back->found_ref += 1;
4925                 back->bytes = max_size;
4926                 back->disk_bytenr = bytenr;
4927                 rec->refs += 1;
4928                 rec->content_checked = 1;
4929                 rec->owner_ref_checked = 1;
4930         } else {
4931                 if (back->node.found_extent_tree) {
4932                         fprintf(stderr, "Extent back ref already exists "
4933                                 "for %llu parent %llu root %llu "
4934                                 "owner %llu offset %llu num_refs %lu\n",
4935                                 (unsigned long long)bytenr,
4936                                 (unsigned long long)parent,
4937                                 (unsigned long long)root,
4938                                 (unsigned long long)owner,
4939                                 (unsigned long long)offset,
4940                                 (unsigned long)num_refs);
4941                 }
4942                 back->num_refs = num_refs;
4943                 back->node.found_extent_tree = 1;
4944         }
4945         maybe_free_extent_rec(extent_cache, rec);
4946         return 0;
4947 }
4948
4949 static int add_pending(struct cache_tree *pending,
4950                        struct cache_tree *seen, u64 bytenr, u32 size)
4951 {
4952         int ret;
4953         ret = add_cache_extent(seen, bytenr, size);
4954         if (ret)
4955                 return ret;
4956         add_cache_extent(pending, bytenr, size);
4957         return 0;
4958 }
4959
4960 static int pick_next_pending(struct cache_tree *pending,
4961                         struct cache_tree *reada,
4962                         struct cache_tree *nodes,
4963                         u64 last, struct block_info *bits, int bits_nr,
4964                         int *reada_bits)
4965 {
4966         unsigned long node_start = last;
4967         struct cache_extent *cache;
4968         int ret;
4969
4970         cache = search_cache_extent(reada, 0);
4971         if (cache) {
4972                 bits[0].start = cache->start;
4973                 bits[0].size = cache->size;
4974                 *reada_bits = 1;
4975                 return 1;
4976         }
4977         *reada_bits = 0;
4978         if (node_start > 32768)
4979                 node_start -= 32768;
4980
4981         cache = search_cache_extent(nodes, node_start);
4982         if (!cache)
4983                 cache = search_cache_extent(nodes, 0);
4984
4985         if (!cache) {
4986                  cache = search_cache_extent(pending, 0);
4987                  if (!cache)
4988                          return 0;
4989                  ret = 0;
4990                  do {
4991                          bits[ret].start = cache->start;
4992                          bits[ret].size = cache->size;
4993                          cache = next_cache_extent(cache);
4994                          ret++;
4995                  } while (cache && ret < bits_nr);
4996                  return ret;
4997         }
4998
4999         ret = 0;
5000         do {
5001                 bits[ret].start = cache->start;
5002                 bits[ret].size = cache->size;
5003                 cache = next_cache_extent(cache);
5004                 ret++;
5005         } while (cache && ret < bits_nr);
5006
5007         if (bits_nr - ret > 8) {
5008                 u64 lookup = bits[0].start + bits[0].size;
5009                 struct cache_extent *next;
5010                 next = search_cache_extent(pending, lookup);
5011                 while(next) {
5012                         if (next->start - lookup > 32768)
5013                                 break;
5014                         bits[ret].start = next->start;
5015                         bits[ret].size = next->size;
5016                         lookup = next->start + next->size;
5017                         ret++;
5018                         if (ret == bits_nr)
5019                                 break;
5020                         next = next_cache_extent(next);
5021                         if (!next)
5022                                 break;
5023                 }
5024         }
5025         return ret;
5026 }
5027
5028 static void free_chunk_record(struct cache_extent *cache)
5029 {
5030         struct chunk_record *rec;
5031
5032         rec = container_of(cache, struct chunk_record, cache);
5033         list_del_init(&rec->list);
5034         list_del_init(&rec->dextents);
5035         free(rec);
5036 }
5037
5038 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5039 {
5040         cache_tree_free_extents(chunk_cache, free_chunk_record);
5041 }
5042
5043 static void free_device_record(struct rb_node *node)
5044 {
5045         struct device_record *rec;
5046
5047         rec = container_of(node, struct device_record, node);
5048         free(rec);
5049 }
5050
5051 FREE_RB_BASED_TREE(device_cache, free_device_record);
5052
5053 int insert_block_group_record(struct block_group_tree *tree,
5054                               struct block_group_record *bg_rec)
5055 {
5056         int ret;
5057
5058         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5059         if (ret)
5060                 return ret;
5061
5062         list_add_tail(&bg_rec->list, &tree->block_groups);
5063         return 0;
5064 }
5065
5066 static void free_block_group_record(struct cache_extent *cache)
5067 {
5068         struct block_group_record *rec;
5069
5070         rec = container_of(cache, struct block_group_record, cache);
5071         list_del_init(&rec->list);
5072         free(rec);
5073 }
5074
5075 void free_block_group_tree(struct block_group_tree *tree)
5076 {
5077         cache_tree_free_extents(&tree->tree, free_block_group_record);
5078 }
5079
5080 int insert_device_extent_record(struct device_extent_tree *tree,
5081                                 struct device_extent_record *de_rec)
5082 {
5083         int ret;
5084
5085         /*
5086          * Device extent is a bit different from the other extents, because
5087          * the extents which belong to the different devices may have the
5088          * same start and size, so we need use the special extent cache
5089          * search/insert functions.
5090          */
5091         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5092         if (ret)
5093                 return ret;
5094
5095         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5096         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5097         return 0;
5098 }
5099
5100 static void free_device_extent_record(struct cache_extent *cache)
5101 {
5102         struct device_extent_record *rec;
5103
5104         rec = container_of(cache, struct device_extent_record, cache);
5105         if (!list_empty(&rec->chunk_list))
5106                 list_del_init(&rec->chunk_list);
5107         if (!list_empty(&rec->device_list))
5108                 list_del_init(&rec->device_list);
5109         free(rec);
5110 }
5111
5112 void free_device_extent_tree(struct device_extent_tree *tree)
5113 {
5114         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5115 }
5116
5117 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5118 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5119                                  struct extent_buffer *leaf, int slot)
5120 {
5121         struct btrfs_extent_ref_v0 *ref0;
5122         struct btrfs_key key;
5123
5124         btrfs_item_key_to_cpu(leaf, &key, slot);
5125         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5126         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5127                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5128         } else {
5129                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5130                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5131         }
5132         return 0;
5133 }
5134 #endif
5135
5136 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5137                                             struct btrfs_key *key,
5138                                             int slot)
5139 {
5140         struct btrfs_chunk *ptr;
5141         struct chunk_record *rec;
5142         int num_stripes, i;
5143
5144         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5145         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5146
5147         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5148         if (!rec) {
5149                 fprintf(stderr, "memory allocation failed\n");
5150                 exit(-1);
5151         }
5152
5153         INIT_LIST_HEAD(&rec->list);
5154         INIT_LIST_HEAD(&rec->dextents);
5155         rec->bg_rec = NULL;
5156
5157         rec->cache.start = key->offset;
5158         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5159
5160         rec->generation = btrfs_header_generation(leaf);
5161
5162         rec->objectid = key->objectid;
5163         rec->type = key->type;
5164         rec->offset = key->offset;
5165
5166         rec->length = rec->cache.size;
5167         rec->owner = btrfs_chunk_owner(leaf, ptr);
5168         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5169         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5170         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5171         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5172         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5173         rec->num_stripes = num_stripes;
5174         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5175
5176         for (i = 0; i < rec->num_stripes; ++i) {
5177                 rec->stripes[i].devid =
5178                         btrfs_stripe_devid_nr(leaf, ptr, i);
5179                 rec->stripes[i].offset =
5180                         btrfs_stripe_offset_nr(leaf, ptr, i);
5181                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5182                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5183                                 BTRFS_UUID_SIZE);
5184         }
5185
5186         return rec;
5187 }
5188
5189 static int process_chunk_item(struct cache_tree *chunk_cache,
5190                               struct btrfs_key *key, struct extent_buffer *eb,
5191                               int slot)
5192 {
5193         struct chunk_record *rec;
5194         int ret = 0;
5195
5196         rec = btrfs_new_chunk_record(eb, key, slot);
5197         ret = insert_cache_extent(chunk_cache, &rec->cache);
5198         if (ret) {
5199                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5200                         rec->offset, rec->length);
5201                 free(rec);
5202         }
5203
5204         return ret;
5205 }
5206
5207 static int process_device_item(struct rb_root *dev_cache,
5208                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5209 {
5210         struct btrfs_dev_item *ptr;
5211         struct device_record *rec;
5212         int ret = 0;
5213
5214         ptr = btrfs_item_ptr(eb,
5215                 slot, struct btrfs_dev_item);
5216
5217         rec = malloc(sizeof(*rec));
5218         if (!rec) {
5219                 fprintf(stderr, "memory allocation failed\n");
5220                 return -ENOMEM;
5221         }
5222
5223         rec->devid = key->offset;
5224         rec->generation = btrfs_header_generation(eb);
5225
5226         rec->objectid = key->objectid;
5227         rec->type = key->type;
5228         rec->offset = key->offset;
5229
5230         rec->devid = btrfs_device_id(eb, ptr);
5231         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5232         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5233
5234         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5235         if (ret) {
5236                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5237                 free(rec);
5238         }
5239
5240         return ret;
5241 }
5242
5243 struct block_group_record *
5244 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5245                              int slot)
5246 {
5247         struct btrfs_block_group_item *ptr;
5248         struct block_group_record *rec;
5249
5250         rec = calloc(1, sizeof(*rec));
5251         if (!rec) {
5252                 fprintf(stderr, "memory allocation failed\n");
5253                 exit(-1);
5254         }
5255
5256         rec->cache.start = key->objectid;
5257         rec->cache.size = key->offset;
5258
5259         rec->generation = btrfs_header_generation(leaf);
5260
5261         rec->objectid = key->objectid;
5262         rec->type = key->type;
5263         rec->offset = key->offset;
5264
5265         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5266         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5267
5268         INIT_LIST_HEAD(&rec->list);
5269
5270         return rec;
5271 }
5272
5273 static int process_block_group_item(struct block_group_tree *block_group_cache,
5274                                     struct btrfs_key *key,
5275                                     struct extent_buffer *eb, int slot)
5276 {
5277         struct block_group_record *rec;
5278         int ret = 0;
5279
5280         rec = btrfs_new_block_group_record(eb, key, slot);
5281         ret = insert_block_group_record(block_group_cache, rec);
5282         if (ret) {
5283                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5284                         rec->objectid, rec->offset);
5285                 free(rec);
5286         }
5287
5288         return ret;
5289 }
5290
5291 struct device_extent_record *
5292 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5293                                struct btrfs_key *key, int slot)
5294 {
5295         struct device_extent_record *rec;
5296         struct btrfs_dev_extent *ptr;
5297
5298         rec = calloc(1, sizeof(*rec));
5299         if (!rec) {
5300                 fprintf(stderr, "memory allocation failed\n");
5301                 exit(-1);
5302         }
5303
5304         rec->cache.objectid = key->objectid;
5305         rec->cache.start = key->offset;
5306
5307         rec->generation = btrfs_header_generation(leaf);
5308
5309         rec->objectid = key->objectid;
5310         rec->type = key->type;
5311         rec->offset = key->offset;
5312
5313         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5314         rec->chunk_objecteid =
5315                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5316         rec->chunk_offset =
5317                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5318         rec->length = btrfs_dev_extent_length(leaf, ptr);
5319         rec->cache.size = rec->length;
5320
5321         INIT_LIST_HEAD(&rec->chunk_list);
5322         INIT_LIST_HEAD(&rec->device_list);
5323
5324         return rec;
5325 }
5326
5327 static int
5328 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5329                            struct btrfs_key *key, struct extent_buffer *eb,
5330                            int slot)
5331 {
5332         struct device_extent_record *rec;
5333         int ret;
5334
5335         rec = btrfs_new_device_extent_record(eb, key, slot);
5336         ret = insert_device_extent_record(dev_extent_cache, rec);
5337         if (ret) {
5338                 fprintf(stderr,
5339                         "Device extent[%llu, %llu, %llu] existed.\n",
5340                         rec->objectid, rec->offset, rec->length);
5341                 free(rec);
5342         }
5343
5344         return ret;
5345 }
5346
5347 static int process_extent_item(struct btrfs_root *root,
5348                                struct cache_tree *extent_cache,
5349                                struct extent_buffer *eb, int slot)
5350 {
5351         struct btrfs_extent_item *ei;
5352         struct btrfs_extent_inline_ref *iref;
5353         struct btrfs_extent_data_ref *dref;
5354         struct btrfs_shared_data_ref *sref;
5355         struct btrfs_key key;
5356         struct extent_record tmpl;
5357         unsigned long end;
5358         unsigned long ptr;
5359         int type;
5360         u32 item_size = btrfs_item_size_nr(eb, slot);
5361         u64 refs = 0;
5362         u64 offset;
5363         u64 num_bytes;
5364         int metadata = 0;
5365
5366         btrfs_item_key_to_cpu(eb, &key, slot);
5367
5368         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5369                 metadata = 1;
5370                 num_bytes = root->nodesize;
5371         } else {
5372                 num_bytes = key.offset;
5373         }
5374
5375         if (item_size < sizeof(*ei)) {
5376 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5377                 struct btrfs_extent_item_v0 *ei0;
5378                 BUG_ON(item_size != sizeof(*ei0));
5379                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5380                 refs = btrfs_extent_refs_v0(eb, ei0);
5381 #else
5382                 BUG();
5383 #endif
5384                 memset(&tmpl, 0, sizeof(tmpl));
5385                 tmpl.start = key.objectid;
5386                 tmpl.nr = num_bytes;
5387                 tmpl.extent_item_refs = refs;
5388                 tmpl.metadata = metadata;
5389                 tmpl.found_rec = 1;
5390                 tmpl.max_size = num_bytes;
5391
5392                 return add_extent_rec(extent_cache, &tmpl);
5393         }
5394
5395         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5396         refs = btrfs_extent_refs(eb, ei);
5397         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5398                 metadata = 1;
5399         else
5400                 metadata = 0;
5401
5402         memset(&tmpl, 0, sizeof(tmpl));
5403         tmpl.start = key.objectid;
5404         tmpl.nr = num_bytes;
5405         tmpl.extent_item_refs = refs;
5406         tmpl.metadata = metadata;
5407         tmpl.found_rec = 1;
5408         tmpl.max_size = num_bytes;
5409         add_extent_rec(extent_cache, &tmpl);
5410
5411         ptr = (unsigned long)(ei + 1);
5412         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5413             key.type == BTRFS_EXTENT_ITEM_KEY)
5414                 ptr += sizeof(struct btrfs_tree_block_info);
5415
5416         end = (unsigned long)ei + item_size;
5417         while (ptr < end) {
5418                 iref = (struct btrfs_extent_inline_ref *)ptr;
5419                 type = btrfs_extent_inline_ref_type(eb, iref);
5420                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5421                 switch (type) {
5422                 case BTRFS_TREE_BLOCK_REF_KEY:
5423                         add_tree_backref(extent_cache, key.objectid,
5424                                          0, offset, 0);
5425                         break;
5426                 case BTRFS_SHARED_BLOCK_REF_KEY:
5427                         add_tree_backref(extent_cache, key.objectid,
5428                                          offset, 0, 0);
5429                         break;
5430                 case BTRFS_EXTENT_DATA_REF_KEY:
5431                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5432                         add_data_backref(extent_cache, key.objectid, 0,
5433                                         btrfs_extent_data_ref_root(eb, dref),
5434                                         btrfs_extent_data_ref_objectid(eb,
5435                                                                        dref),
5436                                         btrfs_extent_data_ref_offset(eb, dref),
5437                                         btrfs_extent_data_ref_count(eb, dref),
5438                                         0, num_bytes);
5439                         break;
5440                 case BTRFS_SHARED_DATA_REF_KEY:
5441                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5442                         add_data_backref(extent_cache, key.objectid, offset,
5443                                         0, 0, 0,
5444                                         btrfs_shared_data_ref_count(eb, sref),
5445                                         0, num_bytes);
5446                         break;
5447                 default:
5448                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5449                                 key.objectid, key.type, num_bytes);
5450                         goto out;
5451                 }
5452                 ptr += btrfs_extent_inline_ref_size(type);
5453         }
5454         WARN_ON(ptr > end);
5455 out:
5456         return 0;
5457 }
5458
5459 static int check_cache_range(struct btrfs_root *root,
5460                              struct btrfs_block_group_cache *cache,
5461                              u64 offset, u64 bytes)
5462 {
5463         struct btrfs_free_space *entry;
5464         u64 *logical;
5465         u64 bytenr;
5466         int stripe_len;
5467         int i, nr, ret;
5468
5469         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5470                 bytenr = btrfs_sb_offset(i);
5471                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5472                                        cache->key.objectid, bytenr, 0,
5473                                        &logical, &nr, &stripe_len);
5474                 if (ret)
5475                         return ret;
5476
5477                 while (nr--) {
5478                         if (logical[nr] + stripe_len <= offset)
5479                                 continue;
5480                         if (offset + bytes <= logical[nr])
5481                                 continue;
5482                         if (logical[nr] == offset) {
5483                                 if (stripe_len >= bytes) {
5484                                         kfree(logical);
5485                                         return 0;
5486                                 }
5487                                 bytes -= stripe_len;
5488                                 offset += stripe_len;
5489                         } else if (logical[nr] < offset) {
5490                                 if (logical[nr] + stripe_len >=
5491                                     offset + bytes) {
5492                                         kfree(logical);
5493                                         return 0;
5494                                 }
5495                                 bytes = (offset + bytes) -
5496                                         (logical[nr] + stripe_len);
5497                                 offset = logical[nr] + stripe_len;
5498                         } else {
5499                                 /*
5500                                  * Could be tricky, the super may land in the
5501                                  * middle of the area we're checking.  First
5502                                  * check the easiest case, it's at the end.
5503                                  */
5504                                 if (logical[nr] + stripe_len >=
5505                                     bytes + offset) {
5506                                         bytes = logical[nr] - offset;
5507                                         continue;
5508                                 }
5509
5510                                 /* Check the left side */
5511                                 ret = check_cache_range(root, cache,
5512                                                         offset,
5513                                                         logical[nr] - offset);
5514                                 if (ret) {
5515                                         kfree(logical);
5516                                         return ret;
5517                                 }
5518
5519                                 /* Now we continue with the right side */
5520                                 bytes = (offset + bytes) -
5521                                         (logical[nr] + stripe_len);
5522                                 offset = logical[nr] + stripe_len;
5523                         }
5524                 }
5525
5526                 kfree(logical);
5527         }
5528
5529         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5530         if (!entry) {
5531                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5532                         offset, offset+bytes);
5533                 return -EINVAL;
5534         }
5535
5536         if (entry->offset != offset) {
5537                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5538                         entry->offset);
5539                 return -EINVAL;
5540         }
5541
5542         if (entry->bytes != bytes) {
5543                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5544                         bytes, entry->bytes, offset);
5545                 return -EINVAL;
5546         }
5547
5548         unlink_free_space(cache->free_space_ctl, entry);
5549         free(entry);
5550         return 0;
5551 }
5552
5553 static int verify_space_cache(struct btrfs_root *root,
5554                               struct btrfs_block_group_cache *cache)
5555 {
5556         struct btrfs_path *path;
5557         struct extent_buffer *leaf;
5558         struct btrfs_key key;
5559         u64 last;
5560         int ret = 0;
5561
5562         path = btrfs_alloc_path();
5563         if (!path)
5564                 return -ENOMEM;
5565
5566         root = root->fs_info->extent_root;
5567
5568         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5569
5570         key.objectid = last;
5571         key.offset = 0;
5572         key.type = BTRFS_EXTENT_ITEM_KEY;
5573
5574         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5575         if (ret < 0)
5576                 goto out;
5577         ret = 0;
5578         while (1) {
5579                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5580                         ret = btrfs_next_leaf(root, path);
5581                         if (ret < 0)
5582                                 goto out;
5583                         if (ret > 0) {
5584                                 ret = 0;
5585                                 break;
5586                         }
5587                 }
5588                 leaf = path->nodes[0];
5589                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5590                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5591                         break;
5592                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5593                     key.type != BTRFS_METADATA_ITEM_KEY) {
5594                         path->slots[0]++;
5595                         continue;
5596                 }
5597
5598                 if (last == key.objectid) {
5599                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5600                                 last = key.objectid + key.offset;
5601                         else
5602                                 last = key.objectid + root->nodesize;
5603                         path->slots[0]++;
5604                         continue;
5605                 }
5606
5607                 ret = check_cache_range(root, cache, last,
5608                                         key.objectid - last);
5609                 if (ret)
5610                         break;
5611                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5612                         last = key.objectid + key.offset;
5613                 else
5614                         last = key.objectid + root->nodesize;
5615                 path->slots[0]++;
5616         }
5617
5618         if (last < cache->key.objectid + cache->key.offset)
5619                 ret = check_cache_range(root, cache, last,
5620                                         cache->key.objectid +
5621                                         cache->key.offset - last);
5622
5623 out:
5624         btrfs_free_path(path);
5625
5626         if (!ret &&
5627             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5628                 fprintf(stderr, "There are still entries left in the space "
5629                         "cache\n");
5630                 ret = -EINVAL;
5631         }
5632
5633         return ret;
5634 }
5635
5636 static int check_space_cache(struct btrfs_root *root)
5637 {
5638         struct btrfs_block_group_cache *cache;
5639         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5640         int ret;
5641         int error = 0;
5642
5643         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5644             btrfs_super_generation(root->fs_info->super_copy) !=
5645             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5646                 printf("cache and super generation don't match, space cache "
5647                        "will be invalidated\n");
5648                 return 0;
5649         }
5650
5651         if (ctx.progress_enabled) {
5652                 ctx.tp = TASK_FREE_SPACE;
5653                 task_start(ctx.info);
5654         }
5655
5656         while (1) {
5657                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5658                 if (!cache)
5659                         break;
5660
5661                 start = cache->key.objectid + cache->key.offset;
5662                 if (!cache->free_space_ctl) {
5663                         if (btrfs_init_free_space_ctl(cache,
5664                                                       root->sectorsize)) {
5665                                 ret = -ENOMEM;
5666                                 break;
5667                         }
5668                 } else {
5669                         btrfs_remove_free_space_cache(cache);
5670                 }
5671
5672                 if (btrfs_fs_compat_ro(root->fs_info,
5673                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5674                         ret = exclude_super_stripes(root, cache);
5675                         if (ret) {
5676                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5677                                         strerror(-ret));
5678                                 error++;
5679                                 continue;
5680                         }
5681                         ret = load_free_space_tree(root->fs_info, cache);
5682                         free_excluded_extents(root, cache);
5683                         if (ret < 0) {
5684                                 fprintf(stderr, "could not load free space tree: %s\n",
5685                                         strerror(-ret));
5686                                 error++;
5687                                 continue;
5688                         }
5689                         error += ret;
5690                 } else {
5691                         ret = load_free_space_cache(root->fs_info, cache);
5692                         if (!ret)
5693                                 continue;
5694                 }
5695
5696                 ret = verify_space_cache(root, cache);
5697                 if (ret) {
5698                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5699                                 cache->key.objectid);
5700                         error++;
5701                 }
5702         }
5703
5704         task_stop(ctx.info);
5705
5706         return error ? -EINVAL : 0;
5707 }
5708
5709 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5710                         u64 num_bytes, unsigned long leaf_offset,
5711                         struct extent_buffer *eb) {
5712
5713         u64 offset = 0;
5714         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5715         char *data;
5716         unsigned long csum_offset;
5717         u32 csum;
5718         u32 csum_expected;
5719         u64 read_len;
5720         u64 data_checked = 0;
5721         u64 tmp;
5722         int ret = 0;
5723         int mirror;
5724         int num_copies;
5725
5726         if (num_bytes % root->sectorsize)
5727                 return -EINVAL;
5728
5729         data = malloc(num_bytes);
5730         if (!data)
5731                 return -ENOMEM;
5732
5733         while (offset < num_bytes) {
5734                 mirror = 0;
5735 again:
5736                 read_len = num_bytes - offset;
5737                 /* read as much space once a time */
5738                 ret = read_extent_data(root, data + offset,
5739                                 bytenr + offset, &read_len, mirror);
5740                 if (ret)
5741                         goto out;
5742                 data_checked = 0;
5743                 /* verify every 4k data's checksum */
5744                 while (data_checked < read_len) {
5745                         csum = ~(u32)0;
5746                         tmp = offset + data_checked;
5747
5748                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5749                                                csum, root->sectorsize);
5750                         btrfs_csum_final(csum, (char *)&csum);
5751
5752                         csum_offset = leaf_offset +
5753                                  tmp / root->sectorsize * csum_size;
5754                         read_extent_buffer(eb, (char *)&csum_expected,
5755                                            csum_offset, csum_size);
5756                         /* try another mirror */
5757                         if (csum != csum_expected) {
5758                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5759                                                 mirror, bytenr + tmp,
5760                                                 csum, csum_expected);
5761                                 num_copies = btrfs_num_copies(
5762                                                 &root->fs_info->mapping_tree,
5763                                                 bytenr, num_bytes);
5764                                 if (mirror < num_copies - 1) {
5765                                         mirror += 1;
5766                                         goto again;
5767                                 }
5768                         }
5769                         data_checked += root->sectorsize;
5770                 }
5771                 offset += read_len;
5772         }
5773 out:
5774         free(data);
5775         return ret;
5776 }
5777
5778 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5779                                u64 num_bytes)
5780 {
5781         struct btrfs_path *path;
5782         struct extent_buffer *leaf;
5783         struct btrfs_key key;
5784         int ret;
5785
5786         path = btrfs_alloc_path();
5787         if (!path) {
5788                 fprintf(stderr, "Error allocating path\n");
5789                 return -ENOMEM;
5790         }
5791
5792         key.objectid = bytenr;
5793         key.type = BTRFS_EXTENT_ITEM_KEY;
5794         key.offset = (u64)-1;
5795
5796 again:
5797         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5798                                 0, 0);
5799         if (ret < 0) {
5800                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5801                 btrfs_free_path(path);
5802                 return ret;
5803         } else if (ret) {
5804                 if (path->slots[0] > 0) {
5805                         path->slots[0]--;
5806                 } else {
5807                         ret = btrfs_prev_leaf(root, path);
5808                         if (ret < 0) {
5809                                 goto out;
5810                         } else if (ret > 0) {
5811                                 ret = 0;
5812                                 goto out;
5813                         }
5814                 }
5815         }
5816
5817         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5818
5819         /*
5820          * Block group items come before extent items if they have the same
5821          * bytenr, so walk back one more just in case.  Dear future traveller,
5822          * first congrats on mastering time travel.  Now if it's not too much
5823          * trouble could you go back to 2006 and tell Chris to make the
5824          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5825          * EXTENT_ITEM_KEY please?
5826          */
5827         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5828                 if (path->slots[0] > 0) {
5829                         path->slots[0]--;
5830                 } else {
5831                         ret = btrfs_prev_leaf(root, path);
5832                         if (ret < 0) {
5833                                 goto out;
5834                         } else if (ret > 0) {
5835                                 ret = 0;
5836                                 goto out;
5837                         }
5838                 }
5839                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5840         }
5841
5842         while (num_bytes) {
5843                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5844                         ret = btrfs_next_leaf(root, path);
5845                         if (ret < 0) {
5846                                 fprintf(stderr, "Error going to next leaf "
5847                                         "%d\n", ret);
5848                                 btrfs_free_path(path);
5849                                 return ret;
5850                         } else if (ret) {
5851                                 break;
5852                         }
5853                 }
5854                 leaf = path->nodes[0];
5855                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5856                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5857                         path->slots[0]++;
5858                         continue;
5859                 }
5860                 if (key.objectid + key.offset < bytenr) {
5861                         path->slots[0]++;
5862                         continue;
5863                 }
5864                 if (key.objectid > bytenr + num_bytes)
5865                         break;
5866
5867                 if (key.objectid == bytenr) {
5868                         if (key.offset >= num_bytes) {
5869                                 num_bytes = 0;
5870                                 break;
5871                         }
5872                         num_bytes -= key.offset;
5873                         bytenr += key.offset;
5874                 } else if (key.objectid < bytenr) {
5875                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5876                                 num_bytes = 0;
5877                                 break;
5878                         }
5879                         num_bytes = (bytenr + num_bytes) -
5880                                 (key.objectid + key.offset);
5881                         bytenr = key.objectid + key.offset;
5882                 } else {
5883                         if (key.objectid + key.offset < bytenr + num_bytes) {
5884                                 u64 new_start = key.objectid + key.offset;
5885                                 u64 new_bytes = bytenr + num_bytes - new_start;
5886
5887                                 /*
5888                                  * Weird case, the extent is in the middle of
5889                                  * our range, we'll have to search one side
5890                                  * and then the other.  Not sure if this happens
5891                                  * in real life, but no harm in coding it up
5892                                  * anyway just in case.
5893                                  */
5894                                 btrfs_release_path(path);
5895                                 ret = check_extent_exists(root, new_start,
5896                                                           new_bytes);
5897                                 if (ret) {
5898                                         fprintf(stderr, "Right section didn't "
5899                                                 "have a record\n");
5900                                         break;
5901                                 }
5902                                 num_bytes = key.objectid - bytenr;
5903                                 goto again;
5904                         }
5905                         num_bytes = key.objectid - bytenr;
5906                 }
5907                 path->slots[0]++;
5908         }
5909         ret = 0;
5910
5911 out:
5912         if (num_bytes && !ret) {
5913                 fprintf(stderr, "There are no extents for csum range "
5914                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5915                 ret = 1;
5916         }
5917
5918         btrfs_free_path(path);
5919         return ret;
5920 }
5921
5922 static int check_csums(struct btrfs_root *root)
5923 {
5924         struct btrfs_path *path;
5925         struct extent_buffer *leaf;
5926         struct btrfs_key key;
5927         u64 offset = 0, num_bytes = 0;
5928         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5929         int errors = 0;
5930         int ret;
5931         u64 data_len;
5932         unsigned long leaf_offset;
5933
5934         root = root->fs_info->csum_root;
5935         if (!extent_buffer_uptodate(root->node)) {
5936                 fprintf(stderr, "No valid csum tree found\n");
5937                 return -ENOENT;
5938         }
5939
5940         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5941         key.type = BTRFS_EXTENT_CSUM_KEY;
5942         key.offset = 0;
5943
5944         path = btrfs_alloc_path();
5945         if (!path)
5946                 return -ENOMEM;
5947
5948         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5949         if (ret < 0) {
5950                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5951                 btrfs_free_path(path);
5952                 return ret;
5953         }
5954
5955         if (ret > 0 && path->slots[0])
5956                 path->slots[0]--;
5957         ret = 0;
5958
5959         while (1) {
5960                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5961                         ret = btrfs_next_leaf(root, path);
5962                         if (ret < 0) {
5963                                 fprintf(stderr, "Error going to next leaf "
5964                                         "%d\n", ret);
5965                                 break;
5966                         }
5967                         if (ret)
5968                                 break;
5969                 }
5970                 leaf = path->nodes[0];
5971
5972                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5973                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5974                         path->slots[0]++;
5975                         continue;
5976                 }
5977
5978                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5979                               csum_size) * root->sectorsize;
5980                 if (!check_data_csum)
5981                         goto skip_csum_check;
5982                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5983                 ret = check_extent_csums(root, key.offset, data_len,
5984                                          leaf_offset, leaf);
5985                 if (ret)
5986                         break;
5987 skip_csum_check:
5988                 if (!num_bytes) {
5989                         offset = key.offset;
5990                 } else if (key.offset != offset + num_bytes) {
5991                         ret = check_extent_exists(root, offset, num_bytes);
5992                         if (ret) {
5993                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5994                                         "there is no extent record\n",
5995                                         offset, offset+num_bytes);
5996                                 errors++;
5997                         }
5998                         offset = key.offset;
5999                         num_bytes = 0;
6000                 }
6001                 num_bytes += data_len;
6002                 path->slots[0]++;
6003         }
6004
6005         btrfs_free_path(path);
6006         return errors;
6007 }
6008
6009 static int is_dropped_key(struct btrfs_key *key,
6010                           struct btrfs_key *drop_key) {
6011         if (key->objectid < drop_key->objectid)
6012                 return 1;
6013         else if (key->objectid == drop_key->objectid) {
6014                 if (key->type < drop_key->type)
6015                         return 1;
6016                 else if (key->type == drop_key->type) {
6017                         if (key->offset < drop_key->offset)
6018                                 return 1;
6019                 }
6020         }
6021         return 0;
6022 }
6023
6024 /*
6025  * Here are the rules for FULL_BACKREF.
6026  *
6027  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6028  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6029  *      FULL_BACKREF set.
6030  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6031  *    if it happened after the relocation occurred since we'll have dropped the
6032  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6033  *    have no real way to know for sure.
6034  *
6035  * We process the blocks one root at a time, and we start from the lowest root
6036  * objectid and go to the highest.  So we can just lookup the owner backref for
6037  * the record and if we don't find it then we know it doesn't exist and we have
6038  * a FULL BACKREF.
6039  *
6040  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6041  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6042  * be set or not and then we can check later once we've gathered all the refs.
6043  */
6044 static int calc_extent_flag(struct btrfs_root *root,
6045                            struct cache_tree *extent_cache,
6046                            struct extent_buffer *buf,
6047                            struct root_item_record *ri,
6048                            u64 *flags)
6049 {
6050         struct extent_record *rec;
6051         struct cache_extent *cache;
6052         struct tree_backref *tback;
6053         u64 owner = 0;
6054
6055         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6056         /* we have added this extent before */
6057         BUG_ON(!cache);
6058         rec = container_of(cache, struct extent_record, cache);
6059
6060         /*
6061          * Except file/reloc tree, we can not have
6062          * FULL BACKREF MODE
6063          */
6064         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6065                 goto normal;
6066         /*
6067          * root node
6068          */
6069         if (buf->start == ri->bytenr)
6070                 goto normal;
6071
6072         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6073                 goto full_backref;
6074
6075         owner = btrfs_header_owner(buf);
6076         if (owner == ri->objectid)
6077                 goto normal;
6078
6079         tback = find_tree_backref(rec, 0, owner);
6080         if (!tback)
6081                 goto full_backref;
6082 normal:
6083         *flags = 0;
6084         if (rec->flag_block_full_backref != FLAG_UNSET &&
6085             rec->flag_block_full_backref != 0)
6086                 rec->bad_full_backref = 1;
6087         return 0;
6088 full_backref:
6089         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6090         if (rec->flag_block_full_backref != FLAG_UNSET &&
6091             rec->flag_block_full_backref != 1)
6092                 rec->bad_full_backref = 1;
6093         return 0;
6094 }
6095
6096 static int run_next_block(struct btrfs_root *root,
6097                           struct block_info *bits,
6098                           int bits_nr,
6099                           u64 *last,
6100                           struct cache_tree *pending,
6101                           struct cache_tree *seen,
6102                           struct cache_tree *reada,
6103                           struct cache_tree *nodes,
6104                           struct cache_tree *extent_cache,
6105                           struct cache_tree *chunk_cache,
6106                           struct rb_root *dev_cache,
6107                           struct block_group_tree *block_group_cache,
6108                           struct device_extent_tree *dev_extent_cache,
6109                           struct root_item_record *ri)
6110 {
6111         struct extent_buffer *buf;
6112         struct extent_record *rec = NULL;
6113         u64 bytenr;
6114         u32 size;
6115         u64 parent;
6116         u64 owner;
6117         u64 flags;
6118         u64 ptr;
6119         u64 gen = 0;
6120         int ret = 0;
6121         int i;
6122         int nritems;
6123         struct btrfs_key key;
6124         struct cache_extent *cache;
6125         int reada_bits;
6126
6127         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6128                                     bits_nr, &reada_bits);
6129         if (nritems == 0)
6130                 return 1;
6131
6132         if (!reada_bits) {
6133                 for(i = 0; i < nritems; i++) {
6134                         ret = add_cache_extent(reada, bits[i].start,
6135                                                bits[i].size);
6136                         if (ret == -EEXIST)
6137                                 continue;
6138
6139                         /* fixme, get the parent transid */
6140                         readahead_tree_block(root, bits[i].start,
6141                                              bits[i].size, 0);
6142                 }
6143         }
6144         *last = bits[0].start;
6145         bytenr = bits[0].start;
6146         size = bits[0].size;
6147
6148         cache = lookup_cache_extent(pending, bytenr, size);
6149         if (cache) {
6150                 remove_cache_extent(pending, cache);
6151                 free(cache);
6152         }
6153         cache = lookup_cache_extent(reada, bytenr, size);
6154         if (cache) {
6155                 remove_cache_extent(reada, cache);
6156                 free(cache);
6157         }
6158         cache = lookup_cache_extent(nodes, bytenr, size);
6159         if (cache) {
6160                 remove_cache_extent(nodes, cache);
6161                 free(cache);
6162         }
6163         cache = lookup_cache_extent(extent_cache, bytenr, size);
6164         if (cache) {
6165                 rec = container_of(cache, struct extent_record, cache);
6166                 gen = rec->parent_generation;
6167         }
6168
6169         /* fixme, get the real parent transid */
6170         buf = read_tree_block(root, bytenr, size, gen);
6171         if (!extent_buffer_uptodate(buf)) {
6172                 record_bad_block_io(root->fs_info,
6173                                     extent_cache, bytenr, size);
6174                 goto out;
6175         }
6176
6177         nritems = btrfs_header_nritems(buf);
6178
6179         flags = 0;
6180         if (!init_extent_tree) {
6181                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6182                                        btrfs_header_level(buf), 1, NULL,
6183                                        &flags);
6184                 if (ret < 0) {
6185                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6186                         if (ret < 0) {
6187                                 fprintf(stderr, "Couldn't calc extent flags\n");
6188                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6189                         }
6190                 }
6191         } else {
6192                 flags = 0;
6193                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6194                 if (ret < 0) {
6195                         fprintf(stderr, "Couldn't calc extent flags\n");
6196                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6197                 }
6198         }
6199
6200         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6201                 if (ri != NULL &&
6202                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6203                     ri->objectid == btrfs_header_owner(buf)) {
6204                         /*
6205                          * Ok we got to this block from it's original owner and
6206                          * we have FULL_BACKREF set.  Relocation can leave
6207                          * converted blocks over so this is altogether possible,
6208                          * however it's not possible if the generation > the
6209                          * last snapshot, so check for this case.
6210                          */
6211                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6212                             btrfs_header_generation(buf) > ri->last_snapshot) {
6213                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6214                                 rec->bad_full_backref = 1;
6215                         }
6216                 }
6217         } else {
6218                 if (ri != NULL &&
6219                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6220                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6221                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6222                         rec->bad_full_backref = 1;
6223                 }
6224         }
6225
6226         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6227                 rec->flag_block_full_backref = 1;
6228                 parent = bytenr;
6229                 owner = 0;
6230         } else {
6231                 rec->flag_block_full_backref = 0;
6232                 parent = 0;
6233                 owner = btrfs_header_owner(buf);
6234         }
6235
6236         ret = check_block(root, extent_cache, buf, flags);
6237         if (ret)
6238                 goto out;
6239
6240         if (btrfs_is_leaf(buf)) {
6241                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6242                 for (i = 0; i < nritems; i++) {
6243                         struct btrfs_file_extent_item *fi;
6244                         btrfs_item_key_to_cpu(buf, &key, i);
6245                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6246                                 process_extent_item(root, extent_cache, buf,
6247                                                     i);
6248                                 continue;
6249                         }
6250                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6251                                 process_extent_item(root, extent_cache, buf,
6252                                                     i);
6253                                 continue;
6254                         }
6255                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6256                                 total_csum_bytes +=
6257                                         btrfs_item_size_nr(buf, i);
6258                                 continue;
6259                         }
6260                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6261                                 process_chunk_item(chunk_cache, &key, buf, i);
6262                                 continue;
6263                         }
6264                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6265                                 process_device_item(dev_cache, &key, buf, i);
6266                                 continue;
6267                         }
6268                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6269                                 process_block_group_item(block_group_cache,
6270                                         &key, buf, i);
6271                                 continue;
6272                         }
6273                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6274                                 process_device_extent_item(dev_extent_cache,
6275                                         &key, buf, i);
6276                                 continue;
6277
6278                         }
6279                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6280 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6281                                 process_extent_ref_v0(extent_cache, buf, i);
6282 #else
6283                                 BUG();
6284 #endif
6285                                 continue;
6286                         }
6287
6288                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6289                                 add_tree_backref(extent_cache, key.objectid, 0,
6290                                                  key.offset, 0);
6291                                 continue;
6292                         }
6293                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6294                                 add_tree_backref(extent_cache, key.objectid,
6295                                                  key.offset, 0, 0);
6296                                 continue;
6297                         }
6298                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6299                                 struct btrfs_extent_data_ref *ref;
6300                                 ref = btrfs_item_ptr(buf, i,
6301                                                 struct btrfs_extent_data_ref);
6302                                 add_data_backref(extent_cache,
6303                                         key.objectid, 0,
6304                                         btrfs_extent_data_ref_root(buf, ref),
6305                                         btrfs_extent_data_ref_objectid(buf,
6306                                                                        ref),
6307                                         btrfs_extent_data_ref_offset(buf, ref),
6308                                         btrfs_extent_data_ref_count(buf, ref),
6309                                         0, root->sectorsize);
6310                                 continue;
6311                         }
6312                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6313                                 struct btrfs_shared_data_ref *ref;
6314                                 ref = btrfs_item_ptr(buf, i,
6315                                                 struct btrfs_shared_data_ref);
6316                                 add_data_backref(extent_cache,
6317                                         key.objectid, key.offset, 0, 0, 0,
6318                                         btrfs_shared_data_ref_count(buf, ref),
6319                                         0, root->sectorsize);
6320                                 continue;
6321                         }
6322                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6323                                 struct bad_item *bad;
6324
6325                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6326                                         continue;
6327                                 if (!owner)
6328                                         continue;
6329                                 bad = malloc(sizeof(struct bad_item));
6330                                 if (!bad)
6331                                         continue;
6332                                 INIT_LIST_HEAD(&bad->list);
6333                                 memcpy(&bad->key, &key,
6334                                        sizeof(struct btrfs_key));
6335                                 bad->root_id = owner;
6336                                 list_add_tail(&bad->list, &delete_items);
6337                                 continue;
6338                         }
6339                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6340                                 continue;
6341                         fi = btrfs_item_ptr(buf, i,
6342                                             struct btrfs_file_extent_item);
6343                         if (btrfs_file_extent_type(buf, fi) ==
6344                             BTRFS_FILE_EXTENT_INLINE)
6345                                 continue;
6346                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6347                                 continue;
6348
6349                         data_bytes_allocated +=
6350                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6351                         if (data_bytes_allocated < root->sectorsize) {
6352                                 abort();
6353                         }
6354                         data_bytes_referenced +=
6355                                 btrfs_file_extent_num_bytes(buf, fi);
6356                         add_data_backref(extent_cache,
6357                                 btrfs_file_extent_disk_bytenr(buf, fi),
6358                                 parent, owner, key.objectid, key.offset -
6359                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6360                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6361                 }
6362         } else {
6363                 int level;
6364                 struct btrfs_key first_key;
6365
6366                 first_key.objectid = 0;
6367
6368                 if (nritems > 0)
6369                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6370                 level = btrfs_header_level(buf);
6371                 for (i = 0; i < nritems; i++) {
6372                         struct extent_record tmpl;
6373
6374                         ptr = btrfs_node_blockptr(buf, i);
6375                         size = root->nodesize;
6376                         btrfs_node_key_to_cpu(buf, &key, i);
6377                         if (ri != NULL) {
6378                                 if ((level == ri->drop_level)
6379                                     && is_dropped_key(&key, &ri->drop_key)) {
6380                                         continue;
6381                                 }
6382                         }
6383
6384                         memset(&tmpl, 0, sizeof(tmpl));
6385                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6386                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6387                         tmpl.start = ptr;
6388                         tmpl.nr = size;
6389                         tmpl.refs = 1;
6390                         tmpl.metadata = 1;
6391                         tmpl.max_size = size;
6392                         ret = add_extent_rec(extent_cache, &tmpl);
6393                         BUG_ON(ret);
6394
6395                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6396
6397                         if (level > 1) {
6398                                 add_pending(nodes, seen, ptr, size);
6399                         } else {
6400                                 add_pending(pending, seen, ptr, size);
6401                         }
6402                 }
6403                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6404                                       nritems) * sizeof(struct btrfs_key_ptr);
6405         }
6406         total_btree_bytes += buf->len;
6407         if (fs_root_objectid(btrfs_header_owner(buf)))
6408                 total_fs_tree_bytes += buf->len;
6409         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6410                 total_extent_tree_bytes += buf->len;
6411         if (!found_old_backref &&
6412             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6413             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6414             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6415                 found_old_backref = 1;
6416 out:
6417         free_extent_buffer(buf);
6418         return ret;
6419 }
6420
6421 static int add_root_to_pending(struct extent_buffer *buf,
6422                                struct cache_tree *extent_cache,
6423                                struct cache_tree *pending,
6424                                struct cache_tree *seen,
6425                                struct cache_tree *nodes,
6426                                u64 objectid)
6427 {
6428         struct extent_record tmpl;
6429
6430         if (btrfs_header_level(buf) > 0)
6431                 add_pending(nodes, seen, buf->start, buf->len);
6432         else
6433                 add_pending(pending, seen, buf->start, buf->len);
6434
6435         memset(&tmpl, 0, sizeof(tmpl));
6436         tmpl.start = buf->start;
6437         tmpl.nr = buf->len;
6438         tmpl.is_root = 1;
6439         tmpl.refs = 1;
6440         tmpl.metadata = 1;
6441         tmpl.max_size = buf->len;
6442         add_extent_rec(extent_cache, &tmpl);
6443
6444         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6445             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6446                 add_tree_backref(extent_cache, buf->start, buf->start,
6447                                  0, 1);
6448         else
6449                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6450         return 0;
6451 }
6452
6453 /* as we fix the tree, we might be deleting blocks that
6454  * we're tracking for repair.  This hook makes sure we
6455  * remove any backrefs for blocks as we are fixing them.
6456  */
6457 static int free_extent_hook(struct btrfs_trans_handle *trans,
6458                             struct btrfs_root *root,
6459                             u64 bytenr, u64 num_bytes, u64 parent,
6460                             u64 root_objectid, u64 owner, u64 offset,
6461                             int refs_to_drop)
6462 {
6463         struct extent_record *rec;
6464         struct cache_extent *cache;
6465         int is_data;
6466         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6467
6468         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6469         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6470         if (!cache)
6471                 return 0;
6472
6473         rec = container_of(cache, struct extent_record, cache);
6474         if (is_data) {
6475                 struct data_backref *back;
6476                 back = find_data_backref(rec, parent, root_objectid, owner,
6477                                          offset, 1, bytenr, num_bytes);
6478                 if (!back)
6479                         goto out;
6480                 if (back->node.found_ref) {
6481                         back->found_ref -= refs_to_drop;
6482                         if (rec->refs)
6483                                 rec->refs -= refs_to_drop;
6484                 }
6485                 if (back->node.found_extent_tree) {
6486                         back->num_refs -= refs_to_drop;
6487                         if (rec->extent_item_refs)
6488                                 rec->extent_item_refs -= refs_to_drop;
6489                 }
6490                 if (back->found_ref == 0)
6491                         back->node.found_ref = 0;
6492                 if (back->num_refs == 0)
6493                         back->node.found_extent_tree = 0;
6494
6495                 if (!back->node.found_extent_tree && back->node.found_ref) {
6496                         rb_erase(&back->node.node, &rec->backref_tree);
6497                         free(back);
6498                 }
6499         } else {
6500                 struct tree_backref *back;
6501                 back = find_tree_backref(rec, parent, root_objectid);
6502                 if (!back)
6503                         goto out;
6504                 if (back->node.found_ref) {
6505                         if (rec->refs)
6506                                 rec->refs--;
6507                         back->node.found_ref = 0;
6508                 }
6509                 if (back->node.found_extent_tree) {
6510                         if (rec->extent_item_refs)
6511                                 rec->extent_item_refs--;
6512                         back->node.found_extent_tree = 0;
6513                 }
6514                 if (!back->node.found_extent_tree && back->node.found_ref) {
6515                         rb_erase(&back->node.node, &rec->backref_tree);
6516                         free(back);
6517                 }
6518         }
6519         maybe_free_extent_rec(extent_cache, rec);
6520 out:
6521         return 0;
6522 }
6523
6524 static int delete_extent_records(struct btrfs_trans_handle *trans,
6525                                  struct btrfs_root *root,
6526                                  struct btrfs_path *path,
6527                                  u64 bytenr, u64 new_len)
6528 {
6529         struct btrfs_key key;
6530         struct btrfs_key found_key;
6531         struct extent_buffer *leaf;
6532         int ret;
6533         int slot;
6534
6535
6536         key.objectid = bytenr;
6537         key.type = (u8)-1;
6538         key.offset = (u64)-1;
6539
6540         while(1) {
6541                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6542                                         &key, path, 0, 1);
6543                 if (ret < 0)
6544                         break;
6545
6546                 if (ret > 0) {
6547                         ret = 0;
6548                         if (path->slots[0] == 0)
6549                                 break;
6550                         path->slots[0]--;
6551                 }
6552                 ret = 0;
6553
6554                 leaf = path->nodes[0];
6555                 slot = path->slots[0];
6556
6557                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6558                 if (found_key.objectid != bytenr)
6559                         break;
6560
6561                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6562                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6563                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6564                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6565                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6566                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6567                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6568                         btrfs_release_path(path);
6569                         if (found_key.type == 0) {
6570                                 if (found_key.offset == 0)
6571                                         break;
6572                                 key.offset = found_key.offset - 1;
6573                                 key.type = found_key.type;
6574                         }
6575                         key.type = found_key.type - 1;
6576                         key.offset = (u64)-1;
6577                         continue;
6578                 }
6579
6580                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6581                         found_key.objectid, found_key.type, found_key.offset);
6582
6583                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6584                 if (ret)
6585                         break;
6586                 btrfs_release_path(path);
6587
6588                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6589                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6590                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6591                                 found_key.offset : root->nodesize;
6592
6593                         ret = btrfs_update_block_group(trans, root, bytenr,
6594                                                        bytes, 0, 0);
6595                         if (ret)
6596                                 break;
6597                 }
6598         }
6599
6600         btrfs_release_path(path);
6601         return ret;
6602 }
6603
6604 /*
6605  * for a single backref, this will allocate a new extent
6606  * and add the backref to it.
6607  */
6608 static int record_extent(struct btrfs_trans_handle *trans,
6609                          struct btrfs_fs_info *info,
6610                          struct btrfs_path *path,
6611                          struct extent_record *rec,
6612                          struct extent_backref *back,
6613                          int allocated, u64 flags)
6614 {
6615         int ret;
6616         struct btrfs_root *extent_root = info->extent_root;
6617         struct extent_buffer *leaf;
6618         struct btrfs_key ins_key;
6619         struct btrfs_extent_item *ei;
6620         struct tree_backref *tback;
6621         struct data_backref *dback;
6622         struct btrfs_tree_block_info *bi;
6623
6624         if (!back->is_data)
6625                 rec->max_size = max_t(u64, rec->max_size,
6626                                     info->extent_root->nodesize);
6627
6628         if (!allocated) {
6629                 u32 item_size = sizeof(*ei);
6630
6631                 if (!back->is_data)
6632                         item_size += sizeof(*bi);
6633
6634                 ins_key.objectid = rec->start;
6635                 ins_key.offset = rec->max_size;
6636                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6637
6638                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6639                                         &ins_key, item_size);
6640                 if (ret)
6641                         goto fail;
6642
6643                 leaf = path->nodes[0];
6644                 ei = btrfs_item_ptr(leaf, path->slots[0],
6645                                     struct btrfs_extent_item);
6646
6647                 btrfs_set_extent_refs(leaf, ei, 0);
6648                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6649
6650                 if (back->is_data) {
6651                         btrfs_set_extent_flags(leaf, ei,
6652                                                BTRFS_EXTENT_FLAG_DATA);
6653                 } else {
6654                         struct btrfs_disk_key copy_key;;
6655
6656                         tback = to_tree_backref(back);
6657                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6658                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6659                                              sizeof(*bi));
6660
6661                         btrfs_set_disk_key_objectid(&copy_key,
6662                                                     rec->info_objectid);
6663                         btrfs_set_disk_key_type(&copy_key, 0);
6664                         btrfs_set_disk_key_offset(&copy_key, 0);
6665
6666                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6667                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6668
6669                         btrfs_set_extent_flags(leaf, ei,
6670                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6671                 }
6672
6673                 btrfs_mark_buffer_dirty(leaf);
6674                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6675                                                rec->max_size, 1, 0);
6676                 if (ret)
6677                         goto fail;
6678                 btrfs_release_path(path);
6679         }
6680
6681         if (back->is_data) {
6682                 u64 parent;
6683                 int i;
6684
6685                 dback = to_data_backref(back);
6686                 if (back->full_backref)
6687                         parent = dback->parent;
6688                 else
6689                         parent = 0;
6690
6691                 for (i = 0; i < dback->found_ref; i++) {
6692                         /* if parent != 0, we're doing a full backref
6693                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6694                          * just makes the backref allocator create a data
6695                          * backref
6696                          */
6697                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6698                                                    rec->start, rec->max_size,
6699                                                    parent,
6700                                                    dback->root,
6701                                                    parent ?
6702                                                    BTRFS_FIRST_FREE_OBJECTID :
6703                                                    dback->owner,
6704                                                    dback->offset);
6705                         if (ret)
6706                                 break;
6707                 }
6708                 fprintf(stderr, "adding new data backref"
6709                                 " on %llu %s %llu owner %llu"
6710                                 " offset %llu found %d\n",
6711                                 (unsigned long long)rec->start,
6712                                 back->full_backref ?
6713                                 "parent" : "root",
6714                                 back->full_backref ?
6715                                 (unsigned long long)parent :
6716                                 (unsigned long long)dback->root,
6717                                 (unsigned long long)dback->owner,
6718                                 (unsigned long long)dback->offset,
6719                                 dback->found_ref);
6720         } else {
6721                 u64 parent;
6722
6723                 tback = to_tree_backref(back);
6724                 if (back->full_backref)
6725                         parent = tback->parent;
6726                 else
6727                         parent = 0;
6728
6729                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6730                                            rec->start, rec->max_size,
6731                                            parent, tback->root, 0, 0);
6732                 fprintf(stderr, "adding new tree backref on "
6733                         "start %llu len %llu parent %llu root %llu\n",
6734                         rec->start, rec->max_size, parent, tback->root);
6735         }
6736 fail:
6737         btrfs_release_path(path);
6738         return ret;
6739 }
6740
6741 static struct extent_entry *find_entry(struct list_head *entries,
6742                                        u64 bytenr, u64 bytes)
6743 {
6744         struct extent_entry *entry = NULL;
6745
6746         list_for_each_entry(entry, entries, list) {
6747                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6748                         return entry;
6749         }
6750
6751         return NULL;
6752 }
6753
6754 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6755 {
6756         struct extent_entry *entry, *best = NULL, *prev = NULL;
6757
6758         list_for_each_entry(entry, entries, list) {
6759                 if (!prev) {
6760                         prev = entry;
6761                         continue;
6762                 }
6763
6764                 /*
6765                  * If there are as many broken entries as entries then we know
6766                  * not to trust this particular entry.
6767                  */
6768                 if (entry->broken == entry->count)
6769                         continue;
6770
6771                 /*
6772                  * If our current entry == best then we can't be sure our best
6773                  * is really the best, so we need to keep searching.
6774                  */
6775                 if (best && best->count == entry->count) {
6776                         prev = entry;
6777                         best = NULL;
6778                         continue;
6779                 }
6780
6781                 /* Prev == entry, not good enough, have to keep searching */
6782                 if (!prev->broken && prev->count == entry->count)
6783                         continue;
6784
6785                 if (!best)
6786                         best = (prev->count > entry->count) ? prev : entry;
6787                 else if (best->count < entry->count)
6788                         best = entry;
6789                 prev = entry;
6790         }
6791
6792         return best;
6793 }
6794
6795 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6796                       struct data_backref *dback, struct extent_entry *entry)
6797 {
6798         struct btrfs_trans_handle *trans;
6799         struct btrfs_root *root;
6800         struct btrfs_file_extent_item *fi;
6801         struct extent_buffer *leaf;
6802         struct btrfs_key key;
6803         u64 bytenr, bytes;
6804         int ret, err;
6805
6806         key.objectid = dback->root;
6807         key.type = BTRFS_ROOT_ITEM_KEY;
6808         key.offset = (u64)-1;
6809         root = btrfs_read_fs_root(info, &key);
6810         if (IS_ERR(root)) {
6811                 fprintf(stderr, "Couldn't find root for our ref\n");
6812                 return -EINVAL;
6813         }
6814
6815         /*
6816          * The backref points to the original offset of the extent if it was
6817          * split, so we need to search down to the offset we have and then walk
6818          * forward until we find the backref we're looking for.
6819          */
6820         key.objectid = dback->owner;
6821         key.type = BTRFS_EXTENT_DATA_KEY;
6822         key.offset = dback->offset;
6823         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6824         if (ret < 0) {
6825                 fprintf(stderr, "Error looking up ref %d\n", ret);
6826                 return ret;
6827         }
6828
6829         while (1) {
6830                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6831                         ret = btrfs_next_leaf(root, path);
6832                         if (ret) {
6833                                 fprintf(stderr, "Couldn't find our ref, next\n");
6834                                 return -EINVAL;
6835                         }
6836                 }
6837                 leaf = path->nodes[0];
6838                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6839                 if (key.objectid != dback->owner ||
6840                     key.type != BTRFS_EXTENT_DATA_KEY) {
6841                         fprintf(stderr, "Couldn't find our ref, search\n");
6842                         return -EINVAL;
6843                 }
6844                 fi = btrfs_item_ptr(leaf, path->slots[0],
6845                                     struct btrfs_file_extent_item);
6846                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6847                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6848
6849                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6850                         break;
6851                 path->slots[0]++;
6852         }
6853
6854         btrfs_release_path(path);
6855
6856         trans = btrfs_start_transaction(root, 1);
6857         if (IS_ERR(trans))
6858                 return PTR_ERR(trans);
6859
6860         /*
6861          * Ok we have the key of the file extent we want to fix, now we can cow
6862          * down to the thing and fix it.
6863          */
6864         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6865         if (ret < 0) {
6866                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6867                         key.objectid, key.type, key.offset, ret);
6868                 goto out;
6869         }
6870         if (ret > 0) {
6871                 fprintf(stderr, "Well that's odd, we just found this key "
6872                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6873                         key.offset);
6874                 ret = -EINVAL;
6875                 goto out;
6876         }
6877         leaf = path->nodes[0];
6878         fi = btrfs_item_ptr(leaf, path->slots[0],
6879                             struct btrfs_file_extent_item);
6880
6881         if (btrfs_file_extent_compression(leaf, fi) &&
6882             dback->disk_bytenr != entry->bytenr) {
6883                 fprintf(stderr, "Ref doesn't match the record start and is "
6884                         "compressed, please take a btrfs-image of this file "
6885                         "system and send it to a btrfs developer so they can "
6886                         "complete this functionality for bytenr %Lu\n",
6887                         dback->disk_bytenr);
6888                 ret = -EINVAL;
6889                 goto out;
6890         }
6891
6892         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6893                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6894         } else if (dback->disk_bytenr > entry->bytenr) {
6895                 u64 off_diff, offset;
6896
6897                 off_diff = dback->disk_bytenr - entry->bytenr;
6898                 offset = btrfs_file_extent_offset(leaf, fi);
6899                 if (dback->disk_bytenr + offset +
6900                     btrfs_file_extent_num_bytes(leaf, fi) >
6901                     entry->bytenr + entry->bytes) {
6902                         fprintf(stderr, "Ref is past the entry end, please "
6903                                 "take a btrfs-image of this file system and "
6904                                 "send it to a btrfs developer, ref %Lu\n",
6905                                 dback->disk_bytenr);
6906                         ret = -EINVAL;
6907                         goto out;
6908                 }
6909                 offset += off_diff;
6910                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6911                 btrfs_set_file_extent_offset(leaf, fi, offset);
6912         } else if (dback->disk_bytenr < entry->bytenr) {
6913                 u64 offset;
6914
6915                 offset = btrfs_file_extent_offset(leaf, fi);
6916                 if (dback->disk_bytenr + offset < entry->bytenr) {
6917                         fprintf(stderr, "Ref is before the entry start, please"
6918                                 " take a btrfs-image of this file system and "
6919                                 "send it to a btrfs developer, ref %Lu\n",
6920                                 dback->disk_bytenr);
6921                         ret = -EINVAL;
6922                         goto out;
6923                 }
6924
6925                 offset += dback->disk_bytenr;
6926                 offset -= entry->bytenr;
6927                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6928                 btrfs_set_file_extent_offset(leaf, fi, offset);
6929         }
6930
6931         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6932
6933         /*
6934          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6935          * only do this if we aren't using compression, otherwise it's a
6936          * trickier case.
6937          */
6938         if (!btrfs_file_extent_compression(leaf, fi))
6939                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6940         else
6941                 printf("ram bytes may be wrong?\n");
6942         btrfs_mark_buffer_dirty(leaf);
6943 out:
6944         err = btrfs_commit_transaction(trans, root);
6945         btrfs_release_path(path);
6946         return ret ? ret : err;
6947 }
6948
6949 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6950                            struct extent_record *rec)
6951 {
6952         struct extent_backref *back, *tmp;
6953         struct data_backref *dback;
6954         struct extent_entry *entry, *best = NULL;
6955         LIST_HEAD(entries);
6956         int nr_entries = 0;
6957         int broken_entries = 0;
6958         int ret = 0;
6959         short mismatch = 0;
6960
6961         /*
6962          * Metadata is easy and the backrefs should always agree on bytenr and
6963          * size, if not we've got bigger issues.
6964          */
6965         if (rec->metadata)
6966                 return 0;
6967
6968         rbtree_postorder_for_each_entry_safe(back, tmp,
6969                                              &rec->backref_tree, node) {
6970                 if (back->full_backref || !back->is_data)
6971                         continue;
6972
6973                 dback = to_data_backref(back);
6974
6975                 /*
6976                  * We only pay attention to backrefs that we found a real
6977                  * backref for.
6978                  */
6979                 if (dback->found_ref == 0)
6980                         continue;
6981
6982                 /*
6983                  * For now we only catch when the bytes don't match, not the
6984                  * bytenr.  We can easily do this at the same time, but I want
6985                  * to have a fs image to test on before we just add repair
6986                  * functionality willy-nilly so we know we won't screw up the
6987                  * repair.
6988                  */
6989
6990                 entry = find_entry(&entries, dback->disk_bytenr,
6991                                    dback->bytes);
6992                 if (!entry) {
6993                         entry = malloc(sizeof(struct extent_entry));
6994                         if (!entry) {
6995                                 ret = -ENOMEM;
6996                                 goto out;
6997                         }
6998                         memset(entry, 0, sizeof(*entry));
6999                         entry->bytenr = dback->disk_bytenr;
7000                         entry->bytes = dback->bytes;
7001                         list_add_tail(&entry->list, &entries);
7002                         nr_entries++;
7003                 }
7004
7005                 /*
7006                  * If we only have on entry we may think the entries agree when
7007                  * in reality they don't so we have to do some extra checking.
7008                  */
7009                 if (dback->disk_bytenr != rec->start ||
7010                     dback->bytes != rec->nr || back->broken)
7011                         mismatch = 1;
7012
7013                 if (back->broken) {
7014                         entry->broken++;
7015                         broken_entries++;
7016                 }
7017
7018                 entry->count++;
7019         }
7020
7021         /* Yay all the backrefs agree, carry on good sir */
7022         if (nr_entries <= 1 && !mismatch)
7023                 goto out;
7024
7025         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7026                 "%Lu\n", rec->start);
7027
7028         /*
7029          * First we want to see if the backrefs can agree amongst themselves who
7030          * is right, so figure out which one of the entries has the highest
7031          * count.
7032          */
7033         best = find_most_right_entry(&entries);
7034
7035         /*
7036          * Ok so we may have an even split between what the backrefs think, so
7037          * this is where we use the extent ref to see what it thinks.
7038          */
7039         if (!best) {
7040                 entry = find_entry(&entries, rec->start, rec->nr);
7041                 if (!entry && (!broken_entries || !rec->found_rec)) {
7042                         fprintf(stderr, "Backrefs don't agree with each other "
7043                                 "and extent record doesn't agree with anybody,"
7044                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7045                                 rec->start, rec->nr);
7046                         ret = -EINVAL;
7047                         goto out;
7048                 } else if (!entry) {
7049                         /*
7050                          * Ok our backrefs were broken, we'll assume this is the
7051                          * correct value and add an entry for this range.
7052                          */
7053                         entry = malloc(sizeof(struct extent_entry));
7054                         if (!entry) {
7055                                 ret = -ENOMEM;
7056                                 goto out;
7057                         }
7058                         memset(entry, 0, sizeof(*entry));
7059                         entry->bytenr = rec->start;
7060                         entry->bytes = rec->nr;
7061                         list_add_tail(&entry->list, &entries);
7062                         nr_entries++;
7063                 }
7064                 entry->count++;
7065                 best = find_most_right_entry(&entries);
7066                 if (!best) {
7067                         fprintf(stderr, "Backrefs and extent record evenly "
7068                                 "split on who is right, this is going to "
7069                                 "require user input to fix bytenr %Lu bytes "
7070                                 "%Lu\n", rec->start, rec->nr);
7071                         ret = -EINVAL;
7072                         goto out;
7073                 }
7074         }
7075
7076         /*
7077          * I don't think this can happen currently as we'll abort() if we catch
7078          * this case higher up, but in case somebody removes that we still can't
7079          * deal with it properly here yet, so just bail out of that's the case.
7080          */
7081         if (best->bytenr != rec->start) {
7082                 fprintf(stderr, "Extent start and backref starts don't match, "
7083                         "please use btrfs-image on this file system and send "
7084                         "it to a btrfs developer so they can make fsck fix "
7085                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7086                         rec->start, rec->nr);
7087                 ret = -EINVAL;
7088                 goto out;
7089         }
7090
7091         /*
7092          * Ok great we all agreed on an extent record, let's go find the real
7093          * references and fix up the ones that don't match.
7094          */
7095         rbtree_postorder_for_each_entry_safe(back, tmp,
7096                                              &rec->backref_tree, node) {
7097                 if (back->full_backref || !back->is_data)
7098                         continue;
7099
7100                 dback = to_data_backref(back);
7101
7102                 /*
7103                  * Still ignoring backrefs that don't have a real ref attached
7104                  * to them.
7105                  */
7106                 if (dback->found_ref == 0)
7107                         continue;
7108
7109                 if (dback->bytes == best->bytes &&
7110                     dback->disk_bytenr == best->bytenr)
7111                         continue;
7112
7113                 ret = repair_ref(info, path, dback, best);
7114                 if (ret)
7115                         goto out;
7116         }
7117
7118         /*
7119          * Ok we messed with the actual refs, which means we need to drop our
7120          * entire cache and go back and rescan.  I know this is a huge pain and
7121          * adds a lot of extra work, but it's the only way to be safe.  Once all
7122          * the backrefs agree we may not need to do anything to the extent
7123          * record itself.
7124          */
7125         ret = -EAGAIN;
7126 out:
7127         while (!list_empty(&entries)) {
7128                 entry = list_entry(entries.next, struct extent_entry, list);
7129                 list_del_init(&entry->list);
7130                 free(entry);
7131         }
7132         return ret;
7133 }
7134
7135 static int process_duplicates(struct btrfs_root *root,
7136                               struct cache_tree *extent_cache,
7137                               struct extent_record *rec)
7138 {
7139         struct extent_record *good, *tmp;
7140         struct cache_extent *cache;
7141         int ret;
7142
7143         /*
7144          * If we found a extent record for this extent then return, or if we
7145          * have more than one duplicate we are likely going to need to delete
7146          * something.
7147          */
7148         if (rec->found_rec || rec->num_duplicates > 1)
7149                 return 0;
7150
7151         /* Shouldn't happen but just in case */
7152         BUG_ON(!rec->num_duplicates);
7153
7154         /*
7155          * So this happens if we end up with a backref that doesn't match the
7156          * actual extent entry.  So either the backref is bad or the extent
7157          * entry is bad.  Either way we want to have the extent_record actually
7158          * reflect what we found in the extent_tree, so we need to take the
7159          * duplicate out and use that as the extent_record since the only way we
7160          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7161          */
7162         remove_cache_extent(extent_cache, &rec->cache);
7163
7164         good = to_extent_record(rec->dups.next);
7165         list_del_init(&good->list);
7166         INIT_LIST_HEAD(&good->backrefs);
7167         INIT_LIST_HEAD(&good->dups);
7168         good->cache.start = good->start;
7169         good->cache.size = good->nr;
7170         good->content_checked = 0;
7171         good->owner_ref_checked = 0;
7172         good->num_duplicates = 0;
7173         good->refs = rec->refs;
7174         list_splice_init(&rec->backrefs, &good->backrefs);
7175         while (1) {
7176                 cache = lookup_cache_extent(extent_cache, good->start,
7177                                             good->nr);
7178                 if (!cache)
7179                         break;
7180                 tmp = container_of(cache, struct extent_record, cache);
7181
7182                 /*
7183                  * If we find another overlapping extent and it's found_rec is
7184                  * set then it's a duplicate and we need to try and delete
7185                  * something.
7186                  */
7187                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7188                         if (list_empty(&good->list))
7189                                 list_add_tail(&good->list,
7190                                               &duplicate_extents);
7191                         good->num_duplicates += tmp->num_duplicates + 1;
7192                         list_splice_init(&tmp->dups, &good->dups);
7193                         list_del_init(&tmp->list);
7194                         list_add_tail(&tmp->list, &good->dups);
7195                         remove_cache_extent(extent_cache, &tmp->cache);
7196                         continue;
7197                 }
7198
7199                 /*
7200                  * Ok we have another non extent item backed extent rec, so lets
7201                  * just add it to this extent and carry on like we did above.
7202                  */
7203                 good->refs += tmp->refs;
7204                 list_splice_init(&tmp->backrefs, &good->backrefs);
7205                 remove_cache_extent(extent_cache, &tmp->cache);
7206                 free(tmp);
7207         }
7208         ret = insert_cache_extent(extent_cache, &good->cache);
7209         BUG_ON(ret);
7210         free(rec);
7211         return good->num_duplicates ? 0 : 1;
7212 }
7213
7214 static int delete_duplicate_records(struct btrfs_root *root,
7215                                     struct extent_record *rec)
7216 {
7217         struct btrfs_trans_handle *trans;
7218         LIST_HEAD(delete_list);
7219         struct btrfs_path *path;
7220         struct extent_record *tmp, *good, *n;
7221         int nr_del = 0;
7222         int ret = 0, err;
7223         struct btrfs_key key;
7224
7225         path = btrfs_alloc_path();
7226         if (!path) {
7227                 ret = -ENOMEM;
7228                 goto out;
7229         }
7230
7231         good = rec;
7232         /* Find the record that covers all of the duplicates. */
7233         list_for_each_entry(tmp, &rec->dups, list) {
7234                 if (good->start < tmp->start)
7235                         continue;
7236                 if (good->nr > tmp->nr)
7237                         continue;
7238
7239                 if (tmp->start + tmp->nr < good->start + good->nr) {
7240                         fprintf(stderr, "Ok we have overlapping extents that "
7241                                 "aren't completely covered by each other, this "
7242                                 "is going to require more careful thought.  "
7243                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7244                                 tmp->start, tmp->nr, good->start, good->nr);
7245                         abort();
7246                 }
7247                 good = tmp;
7248         }
7249
7250         if (good != rec)
7251                 list_add_tail(&rec->list, &delete_list);
7252
7253         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7254                 if (tmp == good)
7255                         continue;
7256                 list_move_tail(&tmp->list, &delete_list);
7257         }
7258
7259         root = root->fs_info->extent_root;
7260         trans = btrfs_start_transaction(root, 1);
7261         if (IS_ERR(trans)) {
7262                 ret = PTR_ERR(trans);
7263                 goto out;
7264         }
7265
7266         list_for_each_entry(tmp, &delete_list, list) {
7267                 if (tmp->found_rec == 0)
7268                         continue;
7269                 key.objectid = tmp->start;
7270                 key.type = BTRFS_EXTENT_ITEM_KEY;
7271                 key.offset = tmp->nr;
7272
7273                 /* Shouldn't happen but just in case */
7274                 if (tmp->metadata) {
7275                         fprintf(stderr, "Well this shouldn't happen, extent "
7276                                 "record overlaps but is metadata? "
7277                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7278                         abort();
7279                 }
7280
7281                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7282                 if (ret) {
7283                         if (ret > 0)
7284                                 ret = -EINVAL;
7285                         break;
7286                 }
7287                 ret = btrfs_del_item(trans, root, path);
7288                 if (ret)
7289                         break;
7290                 btrfs_release_path(path);
7291                 nr_del++;
7292         }
7293         err = btrfs_commit_transaction(trans, root);
7294         if (err && !ret)
7295                 ret = err;
7296 out:
7297         while (!list_empty(&delete_list)) {
7298                 tmp = to_extent_record(delete_list.next);
7299                 list_del_init(&tmp->list);
7300                 if (tmp == rec)
7301                         continue;
7302                 free(tmp);
7303         }
7304
7305         while (!list_empty(&rec->dups)) {
7306                 tmp = to_extent_record(rec->dups.next);
7307                 list_del_init(&tmp->list);
7308                 free(tmp);
7309         }
7310
7311         btrfs_free_path(path);
7312
7313         if (!ret && !nr_del)
7314                 rec->num_duplicates = 0;
7315
7316         return ret ? ret : nr_del;
7317 }
7318
7319 static int find_possible_backrefs(struct btrfs_fs_info *info,
7320                                   struct btrfs_path *path,
7321                                   struct cache_tree *extent_cache,
7322                                   struct extent_record *rec)
7323 {
7324         struct btrfs_root *root;
7325         struct extent_backref *back, *tmp;
7326         struct data_backref *dback;
7327         struct cache_extent *cache;
7328         struct btrfs_file_extent_item *fi;
7329         struct btrfs_key key;
7330         u64 bytenr, bytes;
7331         int ret;
7332
7333         rbtree_postorder_for_each_entry_safe(back, tmp,
7334                                              &rec->backref_tree, node) {
7335                 /* Don't care about full backrefs (poor unloved backrefs) */
7336                 if (back->full_backref || !back->is_data)
7337                         continue;
7338
7339                 dback = to_data_backref(back);
7340
7341                 /* We found this one, we don't need to do a lookup */
7342                 if (dback->found_ref)
7343                         continue;
7344
7345                 key.objectid = dback->root;
7346                 key.type = BTRFS_ROOT_ITEM_KEY;
7347                 key.offset = (u64)-1;
7348
7349                 root = btrfs_read_fs_root(info, &key);
7350
7351                 /* No root, definitely a bad ref, skip */
7352                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7353                         continue;
7354                 /* Other err, exit */
7355                 if (IS_ERR(root))
7356                         return PTR_ERR(root);
7357
7358                 key.objectid = dback->owner;
7359                 key.type = BTRFS_EXTENT_DATA_KEY;
7360                 key.offset = dback->offset;
7361                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7362                 if (ret) {
7363                         btrfs_release_path(path);
7364                         if (ret < 0)
7365                                 return ret;
7366                         /* Didn't find it, we can carry on */
7367                         ret = 0;
7368                         continue;
7369                 }
7370
7371                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7372                                     struct btrfs_file_extent_item);
7373                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7374                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7375                 btrfs_release_path(path);
7376                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7377                 if (cache) {
7378                         struct extent_record *tmp;
7379                         tmp = container_of(cache, struct extent_record, cache);
7380
7381                         /*
7382                          * If we found an extent record for the bytenr for this
7383                          * particular backref then we can't add it to our
7384                          * current extent record.  We only want to add backrefs
7385                          * that don't have a corresponding extent item in the
7386                          * extent tree since they likely belong to this record
7387                          * and we need to fix it if it doesn't match bytenrs.
7388                          */
7389                         if  (tmp->found_rec)
7390                                 continue;
7391                 }
7392
7393                 dback->found_ref += 1;
7394                 dback->disk_bytenr = bytenr;
7395                 dback->bytes = bytes;
7396
7397                 /*
7398                  * Set this so the verify backref code knows not to trust the
7399                  * values in this backref.
7400                  */
7401                 back->broken = 1;
7402         }
7403
7404         return 0;
7405 }
7406
7407 /*
7408  * Record orphan data ref into corresponding root.
7409  *
7410  * Return 0 if the extent item contains data ref and recorded.
7411  * Return 1 if the extent item contains no useful data ref
7412  *   On that case, it may contains only shared_dataref or metadata backref
7413  *   or the file extent exists(this should be handled by the extent bytenr
7414  *   recovery routine)
7415  * Return <0 if something goes wrong.
7416  */
7417 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7418                                       struct extent_record *rec)
7419 {
7420         struct btrfs_key key;
7421         struct btrfs_root *dest_root;
7422         struct extent_backref *back, *tmp;
7423         struct data_backref *dback;
7424         struct orphan_data_extent *orphan;
7425         struct btrfs_path *path;
7426         int recorded_data_ref = 0;
7427         int ret = 0;
7428
7429         if (rec->metadata)
7430                 return 1;
7431         path = btrfs_alloc_path();
7432         if (!path)
7433                 return -ENOMEM;
7434         rbtree_postorder_for_each_entry_safe(back, tmp,
7435                                              &rec->backref_tree, node) {
7436                 if (back->full_backref || !back->is_data ||
7437                     !back->found_extent_tree)
7438                         continue;
7439                 dback = to_data_backref(back);
7440                 if (dback->found_ref)
7441                         continue;
7442                 key.objectid = dback->root;
7443                 key.type = BTRFS_ROOT_ITEM_KEY;
7444                 key.offset = (u64)-1;
7445
7446                 dest_root = btrfs_read_fs_root(fs_info, &key);
7447
7448                 /* For non-exist root we just skip it */
7449                 if (IS_ERR(dest_root) || !dest_root)
7450                         continue;
7451
7452                 key.objectid = dback->owner;
7453                 key.type = BTRFS_EXTENT_DATA_KEY;
7454                 key.offset = dback->offset;
7455
7456                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7457                 /*
7458                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7459                  * we need to record it for inode/file extent rebuild.
7460                  * For ret > 0, we record it only for file extent rebuild.
7461                  * For ret == 0, the file extent exists but only bytenr
7462                  * mismatch, let the original bytenr fix routine to handle,
7463                  * don't record it.
7464                  */
7465                 if (ret == 0)
7466                         continue;
7467                 ret = 0;
7468                 orphan = malloc(sizeof(*orphan));
7469                 if (!orphan) {
7470                         ret = -ENOMEM;
7471                         goto out;
7472                 }
7473                 INIT_LIST_HEAD(&orphan->list);
7474                 orphan->root = dback->root;
7475                 orphan->objectid = dback->owner;
7476                 orphan->offset = dback->offset;
7477                 orphan->disk_bytenr = rec->cache.start;
7478                 orphan->disk_len = rec->cache.size;
7479                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7480                 recorded_data_ref = 1;
7481         }
7482 out:
7483         btrfs_free_path(path);
7484         if (!ret)
7485                 return !recorded_data_ref;
7486         else
7487                 return ret;
7488 }
7489
7490 /*
7491  * when an incorrect extent item is found, this will delete
7492  * all of the existing entries for it and recreate them
7493  * based on what the tree scan found.
7494  */
7495 static int fixup_extent_refs(struct btrfs_fs_info *info,
7496                              struct cache_tree *extent_cache,
7497                              struct extent_record *rec)
7498 {
7499         struct btrfs_trans_handle *trans = NULL;
7500         int ret;
7501         struct btrfs_path *path;
7502         struct cache_extent *cache;
7503         struct extent_backref *back, *tmp;
7504         int allocated = 0;
7505         u64 flags = 0;
7506
7507         if (rec->flag_block_full_backref)
7508                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7509
7510         path = btrfs_alloc_path();
7511         if (!path)
7512                 return -ENOMEM;
7513
7514         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7515                 /*
7516                  * Sometimes the backrefs themselves are so broken they don't
7517                  * get attached to any meaningful rec, so first go back and
7518                  * check any of our backrefs that we couldn't find and throw
7519                  * them into the list if we find the backref so that
7520                  * verify_backrefs can figure out what to do.
7521                  */
7522                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7523                 if (ret < 0)
7524                         goto out;
7525         }
7526
7527         /* step one, make sure all of the backrefs agree */
7528         ret = verify_backrefs(info, path, rec);
7529         if (ret < 0)
7530                 goto out;
7531
7532         trans = btrfs_start_transaction(info->extent_root, 1);
7533         if (IS_ERR(trans)) {
7534                 ret = PTR_ERR(trans);
7535                 goto out;
7536         }
7537
7538         /* step two, delete all the existing records */
7539         ret = delete_extent_records(trans, info->extent_root, path,
7540                                     rec->start, rec->max_size);
7541
7542         if (ret < 0)
7543                 goto out;
7544
7545         /* was this block corrupt?  If so, don't add references to it */
7546         cache = lookup_cache_extent(info->corrupt_blocks,
7547                                     rec->start, rec->max_size);
7548         if (cache) {
7549                 ret = 0;
7550                 goto out;
7551         }
7552
7553         /* step three, recreate all the refs we did find */
7554         rbtree_postorder_for_each_entry_safe(back, tmp,
7555                                              &rec->backref_tree, node) {
7556                 /*
7557                  * if we didn't find any references, don't create a
7558                  * new extent record
7559                  */
7560                 if (!back->found_ref)
7561                         continue;
7562
7563                 rec->bad_full_backref = 0;
7564                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7565                 allocated = 1;
7566
7567                 if (ret)
7568                         goto out;
7569         }
7570 out:
7571         if (trans) {
7572                 int err = btrfs_commit_transaction(trans, info->extent_root);
7573                 if (!ret)
7574                         ret = err;
7575         }
7576
7577         btrfs_free_path(path);
7578         return ret;
7579 }
7580
7581 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7582                               struct extent_record *rec)
7583 {
7584         struct btrfs_trans_handle *trans;
7585         struct btrfs_root *root = fs_info->extent_root;
7586         struct btrfs_path *path;
7587         struct btrfs_extent_item *ei;
7588         struct btrfs_key key;
7589         u64 flags;
7590         int ret = 0;
7591
7592         key.objectid = rec->start;
7593         if (rec->metadata) {
7594                 key.type = BTRFS_METADATA_ITEM_KEY;
7595                 key.offset = rec->info_level;
7596         } else {
7597                 key.type = BTRFS_EXTENT_ITEM_KEY;
7598                 key.offset = rec->max_size;
7599         }
7600
7601         path = btrfs_alloc_path();
7602         if (!path)
7603                 return -ENOMEM;
7604
7605         trans = btrfs_start_transaction(root, 0);
7606         if (IS_ERR(trans)) {
7607                 btrfs_free_path(path);
7608                 return PTR_ERR(trans);
7609         }
7610
7611         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7612         if (ret < 0) {
7613                 btrfs_free_path(path);
7614                 btrfs_commit_transaction(trans, root);
7615                 return ret;
7616         } else if (ret) {
7617                 fprintf(stderr, "Didn't find extent for %llu\n",
7618                         (unsigned long long)rec->start);
7619                 btrfs_free_path(path);
7620                 btrfs_commit_transaction(trans, root);
7621                 return -ENOENT;
7622         }
7623
7624         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7625                             struct btrfs_extent_item);
7626         flags = btrfs_extent_flags(path->nodes[0], ei);
7627         if (rec->flag_block_full_backref) {
7628                 fprintf(stderr, "setting full backref on %llu\n",
7629                         (unsigned long long)key.objectid);
7630                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7631         } else {
7632                 fprintf(stderr, "clearing full backref on %llu\n",
7633                         (unsigned long long)key.objectid);
7634                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7635         }
7636         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7637         btrfs_mark_buffer_dirty(path->nodes[0]);
7638         btrfs_free_path(path);
7639         return btrfs_commit_transaction(trans, root);
7640 }
7641
7642 /* right now we only prune from the extent allocation tree */
7643 static int prune_one_block(struct btrfs_trans_handle *trans,
7644                            struct btrfs_fs_info *info,
7645                            struct btrfs_corrupt_block *corrupt)
7646 {
7647         int ret;
7648         struct btrfs_path path;
7649         struct extent_buffer *eb;
7650         u64 found;
7651         int slot;
7652         int nritems;
7653         int level = corrupt->level + 1;
7654
7655         btrfs_init_path(&path);
7656 again:
7657         /* we want to stop at the parent to our busted block */
7658         path.lowest_level = level;
7659
7660         ret = btrfs_search_slot(trans, info->extent_root,
7661                                 &corrupt->key, &path, -1, 1);
7662
7663         if (ret < 0)
7664                 goto out;
7665
7666         eb = path.nodes[level];
7667         if (!eb) {
7668                 ret = -ENOENT;
7669                 goto out;
7670         }
7671
7672         /*
7673          * hopefully the search gave us the block we want to prune,
7674          * lets try that first
7675          */
7676         slot = path.slots[level];
7677         found =  btrfs_node_blockptr(eb, slot);
7678         if (found == corrupt->cache.start)
7679                 goto del_ptr;
7680
7681         nritems = btrfs_header_nritems(eb);
7682
7683         /* the search failed, lets scan this node and hope we find it */
7684         for (slot = 0; slot < nritems; slot++) {
7685                 found =  btrfs_node_blockptr(eb, slot);
7686                 if (found == corrupt->cache.start)
7687                         goto del_ptr;
7688         }
7689         /*
7690          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7691          * to this block
7692          */
7693         if (eb == info->extent_root->node) {
7694                 ret = -ENOENT;
7695                 goto out;
7696         } else {
7697                 level++;
7698                 btrfs_release_path(&path);
7699                 goto again;
7700         }
7701
7702 del_ptr:
7703         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7704         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7705
7706 out:
7707         btrfs_release_path(&path);
7708         return ret;
7709 }
7710
7711 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7712 {
7713         struct btrfs_trans_handle *trans = NULL;
7714         struct cache_extent *cache;
7715         struct btrfs_corrupt_block *corrupt;
7716
7717         while (1) {
7718                 cache = search_cache_extent(info->corrupt_blocks, 0);
7719                 if (!cache)
7720                         break;
7721                 if (!trans) {
7722                         trans = btrfs_start_transaction(info->extent_root, 1);
7723                         if (IS_ERR(trans))
7724                                 return PTR_ERR(trans);
7725                 }
7726                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7727                 prune_one_block(trans, info, corrupt);
7728                 remove_cache_extent(info->corrupt_blocks, cache);
7729         }
7730         if (trans)
7731                 return btrfs_commit_transaction(trans, info->extent_root);
7732         return 0;
7733 }
7734
7735 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7736 {
7737         struct btrfs_block_group_cache *cache;
7738         u64 start, end;
7739         int ret;
7740
7741         while (1) {
7742                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7743                                             &start, &end, EXTENT_DIRTY);
7744                 if (ret)
7745                         break;
7746                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7747                                    GFP_NOFS);
7748         }
7749
7750         start = 0;
7751         while (1) {
7752                 cache = btrfs_lookup_first_block_group(fs_info, start);
7753                 if (!cache)
7754                         break;
7755                 if (cache->cached)
7756                         cache->cached = 0;
7757                 start = cache->key.objectid + cache->key.offset;
7758         }
7759 }
7760
7761 static int check_extent_refs(struct btrfs_root *root,
7762                              struct cache_tree *extent_cache)
7763 {
7764         struct extent_record *rec;
7765         struct cache_extent *cache;
7766         int err = 0;
7767         int ret = 0;
7768         int fixed = 0;
7769         int had_dups = 0;
7770         int recorded = 0;
7771
7772         if (repair) {
7773                 /*
7774                  * if we're doing a repair, we have to make sure
7775                  * we don't allocate from the problem extents.
7776                  * In the worst case, this will be all the
7777                  * extents in the FS
7778                  */
7779                 cache = search_cache_extent(extent_cache, 0);
7780                 while(cache) {
7781                         rec = container_of(cache, struct extent_record, cache);
7782                         set_extent_dirty(root->fs_info->excluded_extents,
7783                                          rec->start,
7784                                          rec->start + rec->max_size - 1,
7785                                          GFP_NOFS);
7786                         cache = next_cache_extent(cache);
7787                 }
7788
7789                 /* pin down all the corrupted blocks too */
7790                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7791                 while(cache) {
7792                         set_extent_dirty(root->fs_info->excluded_extents,
7793                                          cache->start,
7794                                          cache->start + cache->size - 1,
7795                                          GFP_NOFS);
7796                         cache = next_cache_extent(cache);
7797                 }
7798                 prune_corrupt_blocks(root->fs_info);
7799                 reset_cached_block_groups(root->fs_info);
7800         }
7801
7802         reset_cached_block_groups(root->fs_info);
7803
7804         /*
7805          * We need to delete any duplicate entries we find first otherwise we
7806          * could mess up the extent tree when we have backrefs that actually
7807          * belong to a different extent item and not the weird duplicate one.
7808          */
7809         while (repair && !list_empty(&duplicate_extents)) {
7810                 rec = to_extent_record(duplicate_extents.next);
7811                 list_del_init(&rec->list);
7812
7813                 /* Sometimes we can find a backref before we find an actual
7814                  * extent, so we need to process it a little bit to see if there
7815                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7816                  * if this is a backref screwup.  If we need to delete stuff
7817                  * process_duplicates() will return 0, otherwise it will return
7818                  * 1 and we
7819                  */
7820                 if (process_duplicates(root, extent_cache, rec))
7821                         continue;
7822                 ret = delete_duplicate_records(root, rec);
7823                 if (ret < 0)
7824                         return ret;
7825                 /*
7826                  * delete_duplicate_records will return the number of entries
7827                  * deleted, so if it's greater than 0 then we know we actually
7828                  * did something and we need to remove.
7829                  */
7830                 if (ret)
7831                         had_dups = 1;
7832         }
7833
7834         if (had_dups)
7835                 return -EAGAIN;
7836
7837         while(1) {
7838                 int cur_err = 0;
7839
7840                 fixed = 0;
7841                 recorded = 0;
7842                 cache = search_cache_extent(extent_cache, 0);
7843                 if (!cache)
7844                         break;
7845                 rec = container_of(cache, struct extent_record, cache);
7846                 if (rec->num_duplicates) {
7847                         fprintf(stderr, "extent item %llu has multiple extent "
7848                                 "items\n", (unsigned long long)rec->start);
7849                         err = 1;
7850                         cur_err = 1;
7851                 }
7852
7853                 if (rec->refs != rec->extent_item_refs) {
7854                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7855                                 (unsigned long long)rec->start,
7856                                 (unsigned long long)rec->nr);
7857                         fprintf(stderr, "extent item %llu, found %llu\n",
7858                                 (unsigned long long)rec->extent_item_refs,
7859                                 (unsigned long long)rec->refs);
7860                         ret = record_orphan_data_extents(root->fs_info, rec);
7861                         if (ret < 0)
7862                                 goto repair_abort;
7863                         if (ret == 0) {
7864                                 recorded = 1;
7865                         } else {
7866                                 /*
7867                                  * we can't use the extent to repair file
7868                                  * extent, let the fallback method handle it.
7869                                  */
7870                                 if (!fixed && repair) {
7871                                         ret = fixup_extent_refs(
7872                                                         root->fs_info,
7873                                                         extent_cache, rec);
7874                                         if (ret)
7875                                                 goto repair_abort;
7876                                         fixed = 1;
7877                                 }
7878                         }
7879                         err = 1;
7880                         cur_err = 1;
7881                 }
7882                 if (all_backpointers_checked(rec, 1)) {
7883                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7884                                 (unsigned long long)rec->start,
7885                                 (unsigned long long)rec->nr);
7886
7887                         if (!fixed && !recorded && repair) {
7888                                 ret = fixup_extent_refs(root->fs_info,
7889                                                         extent_cache, rec);
7890                                 if (ret)
7891                                         goto repair_abort;
7892                                 fixed = 1;
7893                         }
7894                         cur_err = 1;
7895                         err = 1;
7896                 }
7897                 if (!rec->owner_ref_checked) {
7898                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7899                                 (unsigned long long)rec->start,
7900                                 (unsigned long long)rec->nr);
7901                         if (!fixed && !recorded && repair) {
7902                                 ret = fixup_extent_refs(root->fs_info,
7903                                                         extent_cache, rec);
7904                                 if (ret)
7905                                         goto repair_abort;
7906                                 fixed = 1;
7907                         }
7908                         err = 1;
7909                         cur_err = 1;
7910                 }
7911                 if (rec->bad_full_backref) {
7912                         fprintf(stderr, "bad full backref, on [%llu]\n",
7913                                 (unsigned long long)rec->start);
7914                         if (repair) {
7915                                 ret = fixup_extent_flags(root->fs_info, rec);
7916                                 if (ret)
7917                                         goto repair_abort;
7918                                 fixed = 1;
7919                         }
7920                         err = 1;
7921                         cur_err = 1;
7922                 }
7923                 /*
7924                  * Although it's not a extent ref's problem, we reuse this
7925                  * routine for error reporting.
7926                  * No repair function yet.
7927                  */
7928                 if (rec->crossing_stripes) {
7929                         fprintf(stderr,
7930                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7931                                 rec->start, rec->start + rec->max_size);
7932                         err = 1;
7933                         cur_err = 1;
7934                 }
7935
7936                 if (rec->wrong_chunk_type) {
7937                         fprintf(stderr,
7938                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7939                                 rec->start, rec->start + rec->max_size);
7940                         err = 1;
7941                         cur_err = 1;
7942                 }
7943
7944                 remove_cache_extent(extent_cache, cache);
7945                 free_all_extent_backrefs(rec);
7946                 if (!init_extent_tree && repair && (!cur_err || fixed))
7947                         clear_extent_dirty(root->fs_info->excluded_extents,
7948                                            rec->start,
7949                                            rec->start + rec->max_size - 1,
7950                                            GFP_NOFS);
7951                 free(rec);
7952         }
7953 repair_abort:
7954         if (repair) {
7955                 if (ret && ret != -EAGAIN) {
7956                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7957                         exit(1);
7958                 } else if (!ret) {
7959                         struct btrfs_trans_handle *trans;
7960
7961                         root = root->fs_info->extent_root;
7962                         trans = btrfs_start_transaction(root, 1);
7963                         if (IS_ERR(trans)) {
7964                                 ret = PTR_ERR(trans);
7965                                 goto repair_abort;
7966                         }
7967
7968                         btrfs_fix_block_accounting(trans, root);
7969                         ret = btrfs_commit_transaction(trans, root);
7970                         if (ret)
7971                                 goto repair_abort;
7972                 }
7973                 if (err)
7974                         fprintf(stderr, "repaired damaged extent references\n");
7975                 return ret;
7976         }
7977         return err;
7978 }
7979
7980 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7981 {
7982         u64 stripe_size;
7983
7984         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7985                 stripe_size = length;
7986                 stripe_size /= num_stripes;
7987         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7988                 stripe_size = length * 2;
7989                 stripe_size /= num_stripes;
7990         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7991                 stripe_size = length;
7992                 stripe_size /= (num_stripes - 1);
7993         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7994                 stripe_size = length;
7995                 stripe_size /= (num_stripes - 2);
7996         } else {
7997                 stripe_size = length;
7998         }
7999         return stripe_size;
8000 }
8001
8002 /*
8003  * Check the chunk with its block group/dev list ref:
8004  * Return 0 if all refs seems valid.
8005  * Return 1 if part of refs seems valid, need later check for rebuild ref
8006  * like missing block group and needs to search extent tree to rebuild them.
8007  * Return -1 if essential refs are missing and unable to rebuild.
8008  */
8009 static int check_chunk_refs(struct chunk_record *chunk_rec,
8010                             struct block_group_tree *block_group_cache,
8011                             struct device_extent_tree *dev_extent_cache,
8012                             int silent)
8013 {
8014         struct cache_extent *block_group_item;
8015         struct block_group_record *block_group_rec;
8016         struct cache_extent *dev_extent_item;
8017         struct device_extent_record *dev_extent_rec;
8018         u64 devid;
8019         u64 offset;
8020         u64 length;
8021         int metadump_v2 = 0;
8022         int i;
8023         int ret = 0;
8024
8025         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8026                                                chunk_rec->offset,
8027                                                chunk_rec->length);
8028         if (block_group_item) {
8029                 block_group_rec = container_of(block_group_item,
8030                                                struct block_group_record,
8031                                                cache);
8032                 if (chunk_rec->length != block_group_rec->offset ||
8033                     chunk_rec->offset != block_group_rec->objectid ||
8034                     (!metadump_v2 &&
8035                      chunk_rec->type_flags != block_group_rec->flags)) {
8036                         if (!silent)
8037                                 fprintf(stderr,
8038                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8039                                         chunk_rec->objectid,
8040                                         chunk_rec->type,
8041                                         chunk_rec->offset,
8042                                         chunk_rec->length,
8043                                         chunk_rec->offset,
8044                                         chunk_rec->type_flags,
8045                                         block_group_rec->objectid,
8046                                         block_group_rec->type,
8047                                         block_group_rec->offset,
8048                                         block_group_rec->offset,
8049                                         block_group_rec->objectid,
8050                                         block_group_rec->flags);
8051                         ret = -1;
8052                 } else {
8053                         list_del_init(&block_group_rec->list);
8054                         chunk_rec->bg_rec = block_group_rec;
8055                 }
8056         } else {
8057                 if (!silent)
8058                         fprintf(stderr,
8059                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8060                                 chunk_rec->objectid,
8061                                 chunk_rec->type,
8062                                 chunk_rec->offset,
8063                                 chunk_rec->length,
8064                                 chunk_rec->offset,
8065                                 chunk_rec->type_flags);
8066                 ret = 1;
8067         }
8068
8069         if (metadump_v2)
8070                 return ret;
8071
8072         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8073                                     chunk_rec->num_stripes);
8074         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8075                 devid = chunk_rec->stripes[i].devid;
8076                 offset = chunk_rec->stripes[i].offset;
8077                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8078                                                        devid, offset, length);
8079                 if (dev_extent_item) {
8080                         dev_extent_rec = container_of(dev_extent_item,
8081                                                 struct device_extent_record,
8082                                                 cache);
8083                         if (dev_extent_rec->objectid != devid ||
8084                             dev_extent_rec->offset != offset ||
8085                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8086                             dev_extent_rec->length != length) {
8087                                 if (!silent)
8088                                         fprintf(stderr,
8089                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8090                                                 chunk_rec->objectid,
8091                                                 chunk_rec->type,
8092                                                 chunk_rec->offset,
8093                                                 chunk_rec->stripes[i].devid,
8094                                                 chunk_rec->stripes[i].offset,
8095                                                 dev_extent_rec->objectid,
8096                                                 dev_extent_rec->offset,
8097                                                 dev_extent_rec->length);
8098                                 ret = -1;
8099                         } else {
8100                                 list_move(&dev_extent_rec->chunk_list,
8101                                           &chunk_rec->dextents);
8102                         }
8103                 } else {
8104                         if (!silent)
8105                                 fprintf(stderr,
8106                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8107                                         chunk_rec->objectid,
8108                                         chunk_rec->type,
8109                                         chunk_rec->offset,
8110                                         chunk_rec->stripes[i].devid,
8111                                         chunk_rec->stripes[i].offset);
8112                         ret = -1;
8113                 }
8114         }
8115         return ret;
8116 }
8117
8118 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8119 int check_chunks(struct cache_tree *chunk_cache,
8120                  struct block_group_tree *block_group_cache,
8121                  struct device_extent_tree *dev_extent_cache,
8122                  struct list_head *good, struct list_head *bad,
8123                  struct list_head *rebuild, int silent)
8124 {
8125         struct cache_extent *chunk_item;
8126         struct chunk_record *chunk_rec;
8127         struct block_group_record *bg_rec;
8128         struct device_extent_record *dext_rec;
8129         int err;
8130         int ret = 0;
8131
8132         chunk_item = first_cache_extent(chunk_cache);
8133         while (chunk_item) {
8134                 chunk_rec = container_of(chunk_item, struct chunk_record,
8135                                          cache);
8136                 err = check_chunk_refs(chunk_rec, block_group_cache,
8137                                        dev_extent_cache, silent);
8138                 if (err < 0)
8139                         ret = err;
8140                 if (err == 0 && good)
8141                         list_add_tail(&chunk_rec->list, good);
8142                 if (err > 0 && rebuild)
8143                         list_add_tail(&chunk_rec->list, rebuild);
8144                 if (err < 0 && bad)
8145                         list_add_tail(&chunk_rec->list, bad);
8146                 chunk_item = next_cache_extent(chunk_item);
8147         }
8148
8149         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8150                 if (!silent)
8151                         fprintf(stderr,
8152                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8153                                 bg_rec->objectid,
8154                                 bg_rec->offset,
8155                                 bg_rec->flags);
8156                 if (!ret)
8157                         ret = 1;
8158         }
8159
8160         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8161                             chunk_list) {
8162                 if (!silent)
8163                         fprintf(stderr,
8164                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8165                                 dext_rec->objectid,
8166                                 dext_rec->offset,
8167                                 dext_rec->length);
8168                 if (!ret)
8169                         ret = 1;
8170         }
8171         return ret;
8172 }
8173
8174
8175 static int check_device_used(struct device_record *dev_rec,
8176                              struct device_extent_tree *dext_cache)
8177 {
8178         struct cache_extent *cache;
8179         struct device_extent_record *dev_extent_rec;
8180         u64 total_byte = 0;
8181
8182         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8183         while (cache) {
8184                 dev_extent_rec = container_of(cache,
8185                                               struct device_extent_record,
8186                                               cache);
8187                 if (dev_extent_rec->objectid != dev_rec->devid)
8188                         break;
8189
8190                 list_del_init(&dev_extent_rec->device_list);
8191                 total_byte += dev_extent_rec->length;
8192                 cache = next_cache_extent(cache);
8193         }
8194
8195         if (total_byte != dev_rec->byte_used) {
8196                 fprintf(stderr,
8197                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8198                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8199                         dev_rec->type, dev_rec->offset);
8200                 return -1;
8201         } else {
8202                 return 0;
8203         }
8204 }
8205
8206 /* check btrfs_dev_item -> btrfs_dev_extent */
8207 static int check_devices(struct rb_root *dev_cache,
8208                          struct device_extent_tree *dev_extent_cache)
8209 {
8210         struct rb_node *dev_node;
8211         struct device_record *dev_rec;
8212         struct device_extent_record *dext_rec;
8213         int err;
8214         int ret = 0;
8215
8216         dev_node = rb_first(dev_cache);
8217         while (dev_node) {
8218                 dev_rec = container_of(dev_node, struct device_record, node);
8219                 err = check_device_used(dev_rec, dev_extent_cache);
8220                 if (err)
8221                         ret = err;
8222
8223                 dev_node = rb_next(dev_node);
8224         }
8225         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8226                             device_list) {
8227                 fprintf(stderr,
8228                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8229                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8230                 if (!ret)
8231                         ret = 1;
8232         }
8233         return ret;
8234 }
8235
8236 static int add_root_item_to_list(struct list_head *head,
8237                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8238                                   u8 level, u8 drop_level,
8239                                   int level_size, struct btrfs_key *drop_key)
8240 {
8241
8242         struct root_item_record *ri_rec;
8243         ri_rec = malloc(sizeof(*ri_rec));
8244         if (!ri_rec)
8245                 return -ENOMEM;
8246         ri_rec->bytenr = bytenr;
8247         ri_rec->objectid = objectid;
8248         ri_rec->level = level;
8249         ri_rec->level_size = level_size;
8250         ri_rec->drop_level = drop_level;
8251         ri_rec->last_snapshot = last_snapshot;
8252         if (drop_key)
8253                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8254         list_add_tail(&ri_rec->list, head);
8255
8256         return 0;
8257 }
8258
8259 static void free_root_item_list(struct list_head *list)
8260 {
8261         struct root_item_record *ri_rec;
8262
8263         while (!list_empty(list)) {
8264                 ri_rec = list_first_entry(list, struct root_item_record,
8265                                           list);
8266                 list_del_init(&ri_rec->list);
8267                 free(ri_rec);
8268         }
8269 }
8270
8271 static int deal_root_from_list(struct list_head *list,
8272                                struct btrfs_root *root,
8273                                struct block_info *bits,
8274                                int bits_nr,
8275                                struct cache_tree *pending,
8276                                struct cache_tree *seen,
8277                                struct cache_tree *reada,
8278                                struct cache_tree *nodes,
8279                                struct cache_tree *extent_cache,
8280                                struct cache_tree *chunk_cache,
8281                                struct rb_root *dev_cache,
8282                                struct block_group_tree *block_group_cache,
8283                                struct device_extent_tree *dev_extent_cache)
8284 {
8285         int ret = 0;
8286         u64 last;
8287
8288         while (!list_empty(list)) {
8289                 struct root_item_record *rec;
8290                 struct extent_buffer *buf;
8291                 rec = list_entry(list->next,
8292                                  struct root_item_record, list);
8293                 last = 0;
8294                 buf = read_tree_block(root->fs_info->tree_root,
8295                                       rec->bytenr, rec->level_size, 0);
8296                 if (!extent_buffer_uptodate(buf)) {
8297                         free_extent_buffer(buf);
8298                         ret = -EIO;
8299                         break;
8300                 }
8301                 add_root_to_pending(buf, extent_cache, pending,
8302                                     seen, nodes, rec->objectid);
8303                 /*
8304                  * To rebuild extent tree, we need deal with snapshot
8305                  * one by one, otherwise we deal with node firstly which
8306                  * can maximize readahead.
8307                  */
8308                 while (1) {
8309                         ret = run_next_block(root, bits, bits_nr, &last,
8310                                              pending, seen, reada, nodes,
8311                                              extent_cache, chunk_cache,
8312                                              dev_cache, block_group_cache,
8313                                              dev_extent_cache, rec);
8314                         if (ret != 0)
8315                                 break;
8316                 }
8317                 free_extent_buffer(buf);
8318                 list_del(&rec->list);
8319                 free(rec);
8320                 if (ret < 0)
8321                         break;
8322         }
8323         while (ret >= 0) {
8324                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8325                                      reada, nodes, extent_cache, chunk_cache,
8326                                      dev_cache, block_group_cache,
8327                                      dev_extent_cache, NULL);
8328                 if (ret != 0) {
8329                         if (ret > 0)
8330                                 ret = 0;
8331                         break;
8332                 }
8333         }
8334         return ret;
8335 }
8336
8337 static int check_chunks_and_extents(struct btrfs_root *root)
8338 {
8339         struct rb_root dev_cache;
8340         struct cache_tree chunk_cache;
8341         struct block_group_tree block_group_cache;
8342         struct device_extent_tree dev_extent_cache;
8343         struct cache_tree extent_cache;
8344         struct cache_tree seen;
8345         struct cache_tree pending;
8346         struct cache_tree reada;
8347         struct cache_tree nodes;
8348         struct extent_io_tree excluded_extents;
8349         struct cache_tree corrupt_blocks;
8350         struct btrfs_path path;
8351         struct btrfs_key key;
8352         struct btrfs_key found_key;
8353         int ret, err = 0;
8354         struct block_info *bits;
8355         int bits_nr;
8356         struct extent_buffer *leaf;
8357         int slot;
8358         struct btrfs_root_item ri;
8359         struct list_head dropping_trees;
8360         struct list_head normal_trees;
8361         struct btrfs_root *root1;
8362         u64 objectid;
8363         u32 level_size;
8364         u8 level;
8365
8366         dev_cache = RB_ROOT;
8367         cache_tree_init(&chunk_cache);
8368         block_group_tree_init(&block_group_cache);
8369         device_extent_tree_init(&dev_extent_cache);
8370
8371         cache_tree_init(&extent_cache);
8372         cache_tree_init(&seen);
8373         cache_tree_init(&pending);
8374         cache_tree_init(&nodes);
8375         cache_tree_init(&reada);
8376         cache_tree_init(&corrupt_blocks);
8377         extent_io_tree_init(&excluded_extents);
8378         INIT_LIST_HEAD(&dropping_trees);
8379         INIT_LIST_HEAD(&normal_trees);
8380
8381         if (repair) {
8382                 root->fs_info->excluded_extents = &excluded_extents;
8383                 root->fs_info->fsck_extent_cache = &extent_cache;
8384                 root->fs_info->free_extent_hook = free_extent_hook;
8385                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8386         }
8387
8388         bits_nr = 1024;
8389         bits = malloc(bits_nr * sizeof(struct block_info));
8390         if (!bits) {
8391                 perror("malloc");
8392                 exit(1);
8393         }
8394
8395         if (ctx.progress_enabled) {
8396                 ctx.tp = TASK_EXTENTS;
8397                 task_start(ctx.info);
8398         }
8399
8400 again:
8401         root1 = root->fs_info->tree_root;
8402         level = btrfs_header_level(root1->node);
8403         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8404                                     root1->node->start, 0, level, 0,
8405                                     root1->nodesize, NULL);
8406         if (ret < 0)
8407                 goto out;
8408         root1 = root->fs_info->chunk_root;
8409         level = btrfs_header_level(root1->node);
8410         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8411                                     root1->node->start, 0, level, 0,
8412                                     root1->nodesize, NULL);
8413         if (ret < 0)
8414                 goto out;
8415         btrfs_init_path(&path);
8416         key.offset = 0;
8417         key.objectid = 0;
8418         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8419         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8420                                         &key, &path, 0, 0);
8421         if (ret < 0)
8422                 goto out;
8423         while(1) {
8424                 leaf = path.nodes[0];
8425                 slot = path.slots[0];
8426                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8427                         ret = btrfs_next_leaf(root, &path);
8428                         if (ret != 0)
8429                                 break;
8430                         leaf = path.nodes[0];
8431                         slot = path.slots[0];
8432                 }
8433                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8434                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8435                         unsigned long offset;
8436                         u64 last_snapshot;
8437
8438                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8439                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8440                         last_snapshot = btrfs_root_last_snapshot(&ri);
8441                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8442                                 level = btrfs_root_level(&ri);
8443                                 level_size = root->nodesize;
8444                                 ret = add_root_item_to_list(&normal_trees,
8445                                                 found_key.objectid,
8446                                                 btrfs_root_bytenr(&ri),
8447                                                 last_snapshot, level,
8448                                                 0, level_size, NULL);
8449                                 if (ret < 0)
8450                                         goto out;
8451                         } else {
8452                                 level = btrfs_root_level(&ri);
8453                                 level_size = root->nodesize;
8454                                 objectid = found_key.objectid;
8455                                 btrfs_disk_key_to_cpu(&found_key,
8456                                                       &ri.drop_progress);
8457                                 ret = add_root_item_to_list(&dropping_trees,
8458                                                 objectid,
8459                                                 btrfs_root_bytenr(&ri),
8460                                                 last_snapshot, level,
8461                                                 ri.drop_level,
8462                                                 level_size, &found_key);
8463                                 if (ret < 0)
8464                                         goto out;
8465                         }
8466                 }
8467                 path.slots[0]++;
8468         }
8469         btrfs_release_path(&path);
8470
8471         /*
8472          * check_block can return -EAGAIN if it fixes something, please keep
8473          * this in mind when dealing with return values from these functions, if
8474          * we get -EAGAIN we want to fall through and restart the loop.
8475          */
8476         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8477                                   &seen, &reada, &nodes, &extent_cache,
8478                                   &chunk_cache, &dev_cache, &block_group_cache,
8479                                   &dev_extent_cache);
8480         if (ret < 0) {
8481                 if (ret == -EAGAIN)
8482                         goto loop;
8483                 goto out;
8484         }
8485         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8486                                   &pending, &seen, &reada, &nodes,
8487                                   &extent_cache, &chunk_cache, &dev_cache,
8488                                   &block_group_cache, &dev_extent_cache);
8489         if (ret < 0) {
8490                 if (ret == -EAGAIN)
8491                         goto loop;
8492                 goto out;
8493         }
8494
8495         ret = check_chunks(&chunk_cache, &block_group_cache,
8496                            &dev_extent_cache, NULL, NULL, NULL, 0);
8497         if (ret) {
8498                 if (ret == -EAGAIN)
8499                         goto loop;
8500                 err = ret;
8501         }
8502
8503         ret = check_extent_refs(root, &extent_cache);
8504         if (ret < 0) {
8505                 if (ret == -EAGAIN)
8506                         goto loop;
8507                 goto out;
8508         }
8509
8510         ret = check_devices(&dev_cache, &dev_extent_cache);
8511         if (ret && err)
8512                 ret = err;
8513
8514 out:
8515         task_stop(ctx.info);
8516         if (repair) {
8517                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8518                 extent_io_tree_cleanup(&excluded_extents);
8519                 root->fs_info->fsck_extent_cache = NULL;
8520                 root->fs_info->free_extent_hook = NULL;
8521                 root->fs_info->corrupt_blocks = NULL;
8522                 root->fs_info->excluded_extents = NULL;
8523         }
8524         free(bits);
8525         free_chunk_cache_tree(&chunk_cache);
8526         free_device_cache_tree(&dev_cache);
8527         free_block_group_tree(&block_group_cache);
8528         free_device_extent_tree(&dev_extent_cache);
8529         free_extent_cache_tree(&seen);
8530         free_extent_cache_tree(&pending);
8531         free_extent_cache_tree(&reada);
8532         free_extent_cache_tree(&nodes);
8533         return ret;
8534 loop:
8535         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8536         free_extent_cache_tree(&seen);
8537         free_extent_cache_tree(&pending);
8538         free_extent_cache_tree(&reada);
8539         free_extent_cache_tree(&nodes);
8540         free_chunk_cache_tree(&chunk_cache);
8541         free_block_group_tree(&block_group_cache);
8542         free_device_cache_tree(&dev_cache);
8543         free_device_extent_tree(&dev_extent_cache);
8544         free_extent_record_cache(root->fs_info, &extent_cache);
8545         free_root_item_list(&normal_trees);
8546         free_root_item_list(&dropping_trees);
8547         extent_io_tree_cleanup(&excluded_extents);
8548         goto again;
8549 }
8550
8551 /*
8552  * Check backrefs of a tree block given by @bytenr or @eb.
8553  *
8554  * @root:       the root containing the @bytenr or @eb
8555  * @eb:         tree block extent buffer, can be NULL
8556  * @bytenr:     bytenr of the tree block to search
8557  * @level:      tree level of the tree block
8558  * @owner:      owner of the tree block
8559  *
8560  * Return >0 for any error found and output error message
8561  * Return 0 for no error found
8562  */
8563 static int check_tree_block_ref(struct btrfs_root *root,
8564                                 struct extent_buffer *eb, u64 bytenr,
8565                                 int level, u64 owner)
8566 {
8567         struct btrfs_key key;
8568         struct btrfs_root *extent_root = root->fs_info->extent_root;
8569         struct btrfs_path path;
8570         struct btrfs_extent_item *ei;
8571         struct btrfs_extent_inline_ref *iref;
8572         struct extent_buffer *leaf;
8573         unsigned long end;
8574         unsigned long ptr;
8575         int slot;
8576         int skinny_level;
8577         int type;
8578         u32 nodesize = root->nodesize;
8579         u32 item_size;
8580         u64 offset;
8581         int found_ref = 0;
8582         int err = 0;
8583         int ret;
8584
8585         btrfs_init_path(&path);
8586         key.objectid = bytenr;
8587         if (btrfs_fs_incompat(root->fs_info,
8588                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8589                 key.type = BTRFS_METADATA_ITEM_KEY;
8590         else
8591                 key.type = BTRFS_EXTENT_ITEM_KEY;
8592         key.offset = (u64)-1;
8593
8594         /* Search for the backref in extent tree */
8595         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8596         if (ret < 0) {
8597                 err |= BACKREF_MISSING;
8598                 goto out;
8599         }
8600         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8601         if (ret) {
8602                 err |= BACKREF_MISSING;
8603                 goto out;
8604         }
8605
8606         leaf = path.nodes[0];
8607         slot = path.slots[0];
8608         btrfs_item_key_to_cpu(leaf, &key, slot);
8609
8610         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8611
8612         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8613                 skinny_level = (int)key.offset;
8614                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8615         } else {
8616                 struct btrfs_tree_block_info *info;
8617
8618                 info = (struct btrfs_tree_block_info *)(ei + 1);
8619                 skinny_level = btrfs_tree_block_level(leaf, info);
8620                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8621         }
8622
8623         if (eb) {
8624                 u64 header_gen;
8625                 u64 extent_gen;
8626
8627                 if (!(btrfs_extent_flags(leaf, ei) &
8628                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8629                         error(
8630                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8631                                 key.objectid, nodesize,
8632                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8633                         err = BACKREF_MISMATCH;
8634                 }
8635                 header_gen = btrfs_header_generation(eb);
8636                 extent_gen = btrfs_extent_generation(leaf, ei);
8637                 if (header_gen != extent_gen) {
8638                         error(
8639         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8640                                 key.objectid, nodesize, header_gen,
8641                                 extent_gen);
8642                         err = BACKREF_MISMATCH;
8643                 }
8644                 if (level != skinny_level) {
8645                         error(
8646                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8647                                 key.objectid, nodesize, level, skinny_level);
8648                         err = BACKREF_MISMATCH;
8649                 }
8650                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8651                         error(
8652                         "extent[%llu %u] is referred by other roots than %llu",
8653                                 key.objectid, nodesize, root->objectid);
8654                         err = BACKREF_MISMATCH;
8655                 }
8656         }
8657
8658         /*
8659          * Iterate the extent/metadata item to find the exact backref
8660          */
8661         item_size = btrfs_item_size_nr(leaf, slot);
8662         ptr = (unsigned long)iref;
8663         end = (unsigned long)ei + item_size;
8664         while (ptr < end) {
8665                 iref = (struct btrfs_extent_inline_ref *)ptr;
8666                 type = btrfs_extent_inline_ref_type(leaf, iref);
8667                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8668
8669                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8670                         (offset == root->objectid || offset == owner)) {
8671                         found_ref = 1;
8672                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8673                         /* Check if the backref points to valid referencer */
8674                         found_ref = !check_tree_block_ref(root, NULL, offset,
8675                                                           level + 1, owner);
8676                 }
8677
8678                 if (found_ref)
8679                         break;
8680                 ptr += btrfs_extent_inline_ref_size(type);
8681         }
8682
8683         /*
8684          * Inlined extent item doesn't have what we need, check
8685          * TREE_BLOCK_REF_KEY
8686          */
8687         if (!found_ref) {
8688                 btrfs_release_path(&path);
8689                 key.objectid = bytenr;
8690                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8691                 key.offset = root->objectid;
8692
8693                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8694                 if (!ret)
8695                         found_ref = 1;
8696         }
8697         if (!found_ref)
8698                 err |= BACKREF_MISSING;
8699 out:
8700         btrfs_release_path(&path);
8701         if (eb && (err & BACKREF_MISSING))
8702                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8703                         bytenr, nodesize, owner, level);
8704         return err;
8705 }
8706
8707 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8708                            struct btrfs_root *root, int overwrite)
8709 {
8710         struct extent_buffer *c;
8711         struct extent_buffer *old = root->node;
8712         int level;
8713         int ret;
8714         struct btrfs_disk_key disk_key = {0,0,0};
8715
8716         level = 0;
8717
8718         if (overwrite) {
8719                 c = old;
8720                 extent_buffer_get(c);
8721                 goto init;
8722         }
8723         c = btrfs_alloc_free_block(trans, root,
8724                                    root->nodesize,
8725                                    root->root_key.objectid,
8726                                    &disk_key, level, 0, 0);
8727         if (IS_ERR(c)) {
8728                 c = old;
8729                 extent_buffer_get(c);
8730                 overwrite = 1;
8731         }
8732 init:
8733         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8734         btrfs_set_header_level(c, level);
8735         btrfs_set_header_bytenr(c, c->start);
8736         btrfs_set_header_generation(c, trans->transid);
8737         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8738         btrfs_set_header_owner(c, root->root_key.objectid);
8739
8740         write_extent_buffer(c, root->fs_info->fsid,
8741                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8742
8743         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8744                             btrfs_header_chunk_tree_uuid(c),
8745                             BTRFS_UUID_SIZE);
8746
8747         btrfs_mark_buffer_dirty(c);
8748         /*
8749          * this case can happen in the following case:
8750          *
8751          * 1.overwrite previous root.
8752          *
8753          * 2.reinit reloc data root, this is because we skip pin
8754          * down reloc data tree before which means we can allocate
8755          * same block bytenr here.
8756          */
8757         if (old->start == c->start) {
8758                 btrfs_set_root_generation(&root->root_item,
8759                                           trans->transid);
8760                 root->root_item.level = btrfs_header_level(root->node);
8761                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8762                                         &root->root_key, &root->root_item);
8763                 if (ret) {
8764                         free_extent_buffer(c);
8765                         return ret;
8766                 }
8767         }
8768         free_extent_buffer(old);
8769         root->node = c;
8770         add_root_to_dirty_list(root);
8771         return 0;
8772 }
8773
8774 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8775                                 struct extent_buffer *eb, int tree_root)
8776 {
8777         struct extent_buffer *tmp;
8778         struct btrfs_root_item *ri;
8779         struct btrfs_key key;
8780         u64 bytenr;
8781         u32 nodesize;
8782         int level = btrfs_header_level(eb);
8783         int nritems;
8784         int ret;
8785         int i;
8786
8787         /*
8788          * If we have pinned this block before, don't pin it again.
8789          * This can not only avoid forever loop with broken filesystem
8790          * but also give us some speedups.
8791          */
8792         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8793                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8794                 return 0;
8795
8796         btrfs_pin_extent(fs_info, eb->start, eb->len);
8797
8798         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8799         nritems = btrfs_header_nritems(eb);
8800         for (i = 0; i < nritems; i++) {
8801                 if (level == 0) {
8802                         btrfs_item_key_to_cpu(eb, &key, i);
8803                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8804                                 continue;
8805                         /* Skip the extent root and reloc roots */
8806                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8807                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8808                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8809                                 continue;
8810                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8811                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8812
8813                         /*
8814                          * If at any point we start needing the real root we
8815                          * will have to build a stump root for the root we are
8816                          * in, but for now this doesn't actually use the root so
8817                          * just pass in extent_root.
8818                          */
8819                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8820                                               nodesize, 0);
8821                         if (!extent_buffer_uptodate(tmp)) {
8822                                 fprintf(stderr, "Error reading root block\n");
8823                                 return -EIO;
8824                         }
8825                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8826                         free_extent_buffer(tmp);
8827                         if (ret)
8828                                 return ret;
8829                 } else {
8830                         bytenr = btrfs_node_blockptr(eb, i);
8831
8832                         /* If we aren't the tree root don't read the block */
8833                         if (level == 1 && !tree_root) {
8834                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8835                                 continue;
8836                         }
8837
8838                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8839                                               nodesize, 0);
8840                         if (!extent_buffer_uptodate(tmp)) {
8841                                 fprintf(stderr, "Error reading tree block\n");
8842                                 return -EIO;
8843                         }
8844                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8845                         free_extent_buffer(tmp);
8846                         if (ret)
8847                                 return ret;
8848                 }
8849         }
8850
8851         return 0;
8852 }
8853
8854 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8855 {
8856         int ret;
8857
8858         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8859         if (ret)
8860                 return ret;
8861
8862         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8863 }
8864
8865 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8866 {
8867         struct btrfs_block_group_cache *cache;
8868         struct btrfs_path *path;
8869         struct extent_buffer *leaf;
8870         struct btrfs_chunk *chunk;
8871         struct btrfs_key key;
8872         int ret;
8873         u64 start;
8874
8875         path = btrfs_alloc_path();
8876         if (!path)
8877                 return -ENOMEM;
8878
8879         key.objectid = 0;
8880         key.type = BTRFS_CHUNK_ITEM_KEY;
8881         key.offset = 0;
8882
8883         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8884         if (ret < 0) {
8885                 btrfs_free_path(path);
8886                 return ret;
8887         }
8888
8889         /*
8890          * We do this in case the block groups were screwed up and had alloc
8891          * bits that aren't actually set on the chunks.  This happens with
8892          * restored images every time and could happen in real life I guess.
8893          */
8894         fs_info->avail_data_alloc_bits = 0;
8895         fs_info->avail_metadata_alloc_bits = 0;
8896         fs_info->avail_system_alloc_bits = 0;
8897
8898         /* First we need to create the in-memory block groups */
8899         while (1) {
8900                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8901                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8902                         if (ret < 0) {
8903                                 btrfs_free_path(path);
8904                                 return ret;
8905                         }
8906                         if (ret) {
8907                                 ret = 0;
8908                                 break;
8909                         }
8910                 }
8911                 leaf = path->nodes[0];
8912                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8913                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8914                         path->slots[0]++;
8915                         continue;
8916                 }
8917
8918                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8919                                        struct btrfs_chunk);
8920                 btrfs_add_block_group(fs_info, 0,
8921                                       btrfs_chunk_type(leaf, chunk),
8922                                       key.objectid, key.offset,
8923                                       btrfs_chunk_length(leaf, chunk));
8924                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8925                                  key.offset + btrfs_chunk_length(leaf, chunk),
8926                                  GFP_NOFS);
8927                 path->slots[0]++;
8928         }
8929         start = 0;
8930         while (1) {
8931                 cache = btrfs_lookup_first_block_group(fs_info, start);
8932                 if (!cache)
8933                         break;
8934                 cache->cached = 1;
8935                 start = cache->key.objectid + cache->key.offset;
8936         }
8937
8938         btrfs_free_path(path);
8939         return 0;
8940 }
8941
8942 static int reset_balance(struct btrfs_trans_handle *trans,
8943                          struct btrfs_fs_info *fs_info)
8944 {
8945         struct btrfs_root *root = fs_info->tree_root;
8946         struct btrfs_path *path;
8947         struct extent_buffer *leaf;
8948         struct btrfs_key key;
8949         int del_slot, del_nr = 0;
8950         int ret;
8951         int found = 0;
8952
8953         path = btrfs_alloc_path();
8954         if (!path)
8955                 return -ENOMEM;
8956
8957         key.objectid = BTRFS_BALANCE_OBJECTID;
8958         key.type = BTRFS_BALANCE_ITEM_KEY;
8959         key.offset = 0;
8960
8961         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8962         if (ret) {
8963                 if (ret > 0)
8964                         ret = 0;
8965                 if (!ret)
8966                         goto reinit_data_reloc;
8967                 else
8968                         goto out;
8969         }
8970
8971         ret = btrfs_del_item(trans, root, path);
8972         if (ret)
8973                 goto out;
8974         btrfs_release_path(path);
8975
8976         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8977         key.type = BTRFS_ROOT_ITEM_KEY;
8978         key.offset = 0;
8979
8980         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8981         if (ret < 0)
8982                 goto out;
8983         while (1) {
8984                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8985                         if (!found)
8986                                 break;
8987
8988                         if (del_nr) {
8989                                 ret = btrfs_del_items(trans, root, path,
8990                                                       del_slot, del_nr);
8991                                 del_nr = 0;
8992                                 if (ret)
8993                                         goto out;
8994                         }
8995                         key.offset++;
8996                         btrfs_release_path(path);
8997
8998                         found = 0;
8999                         ret = btrfs_search_slot(trans, root, &key, path,
9000                                                 -1, 1);
9001                         if (ret < 0)
9002                                 goto out;
9003                         continue;
9004                 }
9005                 found = 1;
9006                 leaf = path->nodes[0];
9007                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9008                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9009                         break;
9010                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9011                         path->slots[0]++;
9012                         continue;
9013                 }
9014                 if (!del_nr) {
9015                         del_slot = path->slots[0];
9016                         del_nr = 1;
9017                 } else {
9018                         del_nr++;
9019                 }
9020                 path->slots[0]++;
9021         }
9022
9023         if (del_nr) {
9024                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9025                 if (ret)
9026                         goto out;
9027         }
9028         btrfs_release_path(path);
9029
9030 reinit_data_reloc:
9031         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9032         key.type = BTRFS_ROOT_ITEM_KEY;
9033         key.offset = (u64)-1;
9034         root = btrfs_read_fs_root(fs_info, &key);
9035         if (IS_ERR(root)) {
9036                 fprintf(stderr, "Error reading data reloc tree\n");
9037                 ret = PTR_ERR(root);
9038                 goto out;
9039         }
9040         record_root_in_trans(trans, root);
9041         ret = btrfs_fsck_reinit_root(trans, root, 0);
9042         if (ret)
9043                 goto out;
9044         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9045 out:
9046         btrfs_free_path(path);
9047         return ret;
9048 }
9049
9050 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9051                               struct btrfs_fs_info *fs_info)
9052 {
9053         u64 start = 0;
9054         int ret;
9055
9056         /*
9057          * The only reason we don't do this is because right now we're just
9058          * walking the trees we find and pinning down their bytes, we don't look
9059          * at any of the leaves.  In order to do mixed groups we'd have to check
9060          * the leaves of any fs roots and pin down the bytes for any file
9061          * extents we find.  Not hard but why do it if we don't have to?
9062          */
9063         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9064                 fprintf(stderr, "We don't support re-initing the extent tree "
9065                         "for mixed block groups yet, please notify a btrfs "
9066                         "developer you want to do this so they can add this "
9067                         "functionality.\n");
9068                 return -EINVAL;
9069         }
9070
9071         /*
9072          * first we need to walk all of the trees except the extent tree and pin
9073          * down the bytes that are in use so we don't overwrite any existing
9074          * metadata.
9075          */
9076         ret = pin_metadata_blocks(fs_info);
9077         if (ret) {
9078                 fprintf(stderr, "error pinning down used bytes\n");
9079                 return ret;
9080         }
9081
9082         /*
9083          * Need to drop all the block groups since we're going to recreate all
9084          * of them again.
9085          */
9086         btrfs_free_block_groups(fs_info);
9087         ret = reset_block_groups(fs_info);
9088         if (ret) {
9089                 fprintf(stderr, "error resetting the block groups\n");
9090                 return ret;
9091         }
9092
9093         /* Ok we can allocate now, reinit the extent root */
9094         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9095         if (ret) {
9096                 fprintf(stderr, "extent root initialization failed\n");
9097                 /*
9098                  * When the transaction code is updated we should end the
9099                  * transaction, but for now progs only knows about commit so
9100                  * just return an error.
9101                  */
9102                 return ret;
9103         }
9104
9105         /*
9106          * Now we have all the in-memory block groups setup so we can make
9107          * allocations properly, and the metadata we care about is safe since we
9108          * pinned all of it above.
9109          */
9110         while (1) {
9111                 struct btrfs_block_group_cache *cache;
9112
9113                 cache = btrfs_lookup_first_block_group(fs_info, start);
9114                 if (!cache)
9115                         break;
9116                 start = cache->key.objectid + cache->key.offset;
9117                 ret = btrfs_insert_item(trans, fs_info->extent_root,
9118                                         &cache->key, &cache->item,
9119                                         sizeof(cache->item));
9120                 if (ret) {
9121                         fprintf(stderr, "Error adding block group\n");
9122                         return ret;
9123                 }
9124                 btrfs_extent_post_op(trans, fs_info->extent_root);
9125         }
9126
9127         ret = reset_balance(trans, fs_info);
9128         if (ret)
9129                 fprintf(stderr, "error resetting the pending balance\n");
9130
9131         return ret;
9132 }
9133
9134 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9135 {
9136         struct btrfs_path *path;
9137         struct btrfs_trans_handle *trans;
9138         struct btrfs_key key;
9139         int ret;
9140
9141         printf("Recowing metadata block %llu\n", eb->start);
9142         key.objectid = btrfs_header_owner(eb);
9143         key.type = BTRFS_ROOT_ITEM_KEY;
9144         key.offset = (u64)-1;
9145
9146         root = btrfs_read_fs_root(root->fs_info, &key);
9147         if (IS_ERR(root)) {
9148                 fprintf(stderr, "Couldn't find owner root %llu\n",
9149                         key.objectid);
9150                 return PTR_ERR(root);
9151         }
9152
9153         path = btrfs_alloc_path();
9154         if (!path)
9155                 return -ENOMEM;
9156
9157         trans = btrfs_start_transaction(root, 1);
9158         if (IS_ERR(trans)) {
9159                 btrfs_free_path(path);
9160                 return PTR_ERR(trans);
9161         }
9162
9163         path->lowest_level = btrfs_header_level(eb);
9164         if (path->lowest_level)
9165                 btrfs_node_key_to_cpu(eb, &key, 0);
9166         else
9167                 btrfs_item_key_to_cpu(eb, &key, 0);
9168
9169         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9170         btrfs_commit_transaction(trans, root);
9171         btrfs_free_path(path);
9172         return ret;
9173 }
9174
9175 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9176 {
9177         struct btrfs_path *path;
9178         struct btrfs_trans_handle *trans;
9179         struct btrfs_key key;
9180         int ret;
9181
9182         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9183                bad->key.type, bad->key.offset);
9184         key.objectid = bad->root_id;
9185         key.type = BTRFS_ROOT_ITEM_KEY;
9186         key.offset = (u64)-1;
9187
9188         root = btrfs_read_fs_root(root->fs_info, &key);
9189         if (IS_ERR(root)) {
9190                 fprintf(stderr, "Couldn't find owner root %llu\n",
9191                         key.objectid);
9192                 return PTR_ERR(root);
9193         }
9194
9195         path = btrfs_alloc_path();
9196         if (!path)
9197                 return -ENOMEM;
9198
9199         trans = btrfs_start_transaction(root, 1);
9200         if (IS_ERR(trans)) {
9201                 btrfs_free_path(path);
9202                 return PTR_ERR(trans);
9203         }
9204
9205         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9206         if (ret) {
9207                 if (ret > 0)
9208                         ret = 0;
9209                 goto out;
9210         }
9211         ret = btrfs_del_item(trans, root, path);
9212 out:
9213         btrfs_commit_transaction(trans, root);
9214         btrfs_free_path(path);
9215         return ret;
9216 }
9217
9218 static int zero_log_tree(struct btrfs_root *root)
9219 {
9220         struct btrfs_trans_handle *trans;
9221         int ret;
9222
9223         trans = btrfs_start_transaction(root, 1);
9224         if (IS_ERR(trans)) {
9225                 ret = PTR_ERR(trans);
9226                 return ret;
9227         }
9228         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9229         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9230         ret = btrfs_commit_transaction(trans, root);
9231         return ret;
9232 }
9233
9234 static int populate_csum(struct btrfs_trans_handle *trans,
9235                          struct btrfs_root *csum_root, char *buf, u64 start,
9236                          u64 len)
9237 {
9238         u64 offset = 0;
9239         u64 sectorsize;
9240         int ret = 0;
9241
9242         while (offset < len) {
9243                 sectorsize = csum_root->sectorsize;
9244                 ret = read_extent_data(csum_root, buf, start + offset,
9245                                        &sectorsize, 0);
9246                 if (ret)
9247                         break;
9248                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9249                                             start + offset, buf, sectorsize);
9250                 if (ret)
9251                         break;
9252                 offset += sectorsize;
9253         }
9254         return ret;
9255 }
9256
9257 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9258                                       struct btrfs_root *csum_root,
9259                                       struct btrfs_root *cur_root)
9260 {
9261         struct btrfs_path *path;
9262         struct btrfs_key key;
9263         struct extent_buffer *node;
9264         struct btrfs_file_extent_item *fi;
9265         char *buf = NULL;
9266         u64 start = 0;
9267         u64 len = 0;
9268         int slot = 0;
9269         int ret = 0;
9270
9271         path = btrfs_alloc_path();
9272         if (!path)
9273                 return -ENOMEM;
9274         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9275         if (!buf) {
9276                 ret = -ENOMEM;
9277                 goto out;
9278         }
9279
9280         key.objectid = 0;
9281         key.offset = 0;
9282         key.type = 0;
9283
9284         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9285         if (ret < 0)
9286                 goto out;
9287         /* Iterate all regular file extents and fill its csum */
9288         while (1) {
9289                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9290
9291                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9292                         goto next;
9293                 node = path->nodes[0];
9294                 slot = path->slots[0];
9295                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9296                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9297                         goto next;
9298                 start = btrfs_file_extent_disk_bytenr(node, fi);
9299                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9300
9301                 ret = populate_csum(trans, csum_root, buf, start, len);
9302                 if (ret == -EEXIST)
9303                         ret = 0;
9304                 if (ret < 0)
9305                         goto out;
9306 next:
9307                 /*
9308                  * TODO: if next leaf is corrupted, jump to nearest next valid
9309                  * leaf.
9310                  */
9311                 ret = btrfs_next_item(cur_root, path);
9312                 if (ret < 0)
9313                         goto out;
9314                 if (ret > 0) {
9315                         ret = 0;
9316                         goto out;
9317                 }
9318         }
9319
9320 out:
9321         btrfs_free_path(path);
9322         free(buf);
9323         return ret;
9324 }
9325
9326 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9327                                   struct btrfs_root *csum_root)
9328 {
9329         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9330         struct btrfs_path *path;
9331         struct btrfs_root *tree_root = fs_info->tree_root;
9332         struct btrfs_root *cur_root;
9333         struct extent_buffer *node;
9334         struct btrfs_key key;
9335         int slot = 0;
9336         int ret = 0;
9337
9338         path = btrfs_alloc_path();
9339         if (!path)
9340                 return -ENOMEM;
9341
9342         key.objectid = BTRFS_FS_TREE_OBJECTID;
9343         key.offset = 0;
9344         key.type = BTRFS_ROOT_ITEM_KEY;
9345
9346         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9347         if (ret < 0)
9348                 goto out;
9349         if (ret > 0) {
9350                 ret = -ENOENT;
9351                 goto out;
9352         }
9353
9354         while (1) {
9355                 node = path->nodes[0];
9356                 slot = path->slots[0];
9357                 btrfs_item_key_to_cpu(node, &key, slot);
9358                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9359                         goto out;
9360                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9361                         goto next;
9362                 if (!is_fstree(key.objectid))
9363                         goto next;
9364                 key.offset = (u64)-1;
9365
9366                 cur_root = btrfs_read_fs_root(fs_info, &key);
9367                 if (IS_ERR(cur_root) || !cur_root) {
9368                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9369                                 key.objectid);
9370                         goto out;
9371                 }
9372                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9373                                 cur_root);
9374                 if (ret < 0)
9375                         goto out;
9376 next:
9377                 ret = btrfs_next_item(tree_root, path);
9378                 if (ret > 0) {
9379                         ret = 0;
9380                         goto out;
9381                 }
9382                 if (ret < 0)
9383                         goto out;
9384         }
9385
9386 out:
9387         btrfs_free_path(path);
9388         return ret;
9389 }
9390
9391 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9392                                       struct btrfs_root *csum_root)
9393 {
9394         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9395         struct btrfs_path *path;
9396         struct btrfs_extent_item *ei;
9397         struct extent_buffer *leaf;
9398         char *buf;
9399         struct btrfs_key key;
9400         int ret;
9401
9402         path = btrfs_alloc_path();
9403         if (!path)
9404                 return -ENOMEM;
9405
9406         key.objectid = 0;
9407         key.type = BTRFS_EXTENT_ITEM_KEY;
9408         key.offset = 0;
9409
9410         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9411         if (ret < 0) {
9412                 btrfs_free_path(path);
9413                 return ret;
9414         }
9415
9416         buf = malloc(csum_root->sectorsize);
9417         if (!buf) {
9418                 btrfs_free_path(path);
9419                 return -ENOMEM;
9420         }
9421
9422         while (1) {
9423                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9424                         ret = btrfs_next_leaf(extent_root, path);
9425                         if (ret < 0)
9426                                 break;
9427                         if (ret) {
9428                                 ret = 0;
9429                                 break;
9430                         }
9431                 }
9432                 leaf = path->nodes[0];
9433
9434                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9435                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9436                         path->slots[0]++;
9437                         continue;
9438                 }
9439
9440                 ei = btrfs_item_ptr(leaf, path->slots[0],
9441                                     struct btrfs_extent_item);
9442                 if (!(btrfs_extent_flags(leaf, ei) &
9443                       BTRFS_EXTENT_FLAG_DATA)) {
9444                         path->slots[0]++;
9445                         continue;
9446                 }
9447
9448                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9449                                     key.offset);
9450                 if (ret)
9451                         break;
9452                 path->slots[0]++;
9453         }
9454
9455         btrfs_free_path(path);
9456         free(buf);
9457         return ret;
9458 }
9459
9460 /*
9461  * Recalculate the csum and put it into the csum tree.
9462  *
9463  * Extent tree init will wipe out all the extent info, so in that case, we
9464  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9465  * will use fs/subvol trees to init the csum tree.
9466  */
9467 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9468                           struct btrfs_root *csum_root,
9469                           int search_fs_tree)
9470 {
9471         if (search_fs_tree)
9472                 return fill_csum_tree_from_fs(trans, csum_root);
9473         else
9474                 return fill_csum_tree_from_extent(trans, csum_root);
9475 }
9476
9477 static void free_roots_info_cache(void)
9478 {
9479         if (!roots_info_cache)
9480                 return;
9481
9482         while (!cache_tree_empty(roots_info_cache)) {
9483                 struct cache_extent *entry;
9484                 struct root_item_info *rii;
9485
9486                 entry = first_cache_extent(roots_info_cache);
9487                 if (!entry)
9488                         break;
9489                 remove_cache_extent(roots_info_cache, entry);
9490                 rii = container_of(entry, struct root_item_info, cache_extent);
9491                 free(rii);
9492         }
9493
9494         free(roots_info_cache);
9495         roots_info_cache = NULL;
9496 }
9497
9498 static int build_roots_info_cache(struct btrfs_fs_info *info)
9499 {
9500         int ret = 0;
9501         struct btrfs_key key;
9502         struct extent_buffer *leaf;
9503         struct btrfs_path *path;
9504
9505         if (!roots_info_cache) {
9506                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9507                 if (!roots_info_cache)
9508                         return -ENOMEM;
9509                 cache_tree_init(roots_info_cache);
9510         }
9511
9512         path = btrfs_alloc_path();
9513         if (!path)
9514                 return -ENOMEM;
9515
9516         key.objectid = 0;
9517         key.type = BTRFS_EXTENT_ITEM_KEY;
9518         key.offset = 0;
9519
9520         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9521         if (ret < 0)
9522                 goto out;
9523         leaf = path->nodes[0];
9524
9525         while (1) {
9526                 struct btrfs_key found_key;
9527                 struct btrfs_extent_item *ei;
9528                 struct btrfs_extent_inline_ref *iref;
9529                 int slot = path->slots[0];
9530                 int type;
9531                 u64 flags;
9532                 u64 root_id;
9533                 u8 level;
9534                 struct cache_extent *entry;
9535                 struct root_item_info *rii;
9536
9537                 if (slot >= btrfs_header_nritems(leaf)) {
9538                         ret = btrfs_next_leaf(info->extent_root, path);
9539                         if (ret < 0) {
9540                                 break;
9541                         } else if (ret) {
9542                                 ret = 0;
9543                                 break;
9544                         }
9545                         leaf = path->nodes[0];
9546                         slot = path->slots[0];
9547                 }
9548
9549                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9550
9551                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9552                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9553                         goto next;
9554
9555                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9556                 flags = btrfs_extent_flags(leaf, ei);
9557
9558                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9559                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9560                         goto next;
9561
9562                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9563                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9564                         level = found_key.offset;
9565                 } else {
9566                         struct btrfs_tree_block_info *binfo;
9567
9568                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9569                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9570                         level = btrfs_tree_block_level(leaf, binfo);
9571                 }
9572
9573                 /*
9574                  * For a root extent, it must be of the following type and the
9575                  * first (and only one) iref in the item.
9576                  */
9577                 type = btrfs_extent_inline_ref_type(leaf, iref);
9578                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9579                         goto next;
9580
9581                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9582                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9583                 if (!entry) {
9584                         rii = malloc(sizeof(struct root_item_info));
9585                         if (!rii) {
9586                                 ret = -ENOMEM;
9587                                 goto out;
9588                         }
9589                         rii->cache_extent.start = root_id;
9590                         rii->cache_extent.size = 1;
9591                         rii->level = (u8)-1;
9592                         entry = &rii->cache_extent;
9593                         ret = insert_cache_extent(roots_info_cache, entry);
9594                         ASSERT(ret == 0);
9595                 } else {
9596                         rii = container_of(entry, struct root_item_info,
9597                                            cache_extent);
9598                 }
9599
9600                 ASSERT(rii->cache_extent.start == root_id);
9601                 ASSERT(rii->cache_extent.size == 1);
9602
9603                 if (level > rii->level || rii->level == (u8)-1) {
9604                         rii->level = level;
9605                         rii->bytenr = found_key.objectid;
9606                         rii->gen = btrfs_extent_generation(leaf, ei);
9607                         rii->node_count = 1;
9608                 } else if (level == rii->level) {
9609                         rii->node_count++;
9610                 }
9611 next:
9612                 path->slots[0]++;
9613         }
9614
9615 out:
9616         btrfs_free_path(path);
9617
9618         return ret;
9619 }
9620
9621 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9622                                   struct btrfs_path *path,
9623                                   const struct btrfs_key *root_key,
9624                                   const int read_only_mode)
9625 {
9626         const u64 root_id = root_key->objectid;
9627         struct cache_extent *entry;
9628         struct root_item_info *rii;
9629         struct btrfs_root_item ri;
9630         unsigned long offset;
9631
9632         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9633         if (!entry) {
9634                 fprintf(stderr,
9635                         "Error: could not find extent items for root %llu\n",
9636                         root_key->objectid);
9637                 return -ENOENT;
9638         }
9639
9640         rii = container_of(entry, struct root_item_info, cache_extent);
9641         ASSERT(rii->cache_extent.start == root_id);
9642         ASSERT(rii->cache_extent.size == 1);
9643
9644         if (rii->node_count != 1) {
9645                 fprintf(stderr,
9646                         "Error: could not find btree root extent for root %llu\n",
9647                         root_id);
9648                 return -ENOENT;
9649         }
9650
9651         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9652         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9653
9654         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9655             btrfs_root_level(&ri) != rii->level ||
9656             btrfs_root_generation(&ri) != rii->gen) {
9657
9658                 /*
9659                  * If we're in repair mode but our caller told us to not update
9660                  * the root item, i.e. just check if it needs to be updated, don't
9661                  * print this message, since the caller will call us again shortly
9662                  * for the same root item without read only mode (the caller will
9663                  * open a transaction first).
9664                  */
9665                 if (!(read_only_mode && repair))
9666                         fprintf(stderr,
9667                                 "%sroot item for root %llu,"
9668                                 " current bytenr %llu, current gen %llu, current level %u,"
9669                                 " new bytenr %llu, new gen %llu, new level %u\n",
9670                                 (read_only_mode ? "" : "fixing "),
9671                                 root_id,
9672                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9673                                 btrfs_root_level(&ri),
9674                                 rii->bytenr, rii->gen, rii->level);
9675
9676                 if (btrfs_root_generation(&ri) > rii->gen) {
9677                         fprintf(stderr,
9678                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9679                                 root_id, btrfs_root_generation(&ri), rii->gen);
9680                         return -EINVAL;
9681                 }
9682
9683                 if (!read_only_mode) {
9684                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9685                         btrfs_set_root_level(&ri, rii->level);
9686                         btrfs_set_root_generation(&ri, rii->gen);
9687                         write_extent_buffer(path->nodes[0], &ri,
9688                                             offset, sizeof(ri));
9689                 }
9690
9691                 return 1;
9692         }
9693
9694         return 0;
9695 }
9696
9697 /*
9698  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9699  * caused read-only snapshots to be corrupted if they were created at a moment
9700  * when the source subvolume/snapshot had orphan items. The issue was that the
9701  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9702  * node instead of the post orphan cleanup root node.
9703  * So this function, and its callees, just detects and fixes those cases. Even
9704  * though the regression was for read-only snapshots, this function applies to
9705  * any snapshot/subvolume root.
9706  * This must be run before any other repair code - not doing it so, makes other
9707  * repair code delete or modify backrefs in the extent tree for example, which
9708  * will result in an inconsistent fs after repairing the root items.
9709  */
9710 static int repair_root_items(struct btrfs_fs_info *info)
9711 {
9712         struct btrfs_path *path = NULL;
9713         struct btrfs_key key;
9714         struct extent_buffer *leaf;
9715         struct btrfs_trans_handle *trans = NULL;
9716         int ret = 0;
9717         int bad_roots = 0;
9718         int need_trans = 0;
9719
9720         ret = build_roots_info_cache(info);
9721         if (ret)
9722                 goto out;
9723
9724         path = btrfs_alloc_path();
9725         if (!path) {
9726                 ret = -ENOMEM;
9727                 goto out;
9728         }
9729
9730         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9731         key.type = BTRFS_ROOT_ITEM_KEY;
9732         key.offset = 0;
9733
9734 again:
9735         /*
9736          * Avoid opening and committing transactions if a leaf doesn't have
9737          * any root items that need to be fixed, so that we avoid rotating
9738          * backup roots unnecessarily.
9739          */
9740         if (need_trans) {
9741                 trans = btrfs_start_transaction(info->tree_root, 1);
9742                 if (IS_ERR(trans)) {
9743                         ret = PTR_ERR(trans);
9744                         goto out;
9745                 }
9746         }
9747
9748         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9749                                 0, trans ? 1 : 0);
9750         if (ret < 0)
9751                 goto out;
9752         leaf = path->nodes[0];
9753
9754         while (1) {
9755                 struct btrfs_key found_key;
9756
9757                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9758                         int no_more_keys = find_next_key(path, &key);
9759
9760                         btrfs_release_path(path);
9761                         if (trans) {
9762                                 ret = btrfs_commit_transaction(trans,
9763                                                                info->tree_root);
9764                                 trans = NULL;
9765                                 if (ret < 0)
9766                                         goto out;
9767                         }
9768                         need_trans = 0;
9769                         if (no_more_keys)
9770                                 break;
9771                         goto again;
9772                 }
9773
9774                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9775
9776                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9777                         goto next;
9778                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9779                         goto next;
9780
9781                 ret = maybe_repair_root_item(info, path, &found_key,
9782                                              trans ? 0 : 1);
9783                 if (ret < 0)
9784                         goto out;
9785                 if (ret) {
9786                         if (!trans && repair) {
9787                                 need_trans = 1;
9788                                 key = found_key;
9789                                 btrfs_release_path(path);
9790                                 goto again;
9791                         }
9792                         bad_roots++;
9793                 }
9794 next:
9795                 path->slots[0]++;
9796         }
9797         ret = 0;
9798 out:
9799         free_roots_info_cache();
9800         btrfs_free_path(path);
9801         if (trans)
9802                 btrfs_commit_transaction(trans, info->tree_root);
9803         if (ret < 0)
9804                 return ret;
9805
9806         return bad_roots;
9807 }
9808
9809 const char * const cmd_check_usage[] = {
9810         "btrfs check [options] <device>",
9811         "Check structural integrity of a filesystem (unmounted).",
9812         "Check structural integrity of an unmounted filesystem. Verify internal",
9813         "trees' consistency and item connectivity. In the repair mode try to",
9814         "fix the problems found.",
9815         "WARNING: the repair mode is considered dangerous",
9816         "",
9817         "-s|--super <superblock>     use this superblock copy",
9818         "-b|--backup                 use the first valid backup root copy",
9819         "--repair                    try to repair the filesystem",
9820         "--readonly                  run in read-only mode (default)",
9821         "--init-csum-tree            create a new CRC tree",
9822         "--init-extent-tree          create a new extent tree",
9823         "--check-data-csum           verify checksums of data blocks",
9824         "-Q|--qgroup-report           print a report on qgroup consistency",
9825         "-E|--subvol-extents <subvolid>",
9826         "                            print subvolume extents and sharing state",
9827         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9828         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9829         "-p|--progress               indicate progress",
9830         NULL
9831 };
9832
9833 int cmd_check(int argc, char **argv)
9834 {
9835         struct cache_tree root_cache;
9836         struct btrfs_root *root;
9837         struct btrfs_fs_info *info;
9838         u64 bytenr = 0;
9839         u64 subvolid = 0;
9840         u64 tree_root_bytenr = 0;
9841         u64 chunk_root_bytenr = 0;
9842         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9843         int ret;
9844         u64 num;
9845         int init_csum_tree = 0;
9846         int readonly = 0;
9847         int qgroup_report = 0;
9848         int qgroups_repaired = 0;
9849         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9850
9851         while(1) {
9852                 int c;
9853                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9854                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9855                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9856                 static const struct option long_options[] = {
9857                         { "super", required_argument, NULL, 's' },
9858                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9859                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9860                         { "init-csum-tree", no_argument, NULL,
9861                                 GETOPT_VAL_INIT_CSUM },
9862                         { "init-extent-tree", no_argument, NULL,
9863                                 GETOPT_VAL_INIT_EXTENT },
9864                         { "check-data-csum", no_argument, NULL,
9865                                 GETOPT_VAL_CHECK_CSUM },
9866                         { "backup", no_argument, NULL, 'b' },
9867                         { "subvol-extents", required_argument, NULL, 'E' },
9868                         { "qgroup-report", no_argument, NULL, 'Q' },
9869                         { "tree-root", required_argument, NULL, 'r' },
9870                         { "chunk-root", required_argument, NULL,
9871                                 GETOPT_VAL_CHUNK_TREE },
9872                         { "progress", no_argument, NULL, 'p' },
9873                         { NULL, 0, NULL, 0}
9874                 };
9875
9876                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9877                 if (c < 0)
9878                         break;
9879                 switch(c) {
9880                         case 'a': /* ignored */ break;
9881                         case 'b':
9882                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9883                                 break;
9884                         case 's':
9885                                 num = arg_strtou64(optarg);
9886                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9887                                         fprintf(stderr,
9888                                                 "ERROR: super mirror should be less than: %d\n",
9889                                                 BTRFS_SUPER_MIRROR_MAX);
9890                                         exit(1);
9891                                 }
9892                                 bytenr = btrfs_sb_offset(((int)num));
9893                                 printf("using SB copy %llu, bytenr %llu\n", num,
9894                                        (unsigned long long)bytenr);
9895                                 break;
9896                         case 'Q':
9897                                 qgroup_report = 1;
9898                                 break;
9899                         case 'E':
9900                                 subvolid = arg_strtou64(optarg);
9901                                 break;
9902                         case 'r':
9903                                 tree_root_bytenr = arg_strtou64(optarg);
9904                                 break;
9905                         case GETOPT_VAL_CHUNK_TREE:
9906                                 chunk_root_bytenr = arg_strtou64(optarg);
9907                                 break;
9908                         case 'p':
9909                                 ctx.progress_enabled = true;
9910                                 break;
9911                         case '?':
9912                         case 'h':
9913                                 usage(cmd_check_usage);
9914                         case GETOPT_VAL_REPAIR:
9915                                 printf("enabling repair mode\n");
9916                                 repair = 1;
9917                                 ctree_flags |= OPEN_CTREE_WRITES;
9918                                 break;
9919                         case GETOPT_VAL_READONLY:
9920                                 readonly = 1;
9921                                 break;
9922                         case GETOPT_VAL_INIT_CSUM:
9923                                 printf("Creating a new CRC tree\n");
9924                                 init_csum_tree = 1;
9925                                 repair = 1;
9926                                 ctree_flags |= OPEN_CTREE_WRITES;
9927                                 break;
9928                         case GETOPT_VAL_INIT_EXTENT:
9929                                 init_extent_tree = 1;
9930                                 ctree_flags |= (OPEN_CTREE_WRITES |
9931                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9932                                 repair = 1;
9933                                 break;
9934                         case GETOPT_VAL_CHECK_CSUM:
9935                                 check_data_csum = 1;
9936                                 break;
9937                 }
9938         }
9939
9940         if (check_argc_exact(argc - optind, 1))
9941                 usage(cmd_check_usage);
9942
9943         if (ctx.progress_enabled) {
9944                 ctx.tp = TASK_NOTHING;
9945                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9946         }
9947
9948         /* This check is the only reason for --readonly to exist */
9949         if (readonly && repair) {
9950                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9951                 exit(1);
9952         }
9953
9954         radix_tree_init();
9955         cache_tree_init(&root_cache);
9956
9957         if((ret = check_mounted(argv[optind])) < 0) {
9958                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9959                 goto err_out;
9960         } else if(ret) {
9961                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9962                 ret = -EBUSY;
9963                 goto err_out;
9964         }
9965
9966         /* only allow partial opening under repair mode */
9967         if (repair)
9968                 ctree_flags |= OPEN_CTREE_PARTIAL;
9969
9970         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9971                                   chunk_root_bytenr, ctree_flags);
9972         if (!info) {
9973                 fprintf(stderr, "Couldn't open file system\n");
9974                 ret = -EIO;
9975                 goto err_out;
9976         }
9977
9978         global_info = info;
9979         root = info->fs_root;
9980
9981         /*
9982          * repair mode will force us to commit transaction which
9983          * will make us fail to load log tree when mounting.
9984          */
9985         if (repair && btrfs_super_log_root(info->super_copy)) {
9986                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9987                 if (!ret) {
9988                         ret = 1;
9989                         goto close_out;
9990                 }
9991                 ret = zero_log_tree(root);
9992                 if (ret) {
9993                         fprintf(stderr, "fail to zero log tree\n");
9994                         goto close_out;
9995                 }
9996         }
9997
9998         uuid_unparse(info->super_copy->fsid, uuidbuf);
9999         if (qgroup_report) {
10000                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10001                        uuidbuf);
10002                 ret = qgroup_verify_all(info);
10003                 if (ret == 0)
10004                         report_qgroups(1);
10005                 goto close_out;
10006         }
10007         if (subvolid) {
10008                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10009                        subvolid, argv[optind], uuidbuf);
10010                 ret = print_extent_state(info, subvolid);
10011                 goto close_out;
10012         }
10013         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10014
10015         if (!extent_buffer_uptodate(info->tree_root->node) ||
10016             !extent_buffer_uptodate(info->dev_root->node) ||
10017             !extent_buffer_uptodate(info->chunk_root->node)) {
10018                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10019                 ret = -EIO;
10020                 goto close_out;
10021         }
10022
10023         if (init_extent_tree || init_csum_tree) {
10024                 struct btrfs_trans_handle *trans;
10025
10026                 trans = btrfs_start_transaction(info->extent_root, 0);
10027                 if (IS_ERR(trans)) {
10028                         fprintf(stderr, "Error starting transaction\n");
10029                         ret = PTR_ERR(trans);
10030                         goto close_out;
10031                 }
10032
10033                 if (init_extent_tree) {
10034                         printf("Creating a new extent tree\n");
10035                         ret = reinit_extent_tree(trans, info);
10036                         if (ret)
10037                                 goto close_out;
10038                 }
10039
10040                 if (init_csum_tree) {
10041                         fprintf(stderr, "Reinit crc root\n");
10042                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10043                         if (ret) {
10044                                 fprintf(stderr, "crc root initialization failed\n");
10045                                 ret = -EIO;
10046                                 goto close_out;
10047                         }
10048
10049                         ret = fill_csum_tree(trans, info->csum_root,
10050                                              init_extent_tree);
10051                         if (ret) {
10052                                 fprintf(stderr, "crc refilling failed\n");
10053                                 return -EIO;
10054                         }
10055                 }
10056                 /*
10057                  * Ok now we commit and run the normal fsck, which will add
10058                  * extent entries for all of the items it finds.
10059                  */
10060                 ret = btrfs_commit_transaction(trans, info->extent_root);
10061                 if (ret)
10062                         goto close_out;
10063         }
10064         if (!extent_buffer_uptodate(info->extent_root->node)) {
10065                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10066                 ret = -EIO;
10067                 goto close_out;
10068         }
10069         if (!extent_buffer_uptodate(info->csum_root->node)) {
10070                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10071                 ret = -EIO;
10072                 goto close_out;
10073         }
10074
10075         if (!ctx.progress_enabled)
10076                 fprintf(stderr, "checking extents\n");
10077         ret = check_chunks_and_extents(root);
10078         if (ret)
10079                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10080
10081         ret = repair_root_items(info);
10082         if (ret < 0)
10083                 goto close_out;
10084         if (repair) {
10085                 fprintf(stderr, "Fixed %d roots.\n", ret);
10086                 ret = 0;
10087         } else if (ret > 0) {
10088                 fprintf(stderr,
10089                        "Found %d roots with an outdated root item.\n",
10090                        ret);
10091                 fprintf(stderr,
10092                         "Please run a filesystem check with the option --repair to fix them.\n");
10093                 ret = 1;
10094                 goto close_out;
10095         }
10096
10097         if (!ctx.progress_enabled) {
10098                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10099                         fprintf(stderr, "checking free space tree\n");
10100                 else
10101                         fprintf(stderr, "checking free space cache\n");
10102         }
10103         ret = check_space_cache(root);
10104         if (ret)
10105                 goto out;
10106
10107         /*
10108          * We used to have to have these hole extents in between our real
10109          * extents so if we don't have this flag set we need to make sure there
10110          * are no gaps in the file extents for inodes, otherwise we can just
10111          * ignore it when this happens.
10112          */
10113         no_holes = btrfs_fs_incompat(root->fs_info,
10114                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10115         if (!ctx.progress_enabled)
10116                 fprintf(stderr, "checking fs roots\n");
10117         ret = check_fs_roots(root, &root_cache);
10118         if (ret)
10119                 goto out;
10120
10121         fprintf(stderr, "checking csums\n");
10122         ret = check_csums(root);
10123         if (ret)
10124                 goto out;
10125
10126         fprintf(stderr, "checking root refs\n");
10127         ret = check_root_refs(root, &root_cache);
10128         if (ret)
10129                 goto out;
10130
10131         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10132                 struct extent_buffer *eb;
10133
10134                 eb = list_first_entry(&root->fs_info->recow_ebs,
10135                                       struct extent_buffer, recow);
10136                 list_del_init(&eb->recow);
10137                 ret = recow_extent_buffer(root, eb);
10138                 if (ret)
10139                         break;
10140         }
10141
10142         while (!list_empty(&delete_items)) {
10143                 struct bad_item *bad;
10144
10145                 bad = list_first_entry(&delete_items, struct bad_item, list);
10146                 list_del_init(&bad->list);
10147                 if (repair)
10148                         ret = delete_bad_item(root, bad);
10149                 free(bad);
10150         }
10151
10152         if (info->quota_enabled) {
10153                 int err;
10154                 fprintf(stderr, "checking quota groups\n");
10155                 err = qgroup_verify_all(info);
10156                 if (err)
10157                         goto out;
10158                 report_qgroups(0);
10159                 err = repair_qgroups(info, &qgroups_repaired);
10160                 if (err)
10161                         goto out;
10162         }
10163
10164         if (!list_empty(&root->fs_info->recow_ebs)) {
10165                 fprintf(stderr, "Transid errors in file system\n");
10166                 ret = 1;
10167         }
10168 out:
10169         /* Don't override original ret */
10170         if (!ret && qgroups_repaired)
10171                 ret = qgroups_repaired;
10172
10173         if (found_old_backref) { /*
10174                  * there was a disk format change when mixed
10175                  * backref was in testing tree. The old format
10176                  * existed about one week.
10177                  */
10178                 printf("\n * Found old mixed backref format. "
10179                        "The old format is not supported! *"
10180                        "\n * Please mount the FS in readonly mode, "
10181                        "backup data and re-format the FS. *\n\n");
10182                 ret = 1;
10183         }
10184         printf("found %llu bytes used err is %d\n",
10185                (unsigned long long)bytes_used, ret);
10186         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10187         printf("total tree bytes: %llu\n",
10188                (unsigned long long)total_btree_bytes);
10189         printf("total fs tree bytes: %llu\n",
10190                (unsigned long long)total_fs_tree_bytes);
10191         printf("total extent tree bytes: %llu\n",
10192                (unsigned long long)total_extent_tree_bytes);
10193         printf("btree space waste bytes: %llu\n",
10194                (unsigned long long)btree_space_waste);
10195         printf("file data blocks allocated: %llu\n referenced %llu\n",
10196                 (unsigned long long)data_bytes_allocated,
10197                 (unsigned long long)data_bytes_referenced);
10198
10199         free_qgroup_counts();
10200         free_root_recs_tree(&root_cache);
10201 close_out:
10202         close_ctree(root);
10203 err_out:
10204         if (ctx.progress_enabled)
10205                 task_deinit(ctx.info);
10206
10207         return ret;
10208 }