ba1d96460bc16b5a3562f640794ba60c975381a1
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase reported by the progress indicator.
 *
 * The values are used as indexes into task_position_string[] in
 * print_status_check(), so their order must match the strings there.
 * TASK_NOTHING has no string; print_status_check() returns right away
 * when it sees it.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* Context handed to print_status_check() through its void pointer argument. */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted; set outside this part of the file */
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Header embedded as the first member ('node') of both data_backref and
 * tree_backref.  It is linked into an rbtree through its rb_node and is
 * ordered by compare_extent_backref().
 */
struct extent_backref {
        struct rb_node node;
        /* set for a data_backref, clear for a tree_backref (see the WARN_ONs in the compare helpers) */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* when set, compare_data_backref() treats equal parents as a full match */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
/*
 * The is_data flavour of extent_backref; ordered by compare_data_backref().
 */
struct data_backref {
        /* common header; to_data_backref() goes from it back to this struct */
        struct extent_backref node;
        /* both names share storage, so comparing 'parent' also compares 'root' */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /*
         * Not the same as node.found_ref (a 1-bit flag): this u32 is what
         * compare_data_backref() reads through back->found_ref.
         */
        u32 found_ref;
};
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
/*
 * Much like data_backref, but without the undetermined members and linked
 * through a list_head instead of an rbtree node.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the inode record's
 * orphan_extents list (struct inode_record).
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
175
/*
 * The non-data flavour of extent_backref; ordered by compare_tree_backref().
 */
struct tree_backref {
        /* common header; to_tree_backref() goes from it back to this struct */
        struct extent_backref node;
        /* both names share storage, so comparing 'parent' also compares 'root' */
        union {
                u64 parent;
                u64 root;
        };
};
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit bitfield, so the value 2 fits in it.
 */
enum { FLAG_UNSET = 2 };
232
/*
 * Per-extent bookkeeping record.
 *
 * NOTE(review): the code that fills and reads most of these members is
 * outside this part of the file; only what is visible here is described.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* NOTE(review): presumably holds extent_backref nodes ordered by compare_extent_backref() - confirm */
        struct rb_root backref_tree;
        /* link member used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
/*
 * One name-based reference to an inode, linked into
 * inode_record::backrefs.
 *
 * The name is stored inline after the fixed part; clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        /* link member used by to_inode_backref() */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        unsigned int filetype:8;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, sized at allocation time */
        char name[0];
};
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
/*
 * List-linkable record of selected values describing one root.
 * NOTE(review): producers and consumers are outside this part of the
 * file; member meanings are not visible here.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
295
/*
 * Error bits for a name/root reference.  They are OR-ed into one int and
 * decoded into text by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
309
/*
 * A hole covering the byte range [start, start + len), kept in an rbtree
 * (inode_record::holes) that is ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
315
/*
 * Everything collected about one inode while checking a tree.
 *
 * Records are reference counted through 'refs': get_inode_rec() clones a
 * record before handing it out for modification when refs is above 1.
 */
struct inode_record {
        /* list of struct inode_backref (see clone_inode_rec()) */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* set to (u64)-1 for a freshly created record in get_inode_rec() */
        u64 extent_start;
        u64 extent_end;
        /* rbtree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* number of holders sharing this record */
        u32 refs;
};
343
/*
 * Error bits stored in inode_record::errors and decoded into text by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
359
/*
 * One reference to a root, linked into root_record::backrefs.
 * The name is stored inline after the fixed part.
 * NOTE(review): the code that sets the found_* flags is outside this part
 * of the file.
 */
struct root_backref {
        /* link member used by to_root_backref() */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a mask of REF_ERR_* bits - confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, sized at allocation time */
        char name[0];
};
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
/*
 * Per-root record that can be stored in a cache tree through 'cache'.
 * NOTE(review): users are outside this part of the file.
 */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
387
/*
 * Cache tree entry that carries an opaque pointer.
 * get_inode_rec() keys such entries by inode number (cache.start = ino,
 * cache.size = 1) and stores the inode_record in 'data'.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
392
/*
 * Cache tree entry holding its own root and inode caches.
 * NOTE(review): users are outside this part of the file; member meanings
 * beyond their types are not visible here.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
400
/* A block described by its start and size (users are outside this part of the file). */
struct block_info {
        u64 start;
        u32 size;
};
405
/*
 * State carried while walking a tree.
 * NOTE(review): users are outside this part of the file.
 */
struct walk_control {
        struct cache_tree shared;
        /* one slot per tree level, BTRFS_MAX_LEVEL slots in total */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
412
/*
 * List-linkable (key, root id) pair.
 * NOTE(review): presumably queued on the delete_items list - confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
418
/*
 * List-linkable (bytenr, bytes) candidate with counters.
 * NOTE(review): users are outside this part of the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
426
/*
 * Information gathered about one root, storable in a cache tree through
 * 'cache_extent'.
 * NOTE(review): presumably kept in roots_info_cache - confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
436
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * The values are OR-ed into a single int, so every flag needs a bit of
 * its own.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH, which made the two errors indistinguishable; it now
 * uses the first free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CROSSING_STRIPE_BOUNDARY (1 << 8) /* For kernel scrub workaround */
452
453 static void *print_status_check(void *p)
454 {
455         struct task_ctx *priv = p;
456         const char work_indicator[] = { '.', 'o', 'O', 'o' };
457         uint32_t count = 0;
458         static char *task_position_string[] = {
459                 "checking extents",
460                 "checking free space cache",
461                 "checking fs roots",
462         };
463
464         task_period_start(priv->info, 1000 /* 1s */);
465
466         if (priv->tp == TASK_NOTHING)
467                 return NULL;
468
469         while (1) {
470                 printf("%s [%c]\r", task_position_string[priv->tp],
471                                 work_indicator[count % 4]);
472                 count++;
473                 fflush(stdout);
474                 task_period_wait(priv->info);
475         }
476         return NULL;
477 }
478
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
486
487 /* Compatible function to allow reuse of old codes */
488 static u64 first_extent_gap(struct rb_root *holes)
489 {
490         struct file_extent_hole *hole;
491
492         if (RB_EMPTY_ROOT(holes))
493                 return (u64)-1;
494
495         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
496         return hole->start;
497 }
498
499 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
500 {
501         struct file_extent_hole *hole1;
502         struct file_extent_hole *hole2;
503
504         hole1 = rb_entry(node1, struct file_extent_hole, node);
505         hole2 = rb_entry(node2, struct file_extent_hole, node);
506
507         if (hole1->start > hole2->start)
508                 return -1;
509         if (hole1->start < hole2->start)
510                 return 1;
511         /* Now hole1->start == hole2->start */
512         if (hole1->len >= hole2->len)
513                 /*
514                  * Hole 1 will be merge center
515                  * Same hole will be merged later
516                  */
517                 return -1;
518         /* Hole 2 will be merge center */
519         return 1;
520 }
521
522 /*
523  * Add a hole to the record
524  *
525  * This will do hole merge for copy_file_extent_holes(),
526  * which will ensure there won't be continuous holes.
527  */
528 static int add_file_extent_hole(struct rb_root *holes,
529                                 u64 start, u64 len)
530 {
531         struct file_extent_hole *hole;
532         struct file_extent_hole *prev = NULL;
533         struct file_extent_hole *next = NULL;
534
535         hole = malloc(sizeof(*hole));
536         if (!hole)
537                 return -ENOMEM;
538         hole->start = start;
539         hole->len = len;
540         /* Since compare will not return 0, no -EEXIST will happen */
541         rb_insert(holes, &hole->node, compare_hole);
542
543         /* simple merge with previous hole */
544         if (rb_prev(&hole->node))
545                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
546                                 node);
547         if (prev && prev->start + prev->len >= hole->start) {
548                 hole->len = hole->start + hole->len - prev->start;
549                 hole->start = prev->start;
550                 rb_erase(&prev->node, holes);
551                 free(prev);
552                 prev = NULL;
553         }
554
555         /* iterate merge with next holes */
556         while (1) {
557                 if (!rb_next(&hole->node))
558                         break;
559                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
560                                         node);
561                 if (hole->start + hole->len >= next->start) {
562                         if (hole->start + hole->len <= next->start + next->len)
563                                 hole->len = next->start + next->len -
564                                             hole->start;
565                         rb_erase(&next->node, holes);
566                         free(next);
567                         next = NULL;
568                 } else
569                         break;
570         }
571         return 0;
572 }
573
574 static int compare_hole_range(struct rb_node *node, void *data)
575 {
576         struct file_extent_hole *hole;
577         u64 start;
578
579         hole = (struct file_extent_hole *)data;
580         start = hole->start;
581
582         hole = rb_entry(node, struct file_extent_hole, node);
583         if (start < hole->start)
584                 return -1;
585         if (start >= hole->start && start < hole->start + hole->len)
586                 return 0;
587         return 1;
588 }
589
590 /*
591  * Delete a hole in the record
592  *
593  * This will do the hole split and is much restrict than add.
594  */
595 static int del_file_extent_hole(struct rb_root *holes,
596                                 u64 start, u64 len)
597 {
598         struct file_extent_hole *hole;
599         struct file_extent_hole tmp;
600         u64 prev_start = 0;
601         u64 prev_len = 0;
602         u64 next_start = 0;
603         u64 next_len = 0;
604         struct rb_node *node;
605         int have_prev = 0;
606         int have_next = 0;
607         int ret = 0;
608
609         tmp.start = start;
610         tmp.len = len;
611         node = rb_search(holes, &tmp, compare_hole_range, NULL);
612         if (!node)
613                 return -EEXIST;
614         hole = rb_entry(node, struct file_extent_hole, node);
615         if (start + len > hole->start + hole->len)
616                 return -EEXIST;
617
618         /*
619          * Now there will be no overlap, delete the hole and re-add the
620          * split(s) if they exists.
621          */
622         if (start > hole->start) {
623                 prev_start = hole->start;
624                 prev_len = start - hole->start;
625                 have_prev = 1;
626         }
627         if (hole->start + hole->len > start + len) {
628                 next_start = start + len;
629                 next_len = hole->start + hole->len - start - len;
630                 have_next = 1;
631         }
632         rb_erase(node, holes);
633         free(hole);
634         if (have_prev) {
635                 ret = add_file_extent_hole(holes, prev_start, prev_len);
636                 if (ret < 0)
637                         return ret;
638         }
639         if (have_next) {
640                 ret = add_file_extent_hole(holes, next_start, next_len);
641                 if (ret < 0)
642                         return ret;
643         }
644         return 0;
645 }
646
647 static int copy_file_extent_holes(struct rb_root *dst,
648                                   struct rb_root *src)
649 {
650         struct file_extent_hole *hole;
651         struct rb_node *node;
652         int ret = 0;
653
654         node = rb_first(src);
655         while (node) {
656                 hole = rb_entry(node, struct file_extent_hole, node);
657                 ret = add_file_extent_hole(dst, hole->start, hole->len);
658                 if (ret)
659                         break;
660                 node = rb_next(node);
661         }
662         return ret;
663 }
664
665 static void free_file_extent_holes(struct rb_root *holes)
666 {
667         struct rb_node *node;
668         struct file_extent_hole *hole;
669
670         node = rb_first(holes);
671         while (node) {
672                 hole = rb_entry(node, struct file_extent_hole, node);
673                 rb_erase(node, holes);
674                 free(hole);
675                 node = rb_first(holes);
676         }
677 }
678
/* Forward declaration; being static, the function is defined later in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
680
681 static void record_root_in_trans(struct btrfs_trans_handle *trans,
682                                  struct btrfs_root *root)
683 {
684         if (root->last_trans != trans->transid) {
685                 root->track_dirty = 1;
686                 root->last_trans = trans->transid;
687                 root->commit_root = root->node;
688                 extent_buffer_get(root->node);
689         }
690 }
691
692 static u8 imode_to_type(u32 imode)
693 {
694 #define S_SHIFT 12
695         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
696                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
697                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
698                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
699                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
700                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
701                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
702                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
703         };
704
705         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
706 #undef S_SHIFT
707 }
708
709 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
710 {
711         struct device_record *rec1;
712         struct device_record *rec2;
713
714         rec1 = rb_entry(node1, struct device_record, node);
715         rec2 = rb_entry(node2, struct device_record, node);
716         if (rec1->devid > rec2->devid)
717                 return -1;
718         else if (rec1->devid < rec2->devid)
719                 return 1;
720         else
721                 return 0;
722 }
723
724 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
725 {
726         struct inode_record *rec;
727         struct inode_backref *backref;
728         struct inode_backref *orig;
729         struct inode_backref *tmp;
730         struct orphan_data_extent *src_orphan;
731         struct orphan_data_extent *dst_orphan;
732         size_t size;
733         int ret;
734
735         rec = malloc(sizeof(*rec));
736         if (!rec)
737                 return ERR_PTR(-ENOMEM);
738         memcpy(rec, orig_rec, sizeof(*rec));
739         rec->refs = 1;
740         INIT_LIST_HEAD(&rec->backrefs);
741         INIT_LIST_HEAD(&rec->orphan_extents);
742         rec->holes = RB_ROOT;
743
744         list_for_each_entry(orig, &orig_rec->backrefs, list) {
745                 size = sizeof(*orig) + orig->namelen + 1;
746                 backref = malloc(size);
747                 if (!backref) {
748                         ret = -ENOMEM;
749                         goto cleanup;
750                 }
751                 memcpy(backref, orig, size);
752                 list_add_tail(&backref->list, &rec->backrefs);
753         }
754         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
755                 dst_orphan = malloc(sizeof(*dst_orphan));
756                 if (!dst_orphan) {
757                         ret = -ENOMEM;
758                         goto cleanup;
759                 }
760                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
761                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
762         }
763         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
764         BUG_ON(ret < 0);
765
766         return rec;
767
768 cleanup:
769         if (!list_empty(&rec->backrefs))
770                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
771                         list_del(&orig->list);
772                         free(orig);
773                 }
774
775         if (!list_empty(&rec->orphan_extents))
776                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
777                         list_del(&orig->list);
778                         free(orig);
779                 }
780
781         free(rec);
782
783         return ERR_PTR(ret);
784 }
785
786 static void print_orphan_data_extents(struct list_head *orphan_extents,
787                                       u64 objectid)
788 {
789         struct orphan_data_extent *orphan;
790
791         if (list_empty(orphan_extents))
792                 return;
793         printf("The following data extent is lost in tree %llu:\n",
794                objectid);
795         list_for_each_entry(orphan, orphan_extents, list) {
796                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
797                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
798                        orphan->disk_len);
799         }
800 }
801
802 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
803 {
804         u64 root_objectid = root->root_key.objectid;
805         int errors = rec->errors;
806
807         if (!errors)
808                 return;
809         /* reloc root errors, we print its corresponding fs root objectid*/
810         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
811                 root_objectid = root->root_key.offset;
812                 fprintf(stderr, "reloc");
813         }
814         fprintf(stderr, "root %llu inode %llu errors %x",
815                 (unsigned long long) root_objectid,
816                 (unsigned long long) rec->ino, rec->errors);
817
818         if (errors & I_ERR_NO_INODE_ITEM)
819                 fprintf(stderr, ", no inode item");
820         if (errors & I_ERR_NO_ORPHAN_ITEM)
821                 fprintf(stderr, ", no orphan item");
822         if (errors & I_ERR_DUP_INODE_ITEM)
823                 fprintf(stderr, ", dup inode item");
824         if (errors & I_ERR_DUP_DIR_INDEX)
825                 fprintf(stderr, ", dup dir index");
826         if (errors & I_ERR_ODD_DIR_ITEM)
827                 fprintf(stderr, ", odd dir item");
828         if (errors & I_ERR_ODD_FILE_EXTENT)
829                 fprintf(stderr, ", odd file extent");
830         if (errors & I_ERR_BAD_FILE_EXTENT)
831                 fprintf(stderr, ", bad file extent");
832         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
833                 fprintf(stderr, ", file extent overlap");
834         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
835                 fprintf(stderr, ", file extent discount");
836         if (errors & I_ERR_DIR_ISIZE_WRONG)
837                 fprintf(stderr, ", dir isize wrong");
838         if (errors & I_ERR_FILE_NBYTES_WRONG)
839                 fprintf(stderr, ", nbytes wrong");
840         if (errors & I_ERR_ODD_CSUM_ITEM)
841                 fprintf(stderr, ", odd csum item");
842         if (errors & I_ERR_SOME_CSUM_MISSING)
843                 fprintf(stderr, ", some csum missing");
844         if (errors & I_ERR_LINK_COUNT_WRONG)
845                 fprintf(stderr, ", link count wrong");
846         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
847                 fprintf(stderr, ", orphan file extent");
848         fprintf(stderr, "\n");
849         /* Print the orphan extents if needed */
850         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
851                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
852
853         /* Print the holes if needed */
854         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
855                 struct file_extent_hole *hole;
856                 struct rb_node *node;
857                 int found = 0;
858
859                 node = rb_first(&rec->holes);
860                 fprintf(stderr, "Found file extent holes:\n");
861                 while (node) {
862                         found = 1;
863                         hole = rb_entry(node, struct file_extent_hole, node);
864                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
865                                 hole->start, hole->len);
866                         node = rb_next(node);
867                 }
868                 if (!found)
869                         fprintf(stderr, "\tstart: 0, len: %llu\n",
870                                 round_up(rec->isize, root->sectorsize));
871         }
872 }
873
874 static void print_ref_error(int errors)
875 {
876         if (errors & REF_ERR_NO_DIR_ITEM)
877                 fprintf(stderr, ", no dir item");
878         if (errors & REF_ERR_NO_DIR_INDEX)
879                 fprintf(stderr, ", no dir index");
880         if (errors & REF_ERR_NO_INODE_REF)
881                 fprintf(stderr, ", no inode ref");
882         if (errors & REF_ERR_DUP_DIR_ITEM)
883                 fprintf(stderr, ", dup dir item");
884         if (errors & REF_ERR_DUP_DIR_INDEX)
885                 fprintf(stderr, ", dup dir index");
886         if (errors & REF_ERR_DUP_INODE_REF)
887                 fprintf(stderr, ", dup inode ref");
888         if (errors & REF_ERR_INDEX_UNMATCH)
889                 fprintf(stderr, ", index mismatch");
890         if (errors & REF_ERR_FILETYPE_UNMATCH)
891                 fprintf(stderr, ", filetype mismatch");
892         if (errors & REF_ERR_NAME_TOO_LONG)
893                 fprintf(stderr, ", name too long");
894         if (errors & REF_ERR_NO_ROOT_REF)
895                 fprintf(stderr, ", no root ref");
896         if (errors & REF_ERR_NO_ROOT_BACKREF)
897                 fprintf(stderr, ", no root backref");
898         if (errors & REF_ERR_DUP_ROOT_REF)
899                 fprintf(stderr, ", dup root ref");
900         if (errors & REF_ERR_DUP_ROOT_BACKREF)
901                 fprintf(stderr, ", dup root backref");
902         fprintf(stderr, "\n");
903 }
904
905 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
906                                           u64 ino, int mod)
907 {
908         struct ptr_node *node;
909         struct cache_extent *cache;
910         struct inode_record *rec = NULL;
911         int ret;
912
913         cache = lookup_cache_extent(inode_cache, ino, 1);
914         if (cache) {
915                 node = container_of(cache, struct ptr_node, cache);
916                 rec = node->data;
917                 if (mod && rec->refs > 1) {
918                         node->data = clone_inode_rec(rec);
919                         if (IS_ERR(node->data))
920                                 return node->data;
921                         rec->refs--;
922                         rec = node->data;
923                 }
924         } else if (mod) {
925                 rec = calloc(1, sizeof(*rec));
926                 if (!rec)
927                         return ERR_PTR(-ENOMEM);
928                 rec->ino = ino;
929                 rec->extent_start = (u64)-1;
930                 rec->refs = 1;
931                 INIT_LIST_HEAD(&rec->backrefs);
932                 INIT_LIST_HEAD(&rec->orphan_extents);
933                 rec->holes = RB_ROOT;
934
935                 node = malloc(sizeof(*node));
936                 if (!node) {
937                         free(rec);
938                         return ERR_PTR(-ENOMEM);
939                 }
940                 node->cache.start = ino;
941                 node->cache.size = 1;
942                 node->data = rec;
943
944                 if (ino == BTRFS_FREE_INO_OBJECTID)
945                         rec->found_link = 1;
946
947                 ret = insert_cache_extent(inode_cache, &node->cache);
948                 if (ret)
949                         return ERR_PTR(-EEXIST);
950         }
951         return rec;
952 }
953
954 static void free_orphan_data_extents(struct list_head *orphan_extents)
955 {
956         struct orphan_data_extent *orphan;
957
958         while (!list_empty(orphan_extents)) {
959                 orphan = list_entry(orphan_extents->next,
960                                     struct orphan_data_extent, list);
961                 list_del(&orphan->list);
962                 free(orphan);
963         }
964 }
965
966 static void free_inode_rec(struct inode_record *rec)
967 {
968         struct inode_backref *backref;
969
970         if (--rec->refs > 0)
971                 return;
972
973         while (!list_empty(&rec->backrefs)) {
974                 backref = to_inode_backref(rec->backrefs.next);
975                 list_del(&backref->list);
976                 free(backref);
977         }
978         free_orphan_data_extents(&rec->orphan_extents);
979         free_file_extent_holes(&rec->holes);
980         free(rec);
981 }
982
983 static int can_free_inode_rec(struct inode_record *rec)
984 {
985         if (!rec->errors && rec->checked && rec->found_inode_item &&
986             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
987                 return 1;
988         return 0;
989 }
990
991 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
992                                  struct inode_record *rec)
993 {
994         struct cache_extent *cache;
995         struct inode_backref *tmp, *backref;
996         struct ptr_node *node;
997         unsigned char filetype;
998
999         if (!rec->found_inode_item)
1000                 return;
1001
1002         filetype = imode_to_type(rec->imode);
1003         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1004                 if (backref->found_dir_item && backref->found_dir_index) {
1005                         if (backref->filetype != filetype)
1006                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1007                         if (!backref->errors && backref->found_inode_ref &&
1008                             rec->nlink == rec->found_link) {
1009                                 list_del(&backref->list);
1010                                 free(backref);
1011                         }
1012                 }
1013         }
1014
1015         if (!rec->checked || rec->merging)
1016                 return;
1017
1018         if (S_ISDIR(rec->imode)) {
1019                 if (rec->found_size != rec->isize)
1020                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1021                 if (rec->found_file_extent)
1022                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1023         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1024                 if (rec->found_dir_item)
1025                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1026                 if (rec->found_size != rec->nbytes)
1027                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1028                 if (rec->nlink > 0 && !no_holes &&
1029                     (rec->extent_end < rec->isize ||
1030                      first_extent_gap(&rec->holes) < rec->isize))
1031                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1032         }
1033
1034         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1035                 if (rec->found_csum_item && rec->nodatasum)
1036                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1037                 if (rec->some_csum_missing && !rec->nodatasum)
1038                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1039         }
1040
1041         BUG_ON(rec->refs != 1);
1042         if (can_free_inode_rec(rec)) {
1043                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1044                 node = container_of(cache, struct ptr_node, cache);
1045                 BUG_ON(node->data != rec);
1046                 remove_cache_extent(inode_cache, &node->cache);
1047                 free(node);
1048                 free_inode_rec(rec);
1049         }
1050 }
1051
1052 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1053 {
1054         struct btrfs_path path;
1055         struct btrfs_key key;
1056         int ret;
1057
1058         key.objectid = BTRFS_ORPHAN_OBJECTID;
1059         key.type = BTRFS_ORPHAN_ITEM_KEY;
1060         key.offset = ino;
1061
1062         btrfs_init_path(&path);
1063         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1064         btrfs_release_path(&path);
1065         if (ret > 0)
1066                 ret = -ENOENT;
1067         return ret;
1068 }
1069
1070 static int process_inode_item(struct extent_buffer *eb,
1071                               int slot, struct btrfs_key *key,
1072                               struct shared_node *active_node)
1073 {
1074         struct inode_record *rec;
1075         struct btrfs_inode_item *item;
1076
1077         rec = active_node->current;
1078         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1079         if (rec->found_inode_item) {
1080                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1081                 return 1;
1082         }
1083         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1084         rec->nlink = btrfs_inode_nlink(eb, item);
1085         rec->isize = btrfs_inode_size(eb, item);
1086         rec->nbytes = btrfs_inode_nbytes(eb, item);
1087         rec->imode = btrfs_inode_mode(eb, item);
1088         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1089                 rec->nodatasum = 1;
1090         rec->found_inode_item = 1;
1091         if (rec->nlink == 0)
1092                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1093         maybe_free_inode_rec(&active_node->inode_cache, rec);
1094         return 0;
1095 }
1096
1097 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1098                                                 const char *name,
1099                                                 int namelen, u64 dir)
1100 {
1101         struct inode_backref *backref;
1102
1103         list_for_each_entry(backref, &rec->backrefs, list) {
1104                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1105                         break;
1106                 if (backref->dir != dir || backref->namelen != namelen)
1107                         continue;
1108                 if (memcmp(name, backref->name, namelen))
1109                         continue;
1110                 return backref;
1111         }
1112
1113         backref = malloc(sizeof(*backref) + namelen + 1);
1114         if (!backref)
1115                 return NULL;
1116         memset(backref, 0, sizeof(*backref));
1117         backref->dir = dir;
1118         backref->namelen = namelen;
1119         memcpy(backref->name, name, namelen);
1120         backref->name[namelen] = '\0';
1121         list_add_tail(&backref->list, &rec->backrefs);
1122         return backref;
1123 }
1124
1125 static int add_inode_backref(struct cache_tree *inode_cache,
1126                              u64 ino, u64 dir, u64 index,
1127                              const char *name, int namelen,
1128                              int filetype, int itemtype, int errors)
1129 {
1130         struct inode_record *rec;
1131         struct inode_backref *backref;
1132
1133         rec = get_inode_rec(inode_cache, ino, 1);
1134         BUG_ON(IS_ERR(rec));
1135         backref = get_inode_backref(rec, name, namelen, dir);
1136         BUG_ON(!backref);
1137         if (errors)
1138                 backref->errors |= errors;
1139         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1140                 if (backref->found_dir_index)
1141                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1142                 if (backref->found_inode_ref && backref->index != index)
1143                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1144                 if (backref->found_dir_item && backref->filetype != filetype)
1145                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1146
1147                 backref->index = index;
1148                 backref->filetype = filetype;
1149                 backref->found_dir_index = 1;
1150         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1151                 rec->found_link++;
1152                 if (backref->found_dir_item)
1153                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1154                 if (backref->found_dir_index && backref->filetype != filetype)
1155                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1156
1157                 backref->filetype = filetype;
1158                 backref->found_dir_item = 1;
1159         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1160                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1161                 if (backref->found_inode_ref)
1162                         backref->errors |= REF_ERR_DUP_INODE_REF;
1163                 if (backref->found_dir_index && backref->index != index)
1164                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1165                 else
1166                         backref->index = index;
1167
1168                 backref->ref_type = itemtype;
1169                 backref->found_inode_ref = 1;
1170         } else {
1171                 BUG_ON(1);
1172         }
1173
1174         maybe_free_inode_rec(inode_cache, rec);
1175         return 0;
1176 }
1177
1178 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1179                             struct cache_tree *dst_cache)
1180 {
1181         struct inode_backref *backref;
1182         u32 dir_count = 0;
1183         int ret = 0;
1184
1185         dst->merging = 1;
1186         list_for_each_entry(backref, &src->backrefs, list) {
1187                 if (backref->found_dir_index) {
1188                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1189                                         backref->index, backref->name,
1190                                         backref->namelen, backref->filetype,
1191                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1192                 }
1193                 if (backref->found_dir_item) {
1194                         dir_count++;
1195                         add_inode_backref(dst_cache, dst->ino,
1196                                         backref->dir, 0, backref->name,
1197                                         backref->namelen, backref->filetype,
1198                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1199                 }
1200                 if (backref->found_inode_ref) {
1201                         add_inode_backref(dst_cache, dst->ino,
1202                                         backref->dir, backref->index,
1203                                         backref->name, backref->namelen, 0,
1204                                         backref->ref_type, backref->errors);
1205                 }
1206         }
1207
1208         if (src->found_dir_item)
1209                 dst->found_dir_item = 1;
1210         if (src->found_file_extent)
1211                 dst->found_file_extent = 1;
1212         if (src->found_csum_item)
1213                 dst->found_csum_item = 1;
1214         if (src->some_csum_missing)
1215                 dst->some_csum_missing = 1;
1216         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1217                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1218                 if (ret < 0)
1219                         return ret;
1220         }
1221
1222         BUG_ON(src->found_link < dir_count);
1223         dst->found_link += src->found_link - dir_count;
1224         dst->found_size += src->found_size;
1225         if (src->extent_start != (u64)-1) {
1226                 if (dst->extent_start == (u64)-1) {
1227                         dst->extent_start = src->extent_start;
1228                         dst->extent_end = src->extent_end;
1229                 } else {
1230                         if (dst->extent_end > src->extent_start)
1231                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1232                         else if (dst->extent_end < src->extent_start) {
1233                                 ret = add_file_extent_hole(&dst->holes,
1234                                         dst->extent_end,
1235                                         src->extent_start - dst->extent_end);
1236                         }
1237                         if (dst->extent_end < src->extent_end)
1238                                 dst->extent_end = src->extent_end;
1239                 }
1240         }
1241
1242         dst->errors |= src->errors;
1243         if (src->found_inode_item) {
1244                 if (!dst->found_inode_item) {
1245                         dst->nlink = src->nlink;
1246                         dst->isize = src->isize;
1247                         dst->nbytes = src->nbytes;
1248                         dst->imode = src->imode;
1249                         dst->nodatasum = src->nodatasum;
1250                         dst->found_inode_item = 1;
1251                 } else {
1252                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1253                 }
1254         }
1255         dst->merging = 0;
1256
1257         return 0;
1258 }
1259
1260 static int splice_shared_node(struct shared_node *src_node,
1261                               struct shared_node *dst_node)
1262 {
1263         struct cache_extent *cache;
1264         struct ptr_node *node, *ins;
1265         struct cache_tree *src, *dst;
1266         struct inode_record *rec, *conflict;
1267         u64 current_ino = 0;
1268         int splice = 0;
1269         int ret;
1270
1271         if (--src_node->refs == 0)
1272                 splice = 1;
1273         if (src_node->current)
1274                 current_ino = src_node->current->ino;
1275
1276         src = &src_node->root_cache;
1277         dst = &dst_node->root_cache;
1278 again:
1279         cache = search_cache_extent(src, 0);
1280         while (cache) {
1281                 node = container_of(cache, struct ptr_node, cache);
1282                 rec = node->data;
1283                 cache = next_cache_extent(cache);
1284
1285                 if (splice) {
1286                         remove_cache_extent(src, &node->cache);
1287                         ins = node;
1288                 } else {
1289                         ins = malloc(sizeof(*ins));
1290                         BUG_ON(!ins);
1291                         ins->cache.start = node->cache.start;
1292                         ins->cache.size = node->cache.size;
1293                         ins->data = rec;
1294                         rec->refs++;
1295                 }
1296                 ret = insert_cache_extent(dst, &ins->cache);
1297                 if (ret == -EEXIST) {
1298                         conflict = get_inode_rec(dst, rec->ino, 1);
1299                         BUG_ON(IS_ERR(conflict));
1300                         merge_inode_recs(rec, conflict, dst);
1301                         if (rec->checked) {
1302                                 conflict->checked = 1;
1303                                 if (dst_node->current == conflict)
1304                                         dst_node->current = NULL;
1305                         }
1306                         maybe_free_inode_rec(dst, conflict);
1307                         free_inode_rec(rec);
1308                         free(ins);
1309                 } else {
1310                         BUG_ON(ret);
1311                 }
1312         }
1313
1314         if (src == &src_node->root_cache) {
1315                 src = &src_node->inode_cache;
1316                 dst = &dst_node->inode_cache;
1317                 goto again;
1318         }
1319
1320         if (current_ino > 0 && (!dst_node->current ||
1321             current_ino > dst_node->current->ino)) {
1322                 if (dst_node->current) {
1323                         dst_node->current->checked = 1;
1324                         maybe_free_inode_rec(dst, dst_node->current);
1325                 }
1326                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1327                 BUG_ON(IS_ERR(dst_node->current));
1328         }
1329         return 0;
1330 }
1331
1332 static void free_inode_ptr(struct cache_extent *cache)
1333 {
1334         struct ptr_node *node;
1335         struct inode_record *rec;
1336
1337         node = container_of(cache, struct ptr_node, cache);
1338         rec = node->data;
1339         free_inode_rec(rec);
1340         free(node);
1341 }
1342
1343 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1344
1345 static struct shared_node *find_shared_node(struct cache_tree *shared,
1346                                             u64 bytenr)
1347 {
1348         struct cache_extent *cache;
1349         struct shared_node *node;
1350
1351         cache = lookup_cache_extent(shared, bytenr, 1);
1352         if (cache) {
1353                 node = container_of(cache, struct shared_node, cache);
1354                 return node;
1355         }
1356         return NULL;
1357 }
1358
1359 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1360 {
1361         int ret;
1362         struct shared_node *node;
1363
1364         node = calloc(1, sizeof(*node));
1365         if (!node)
1366                 return -ENOMEM;
1367         node->cache.start = bytenr;
1368         node->cache.size = 1;
1369         cache_tree_init(&node->root_cache);
1370         cache_tree_init(&node->inode_cache);
1371         node->refs = refs;
1372
1373         ret = insert_cache_extent(shared, &node->cache);
1374
1375         return ret;
1376 }
1377
1378 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1379                              struct walk_control *wc, int level)
1380 {
1381         struct shared_node *node;
1382         struct shared_node *dest;
1383         int ret;
1384
1385         if (level == wc->active_node)
1386                 return 0;
1387
1388         BUG_ON(wc->active_node <= level);
1389         node = find_shared_node(&wc->shared, bytenr);
1390         if (!node) {
1391                 ret = add_shared_node(&wc->shared, bytenr, refs);
1392                 BUG_ON(ret);
1393                 node = find_shared_node(&wc->shared, bytenr);
1394                 wc->nodes[level] = node;
1395                 wc->active_node = level;
1396                 return 0;
1397         }
1398
1399         if (wc->root_level == wc->active_node &&
1400             btrfs_root_refs(&root->root_item) == 0) {
1401                 if (--node->refs == 0) {
1402                         free_inode_recs_tree(&node->root_cache);
1403                         free_inode_recs_tree(&node->inode_cache);
1404                         remove_cache_extent(&wc->shared, &node->cache);
1405                         free(node);
1406                 }
1407                 return 1;
1408         }
1409
1410         dest = wc->nodes[wc->active_node];
1411         splice_shared_node(node, dest);
1412         if (node->refs == 0) {
1413                 remove_cache_extent(&wc->shared, &node->cache);
1414                 free(node);
1415         }
1416         return 1;
1417 }
1418
1419 static int leave_shared_node(struct btrfs_root *root,
1420                              struct walk_control *wc, int level)
1421 {
1422         struct shared_node *node;
1423         struct shared_node *dest;
1424         int i;
1425
1426         if (level == wc->root_level)
1427                 return 0;
1428
1429         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1430                 if (wc->nodes[i])
1431                         break;
1432         }
1433         BUG_ON(i >= BTRFS_MAX_LEVEL);
1434
1435         node = wc->nodes[wc->active_node];
1436         wc->nodes[wc->active_node] = NULL;
1437         wc->active_node = i;
1438
1439         dest = wc->nodes[wc->active_node];
1440         if (wc->active_node < wc->root_level ||
1441             btrfs_root_refs(&root->root_item) > 0) {
1442                 BUG_ON(node->refs <= 1);
1443                 splice_shared_node(node, dest);
1444         } else {
1445                 BUG_ON(node->refs < 2);
1446                 node->refs--;
1447         }
1448         return 0;
1449 }
1450
1451 /*
1452  * Returns:
1453  * < 0 - on error
1454  * 1   - if the root with id child_root_id is a child of root parent_root_id
1455  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1456  *       has other root(s) as parent(s)
1457  * 2   - if the root child_root_id doesn't have any parent roots
1458  */
1459 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1460                          u64 child_root_id)
1461 {
1462         struct btrfs_path path;
1463         struct btrfs_key key;
1464         struct extent_buffer *leaf;
1465         int has_parent = 0;
1466         int ret;
1467
1468         btrfs_init_path(&path);
1469
1470         key.objectid = parent_root_id;
1471         key.type = BTRFS_ROOT_REF_KEY;
1472         key.offset = child_root_id;
1473         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1474                                 0, 0);
1475         if (ret < 0)
1476                 return ret;
1477         btrfs_release_path(&path);
1478         if (!ret)
1479                 return 1;
1480
1481         key.objectid = child_root_id;
1482         key.type = BTRFS_ROOT_BACKREF_KEY;
1483         key.offset = 0;
1484         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1485                                 0, 0);
1486         if (ret < 0)
1487                 goto out;
1488
1489         while (1) {
1490                 leaf = path.nodes[0];
1491                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1492                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1493                         if (ret)
1494                                 break;
1495                         leaf = path.nodes[0];
1496                 }
1497
1498                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1499                 if (key.objectid != child_root_id ||
1500                     key.type != BTRFS_ROOT_BACKREF_KEY)
1501                         break;
1502
1503                 has_parent = 1;
1504
1505                 if (key.offset == parent_root_id) {
1506                         btrfs_release_path(&path);
1507                         return 1;
1508                 }
1509
1510                 path.slots[0]++;
1511         }
1512 out:
1513         btrfs_release_path(&path);
1514         if (ret < 0)
1515                 return ret;
1516         return has_parent ? 0 : 2;
1517 }
1518
1519 static int process_dir_item(struct btrfs_root *root,
1520                             struct extent_buffer *eb,
1521                             int slot, struct btrfs_key *key,
1522                             struct shared_node *active_node)
1523 {
1524         u32 total;
1525         u32 cur = 0;
1526         u32 len;
1527         u32 name_len;
1528         u32 data_len;
1529         int error;
1530         int nritems = 0;
1531         int filetype;
1532         struct btrfs_dir_item *di;
1533         struct inode_record *rec;
1534         struct cache_tree *root_cache;
1535         struct cache_tree *inode_cache;
1536         struct btrfs_key location;
1537         char namebuf[BTRFS_NAME_LEN];
1538
1539         root_cache = &active_node->root_cache;
1540         inode_cache = &active_node->inode_cache;
1541         rec = active_node->current;
1542         rec->found_dir_item = 1;
1543
1544         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1545         total = btrfs_item_size_nr(eb, slot);
1546         while (cur < total) {
1547                 nritems++;
1548                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1549                 name_len = btrfs_dir_name_len(eb, di);
1550                 data_len = btrfs_dir_data_len(eb, di);
1551                 filetype = btrfs_dir_type(eb, di);
1552
1553                 rec->found_size += name_len;
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1562
1563                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1564                         add_inode_backref(inode_cache, location.objectid,
1565                                           key->objectid, key->offset, namebuf,
1566                                           len, filetype, key->type, error);
1567                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1568                         add_inode_backref(root_cache, location.objectid,
1569                                           key->objectid, key->offset,
1570                                           namebuf, len, filetype,
1571                                           key->type, error);
1572                 } else {
1573                         fprintf(stderr, "invalid location in dir item %u\n",
1574                                 location.type);
1575                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1576                                           key->objectid, key->offset, namebuf,
1577                                           len, filetype, key->type, error);
1578                 }
1579
1580                 len = sizeof(*di) + name_len + data_len;
1581                 di = (struct btrfs_dir_item *)((char *)di + len);
1582                 cur += len;
1583         }
1584         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1585                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1586
1587         return 0;
1588 }
1589
1590 static int process_inode_ref(struct extent_buffer *eb,
1591                              int slot, struct btrfs_key *key,
1592                              struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         int error;
1600         struct cache_tree *inode_cache;
1601         struct btrfs_inode_ref *ref;
1602         char namebuf[BTRFS_NAME_LEN];
1603
1604         inode_cache = &active_node->inode_cache;
1605
1606         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1607         total = btrfs_item_size_nr(eb, slot);
1608         while (cur < total) {
1609                 name_len = btrfs_inode_ref_name_len(eb, ref);
1610                 index = btrfs_inode_ref_index(eb, ref);
1611                 if (name_len <= BTRFS_NAME_LEN) {
1612                         len = name_len;
1613                         error = 0;
1614                 } else {
1615                         len = BTRFS_NAME_LEN;
1616                         error = REF_ERR_NAME_TOO_LONG;
1617                 }
1618                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1619                 add_inode_backref(inode_cache, key->objectid, key->offset,
1620                                   index, namebuf, len, 0, key->type, error);
1621
1622                 len = sizeof(*ref) + name_len;
1623                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1624                 cur += len;
1625         }
1626         return 0;
1627 }
1628
1629 static int process_inode_extref(struct extent_buffer *eb,
1630                                 int slot, struct btrfs_key *key,
1631                                 struct shared_node *active_node)
1632 {
1633         u32 total;
1634         u32 cur = 0;
1635         u32 len;
1636         u32 name_len;
1637         u64 index;
1638         u64 parent;
1639         int error;
1640         struct cache_tree *inode_cache;
1641         struct btrfs_inode_extref *extref;
1642         char namebuf[BTRFS_NAME_LEN];
1643
1644         inode_cache = &active_node->inode_cache;
1645
1646         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1647         total = btrfs_item_size_nr(eb, slot);
1648         while (cur < total) {
1649                 name_len = btrfs_inode_extref_name_len(eb, extref);
1650                 index = btrfs_inode_extref_index(eb, extref);
1651                 parent = btrfs_inode_extref_parent(eb, extref);
1652                 if (name_len <= BTRFS_NAME_LEN) {
1653                         len = name_len;
1654                         error = 0;
1655                 } else {
1656                         len = BTRFS_NAME_LEN;
1657                         error = REF_ERR_NAME_TOO_LONG;
1658                 }
1659                 read_extent_buffer(eb, namebuf,
1660                                    (unsigned long)(extref + 1), len);
1661                 add_inode_backref(inode_cache, key->objectid, parent,
1662                                   index, namebuf, len, 0, key->type, error);
1663
1664                 len = sizeof(*extref) + name_len;
1665                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1666                 cur += len;
1667         }
1668         return 0;
1669
1670 }
1671
1672 static int count_csum_range(struct btrfs_root *root, u64 start,
1673                             u64 len, u64 *found)
1674 {
1675         struct btrfs_key key;
1676         struct btrfs_path path;
1677         struct extent_buffer *leaf;
1678         int ret;
1679         size_t size;
1680         *found = 0;
1681         u64 csum_end;
1682         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1683
1684         btrfs_init_path(&path);
1685
1686         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1687         key.offset = start;
1688         key.type = BTRFS_EXTENT_CSUM_KEY;
1689
1690         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1691                                 &key, &path, 0, 0);
1692         if (ret < 0)
1693                 goto out;
1694         if (ret > 0 && path.slots[0] > 0) {
1695                 leaf = path.nodes[0];
1696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1697                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1698                     key.type == BTRFS_EXTENT_CSUM_KEY)
1699                         path.slots[0]--;
1700         }
1701
1702         while (len > 0) {
1703                 leaf = path.nodes[0];
1704                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1705                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1706                         if (ret > 0)
1707                                 break;
1708                         else if (ret < 0)
1709                                 goto out;
1710                         leaf = path.nodes[0];
1711                 }
1712
1713                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1714                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1715                     key.type != BTRFS_EXTENT_CSUM_KEY)
1716                         break;
1717
1718                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1719                 if (key.offset >= start + len)
1720                         break;
1721
1722                 if (key.offset > start)
1723                         start = key.offset;
1724
1725                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1726                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1727                 if (csum_end > start) {
1728                         size = min(csum_end - start, len);
1729                         len -= size;
1730                         start += size;
1731                         *found += size;
1732                 }
1733
1734                 path.slots[0]++;
1735         }
1736 out:
1737         btrfs_release_path(&path);
1738         if (ret < 0)
1739                 return ret;
1740         return 0;
1741 }
1742
1743 static int process_file_extent(struct btrfs_root *root,
1744                                 struct extent_buffer *eb,
1745                                 int slot, struct btrfs_key *key,
1746                                 struct shared_node *active_node)
1747 {
1748         struct inode_record *rec;
1749         struct btrfs_file_extent_item *fi;
1750         u64 num_bytes = 0;
1751         u64 disk_bytenr = 0;
1752         u64 extent_offset = 0;
1753         u64 mask = root->sectorsize - 1;
1754         int extent_type;
1755         int ret;
1756
1757         rec = active_node->current;
1758         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1759         rec->found_file_extent = 1;
1760
1761         if (rec->extent_start == (u64)-1) {
1762                 rec->extent_start = key->offset;
1763                 rec->extent_end = key->offset;
1764         }
1765
1766         if (rec->extent_end > key->offset)
1767                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1768         else if (rec->extent_end < key->offset) {
1769                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1770                                            key->offset - rec->extent_end);
1771                 if (ret < 0)
1772                         return ret;
1773         }
1774
1775         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1776         extent_type = btrfs_file_extent_type(eb, fi);
1777
1778         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1779                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1780                 if (num_bytes == 0)
1781                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1782                 rec->found_size += num_bytes;
1783                 num_bytes = (num_bytes + mask) & ~mask;
1784         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1785                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1786                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1787                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1788                 extent_offset = btrfs_file_extent_offset(eb, fi);
1789                 if (num_bytes == 0 || (num_bytes & mask))
1790                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1791                 if (num_bytes + extent_offset >
1792                     btrfs_file_extent_ram_bytes(eb, fi))
1793                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1794                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1795                     (btrfs_file_extent_compression(eb, fi) ||
1796                      btrfs_file_extent_encryption(eb, fi) ||
1797                      btrfs_file_extent_other_encoding(eb, fi)))
1798                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1799                 if (disk_bytenr > 0)
1800                         rec->found_size += num_bytes;
1801         } else {
1802                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1803         }
1804         rec->extent_end = key->offset + num_bytes;
1805
1806         /*
1807          * The data reloc tree will copy full extents into its inode and then
1808          * copy the corresponding csums.  Because the extent it copied could be
1809          * a preallocated extent that hasn't been written to yet there may be no
1810          * csums to copy, ergo we won't have csums for our file extent.  This is
1811          * ok so just don't bother checking csums if the inode belongs to the
1812          * data reloc tree.
1813          */
1814         if (disk_bytenr > 0 &&
1815             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1816                 u64 found;
1817                 if (btrfs_file_extent_compression(eb, fi))
1818                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1819                 else
1820                         disk_bytenr += extent_offset;
1821
1822                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1823                 if (ret < 0)
1824                         return ret;
1825                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1826                         if (found > 0)
1827                                 rec->found_csum_item = 1;
1828                         if (found < num_bytes)
1829                                 rec->some_csum_missing = 1;
1830                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1831                         if (found > 0)
1832                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1833                 }
1834         }
1835         return 0;
1836 }
1837
1838 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1839                             struct walk_control *wc)
1840 {
1841         struct btrfs_key key;
1842         u32 nritems;
1843         int i;
1844         int ret = 0;
1845         struct cache_tree *inode_cache;
1846         struct shared_node *active_node;
1847
1848         if (wc->root_level == wc->active_node &&
1849             btrfs_root_refs(&root->root_item) == 0)
1850                 return 0;
1851
1852         active_node = wc->nodes[wc->active_node];
1853         inode_cache = &active_node->inode_cache;
1854         nritems = btrfs_header_nritems(eb);
1855         for (i = 0; i < nritems; i++) {
1856                 btrfs_item_key_to_cpu(eb, &key, i);
1857
1858                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1859                         continue;
1860                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1861                         continue;
1862
1863                 if (active_node->current == NULL ||
1864                     active_node->current->ino < key.objectid) {
1865                         if (active_node->current) {
1866                                 active_node->current->checked = 1;
1867                                 maybe_free_inode_rec(inode_cache,
1868                                                      active_node->current);
1869                         }
1870                         active_node->current = get_inode_rec(inode_cache,
1871                                                              key.objectid, 1);
1872                         BUG_ON(IS_ERR(active_node->current));
1873                 }
1874                 switch (key.type) {
1875                 case BTRFS_DIR_ITEM_KEY:
1876                 case BTRFS_DIR_INDEX_KEY:
1877                         ret = process_dir_item(root, eb, i, &key, active_node);
1878                         break;
1879                 case BTRFS_INODE_REF_KEY:
1880                         ret = process_inode_ref(eb, i, &key, active_node);
1881                         break;
1882                 case BTRFS_INODE_EXTREF_KEY:
1883                         ret = process_inode_extref(eb, i, &key, active_node);
1884                         break;
1885                 case BTRFS_INODE_ITEM_KEY:
1886                         ret = process_inode_item(eb, i, &key, active_node);
1887                         break;
1888                 case BTRFS_EXTENT_DATA_KEY:
1889                         ret = process_file_extent(root, eb, i, &key,
1890                                                   active_node);
1891                         break;
1892                 default:
1893                         break;
1894                 };
1895         }
1896         return ret;
1897 }
1898
1899 static void reada_walk_down(struct btrfs_root *root,
1900                             struct extent_buffer *node, int slot)
1901 {
1902         u64 bytenr;
1903         u64 ptr_gen;
1904         u32 nritems;
1905         u32 blocksize;
1906         int i;
1907         int level;
1908
1909         level = btrfs_header_level(node);
1910         if (level != 1)
1911                 return;
1912
1913         nritems = btrfs_header_nritems(node);
1914         blocksize = root->nodesize;
1915         for (i = slot; i < nritems; i++) {
1916                 bytenr = btrfs_node_blockptr(node, i);
1917                 ptr_gen = btrfs_node_ptr_generation(node, i);
1918                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1919         }
1920 }
1921
1922 /*
1923  * Check the child node/leaf by the following condition:
1924  * 1. the first item key of the node/leaf should be the same with the one
1925  *    in parent.
1926  * 2. block in parent node should match the child node/leaf.
1927  * 3. generation of parent node and child's header should be consistent.
1928  *
1929  * Or the child node/leaf pointed by the key in parent is not valid.
1930  *
1931  * We hope to check leaf owner too, but since subvol may share leaves,
1932  * which makes leaf owner check not so strong, key check should be
1933  * sufficient enough for that case.
1934  */
1935 static int check_child_node(struct btrfs_root *root,
1936                             struct extent_buffer *parent, int slot,
1937                             struct extent_buffer *child)
1938 {
1939         struct btrfs_key parent_key;
1940         struct btrfs_key child_key;
1941         int ret = 0;
1942
1943         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1944         if (btrfs_header_level(child) == 0)
1945                 btrfs_item_key_to_cpu(child, &child_key, 0);
1946         else
1947                 btrfs_node_key_to_cpu(child, &child_key, 0);
1948
1949         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1950                 ret = -EINVAL;
1951                 fprintf(stderr,
1952                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1953                         parent_key.objectid, parent_key.type, parent_key.offset,
1954                         child_key.objectid, child_key.type, child_key.offset);
1955         }
1956         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1957                 ret = -EINVAL;
1958                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1959                         btrfs_node_blockptr(parent, slot),
1960                         btrfs_header_bytenr(child));
1961         }
1962         if (btrfs_node_ptr_generation(parent, slot) !=
1963             btrfs_header_generation(child)) {
1964                 ret = -EINVAL;
1965                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1966                         btrfs_header_generation(child),
1967                         btrfs_node_ptr_generation(parent, slot));
1968         }
1969         return ret;
1970 }
1971
/*
 * Per-level cache of extent reference counts used while walking a tree.
 * walk_down_tree() stores in bytenr[level] the start of the block it last
 * looked up at that level and in refs[level] the count that
 * btrfs_lookup_extent_info() reported for it, so the lookup can be skipped
 * when the same block is seen again.
 */
struct node_refs {
        u64 bytenr[BTRFS_MAX_LEVEL];
        u64 refs[BTRFS_MAX_LEVEL];
};
1976
1977 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1978                           struct walk_control *wc, int *level,
1979                           struct node_refs *nrefs)
1980 {
1981         enum btrfs_tree_block_status status;
1982         u64 bytenr;
1983         u64 ptr_gen;
1984         struct extent_buffer *next;
1985         struct extent_buffer *cur;
1986         u32 blocksize;
1987         int ret, err = 0;
1988         u64 refs;
1989
1990         WARN_ON(*level < 0);
1991         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1992
1993         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1994                 refs = nrefs->refs[*level];
1995                 ret = 0;
1996         } else {
1997                 ret = btrfs_lookup_extent_info(NULL, root,
1998                                        path->nodes[*level]->start,
1999                                        *level, 1, &refs, NULL);
2000                 if (ret < 0) {
2001                         err = ret;
2002                         goto out;
2003                 }
2004                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2005                 nrefs->refs[*level] = refs;
2006         }
2007
2008         if (refs > 1) {
2009                 ret = enter_shared_node(root, path->nodes[*level]->start,
2010                                         refs, wc, *level);
2011                 if (ret > 0) {
2012                         err = ret;
2013                         goto out;
2014                 }
2015         }
2016
2017         while (*level >= 0) {
2018                 WARN_ON(*level < 0);
2019                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2020                 cur = path->nodes[*level];
2021
2022                 if (btrfs_header_level(cur) != *level)
2023                         WARN_ON(1);
2024
2025                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2026                         break;
2027                 if (*level == 0) {
2028                         ret = process_one_leaf(root, cur, wc);
2029                         if (ret < 0)
2030                                 err = ret;
2031                         break;
2032                 }
2033                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2034                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2035                 blocksize = root->nodesize;
2036
2037                 if (bytenr == nrefs->bytenr[*level - 1]) {
2038                         refs = nrefs->refs[*level - 1];
2039                 } else {
2040                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2041                                         *level - 1, 1, &refs, NULL);
2042                         if (ret < 0) {
2043                                 refs = 0;
2044                         } else {
2045                                 nrefs->bytenr[*level - 1] = bytenr;
2046                                 nrefs->refs[*level - 1] = refs;
2047                         }
2048                 }
2049
2050                 if (refs > 1) {
2051                         ret = enter_shared_node(root, bytenr, refs,
2052                                                 wc, *level - 1);
2053                         if (ret > 0) {
2054                                 path->slots[*level]++;
2055                                 continue;
2056                         }
2057                 }
2058
2059                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2060                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2061                         free_extent_buffer(next);
2062                         reada_walk_down(root, cur, path->slots[*level]);
2063                         next = read_tree_block(root, bytenr, blocksize,
2064                                                ptr_gen);
2065                         if (!extent_buffer_uptodate(next)) {
2066                                 struct btrfs_key node_key;
2067
2068                                 btrfs_node_key_to_cpu(path->nodes[*level],
2069                                                       &node_key,
2070                                                       path->slots[*level]);
2071                                 btrfs_add_corrupt_extent_record(root->fs_info,
2072                                                 &node_key,
2073                                                 path->nodes[*level]->start,
2074                                                 root->nodesize, *level);
2075                                 err = -EIO;
2076                                 goto out;
2077                         }
2078                 }
2079
2080                 ret = check_child_node(root, cur, path->slots[*level], next);
2081                 if (ret) {
2082                         err = ret;
2083                         goto out;
2084                 }
2085
2086                 if (btrfs_is_leaf(next))
2087                         status = btrfs_check_leaf(root, NULL, next);
2088                 else
2089                         status = btrfs_check_node(root, NULL, next);
2090                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2091                         free_extent_buffer(next);
2092                         err = -EIO;
2093                         goto out;
2094                 }
2095
2096                 *level = *level - 1;
2097                 free_extent_buffer(path->nodes[*level]);
2098                 path->nodes[*level] = next;
2099                 path->slots[*level] = 0;
2100         }
2101 out:
2102         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2103         return err;
2104 }
2105
2106 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2107                         struct walk_control *wc, int *level)
2108 {
2109         int i;
2110         struct extent_buffer *leaf;
2111
2112         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2113                 leaf = path->nodes[i];
2114                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2115                         path->slots[i]++;
2116                         *level = i;
2117                         return 0;
2118                 } else {
2119                         free_extent_buffer(path->nodes[*level]);
2120                         path->nodes[*level] = NULL;
2121                         BUG_ON(*level > wc->active_node);
2122                         if (*level == wc->active_node)
2123                                 leave_shared_node(root, wc, *level);
2124                         *level = i + 1;
2125                 }
2126         }
2127         return 1;
2128 }
2129
2130 static int check_root_dir(struct inode_record *rec)
2131 {
2132         struct inode_backref *backref;
2133         int ret = -1;
2134
2135         if (!rec->found_inode_item || rec->errors)
2136                 goto out;
2137         if (rec->nlink != 1 || rec->found_link != 0)
2138                 goto out;
2139         if (list_empty(&rec->backrefs))
2140                 goto out;
2141         backref = to_inode_backref(rec->backrefs.next);
2142         if (!backref->found_inode_ref)
2143                 goto out;
2144         if (backref->index != 0 || backref->namelen != 2 ||
2145             memcmp(backref->name, "..", 2))
2146                 goto out;
2147         if (backref->found_dir_index || backref->found_dir_item)
2148                 goto out;
2149         ret = 0;
2150 out:
2151         return ret;
2152 }
2153
2154 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2155                               struct btrfs_root *root, struct btrfs_path *path,
2156                               struct inode_record *rec)
2157 {
2158         struct btrfs_inode_item *ei;
2159         struct btrfs_key key;
2160         int ret;
2161
2162         key.objectid = rec->ino;
2163         key.type = BTRFS_INODE_ITEM_KEY;
2164         key.offset = (u64)-1;
2165
2166         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2167         if (ret < 0)
2168                 goto out;
2169         if (ret) {
2170                 if (!path->slots[0]) {
2171                         ret = -ENOENT;
2172                         goto out;
2173                 }
2174                 path->slots[0]--;
2175                 ret = 0;
2176         }
2177         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2178         if (key.objectid != rec->ino) {
2179                 ret = -ENOENT;
2180                 goto out;
2181         }
2182
2183         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2184                             struct btrfs_inode_item);
2185         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2186         btrfs_mark_buffer_dirty(path->nodes[0]);
2187         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2188         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2189                root->root_key.objectid);
2190 out:
2191         btrfs_release_path(path);
2192         return ret;
2193 }
2194
2195 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2196                                     struct btrfs_root *root,
2197                                     struct btrfs_path *path,
2198                                     struct inode_record *rec)
2199 {
2200         int ret;
2201
2202         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2203         btrfs_release_path(path);
2204         if (!ret)
2205                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2206         return ret;
2207 }
2208
2209 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2210                                struct btrfs_root *root,
2211                                struct btrfs_path *path,
2212                                struct inode_record *rec)
2213 {
2214         struct btrfs_inode_item *ei;
2215         struct btrfs_key key;
2216         int ret = 0;
2217
2218         key.objectid = rec->ino;
2219         key.type = BTRFS_INODE_ITEM_KEY;
2220         key.offset = 0;
2221
2222         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2223         if (ret) {
2224                 if (ret > 0)
2225                         ret = -ENOENT;
2226                 goto out;
2227         }
2228
2229         /* Since ret == 0, no need to check anything */
2230         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2231                             struct btrfs_inode_item);
2232         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2233         btrfs_mark_buffer_dirty(path->nodes[0]);
2234         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2235         printf("reset nbytes for ino %llu root %llu\n",
2236                rec->ino, root->root_key.objectid);
2237 out:
2238         btrfs_release_path(path);
2239         return ret;
2240 }
2241
2242 static int add_missing_dir_index(struct btrfs_root *root,
2243                                  struct cache_tree *inode_cache,
2244                                  struct inode_record *rec,
2245                                  struct inode_backref *backref)
2246 {
2247         struct btrfs_path *path;
2248         struct btrfs_trans_handle *trans;
2249         struct btrfs_dir_item *dir_item;
2250         struct extent_buffer *leaf;
2251         struct btrfs_key key;
2252         struct btrfs_disk_key disk_key;
2253         struct inode_record *dir_rec;
2254         unsigned long name_ptr;
2255         u32 data_size = sizeof(*dir_item) + backref->namelen;
2256         int ret;
2257
2258         path = btrfs_alloc_path();
2259         if (!path)
2260                 return -ENOMEM;
2261
2262         trans = btrfs_start_transaction(root, 1);
2263         if (IS_ERR(trans)) {
2264                 btrfs_free_path(path);
2265                 return PTR_ERR(trans);
2266         }
2267
2268         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2269                 (unsigned long long)rec->ino);
2270         key.objectid = backref->dir;
2271         key.type = BTRFS_DIR_INDEX_KEY;
2272         key.offset = backref->index;
2273
2274         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2275         BUG_ON(ret);
2276
2277         leaf = path->nodes[0];
2278         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2279
2280         disk_key.objectid = cpu_to_le64(rec->ino);
2281         disk_key.type = BTRFS_INODE_ITEM_KEY;
2282         disk_key.offset = 0;
2283
2284         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2285         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2286         btrfs_set_dir_data_len(leaf, dir_item, 0);
2287         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2288         name_ptr = (unsigned long)(dir_item + 1);
2289         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2290         btrfs_mark_buffer_dirty(leaf);
2291         btrfs_free_path(path);
2292         btrfs_commit_transaction(trans, root);
2293
2294         backref->found_dir_index = 1;
2295         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2296         BUG_ON(IS_ERR(dir_rec));
2297         if (!dir_rec)
2298                 return 0;
2299         dir_rec->found_size += backref->namelen;
2300         if (dir_rec->found_size == dir_rec->isize &&
2301             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2302                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2303         if (dir_rec->found_size != dir_rec->isize)
2304                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2305
2306         return 0;
2307 }
2308
2309 static int delete_dir_index(struct btrfs_root *root,
2310                             struct cache_tree *inode_cache,
2311                             struct inode_record *rec,
2312                             struct inode_backref *backref)
2313 {
2314         struct btrfs_trans_handle *trans;
2315         struct btrfs_dir_item *di;
2316         struct btrfs_path *path;
2317         int ret = 0;
2318
2319         path = btrfs_alloc_path();
2320         if (!path)
2321                 return -ENOMEM;
2322
2323         trans = btrfs_start_transaction(root, 1);
2324         if (IS_ERR(trans)) {
2325                 btrfs_free_path(path);
2326                 return PTR_ERR(trans);
2327         }
2328
2329
2330         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2331                 (unsigned long long)backref->dir,
2332                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2333                 (unsigned long long)root->objectid);
2334
2335         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2336                                     backref->name, backref->namelen,
2337                                     backref->index, -1);
2338         if (IS_ERR(di)) {
2339                 ret = PTR_ERR(di);
2340                 btrfs_free_path(path);
2341                 btrfs_commit_transaction(trans, root);
2342                 if (ret == -ENOENT)
2343                         return 0;
2344                 return ret;
2345         }
2346
2347         if (!di)
2348                 ret = btrfs_del_item(trans, root, path);
2349         else
2350                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2351         BUG_ON(ret);
2352         btrfs_free_path(path);
2353         btrfs_commit_transaction(trans, root);
2354         return ret;
2355 }
2356
2357 static int create_inode_item(struct btrfs_root *root,
2358                              struct inode_record *rec,
2359                              struct inode_backref *backref, int root_dir)
2360 {
2361         struct btrfs_trans_handle *trans;
2362         struct btrfs_inode_item inode_item;
2363         time_t now = time(NULL);
2364         int ret;
2365
2366         trans = btrfs_start_transaction(root, 1);
2367         if (IS_ERR(trans)) {
2368                 ret = PTR_ERR(trans);
2369                 return ret;
2370         }
2371
2372         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2373                 "be incomplete, please check permissions and content after "
2374                 "the fsck completes.\n", (unsigned long long)root->objectid,
2375                 (unsigned long long)rec->ino);
2376
2377         memset(&inode_item, 0, sizeof(inode_item));
2378         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2379         if (root_dir)
2380                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2381         else
2382                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2383         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2384         if (rec->found_dir_item) {
2385                 if (rec->found_file_extent)
2386                         fprintf(stderr, "root %llu inode %llu has both a dir "
2387                                 "item and extents, unsure if it is a dir or a "
2388                                 "regular file so setting it as a directory\n",
2389                                 (unsigned long long)root->objectid,
2390                                 (unsigned long long)rec->ino);
2391                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2392                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2393         } else if (!rec->found_dir_item) {
2394                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2395                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2396         }
2397         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2398         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2399         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2400         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2401         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2402         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2403         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2404         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2405
2406         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2407         BUG_ON(ret);
2408         btrfs_commit_transaction(trans, root);
2409         return 0;
2410 }
2411
2412 static int repair_inode_backrefs(struct btrfs_root *root,
2413                                  struct inode_record *rec,
2414                                  struct cache_tree *inode_cache,
2415                                  int delete)
2416 {
2417         struct inode_backref *tmp, *backref;
2418         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2419         int ret = 0;
2420         int repaired = 0;
2421
2422         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2423                 if (!delete && rec->ino == root_dirid) {
2424                         if (!rec->found_inode_item) {
2425                                 ret = create_inode_item(root, rec, backref, 1);
2426                                 if (ret)
2427                                         break;
2428                                 repaired++;
2429                         }
2430                 }
2431
2432                 /* Index 0 for root dir's are special, don't mess with it */
2433                 if (rec->ino == root_dirid && backref->index == 0)
2434                         continue;
2435
2436                 if (delete &&
2437                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2438                      (backref->found_dir_index && backref->found_inode_ref &&
2439                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2440                         ret = delete_dir_index(root, inode_cache, rec, backref);
2441                         if (ret)
2442                                 break;
2443                         repaired++;
2444                         list_del(&backref->list);
2445                         free(backref);
2446                 }
2447
2448                 if (!delete && !backref->found_dir_index &&
2449                     backref->found_dir_item && backref->found_inode_ref) {
2450                         ret = add_missing_dir_index(root, inode_cache, rec,
2451                                                     backref);
2452                         if (ret)
2453                                 break;
2454                         repaired++;
2455                         if (backref->found_dir_item &&
2456                             backref->found_dir_index &&
2457                             backref->found_dir_index) {
2458                                 if (!backref->errors &&
2459                                     backref->found_inode_ref) {
2460                                         list_del(&backref->list);
2461                                         free(backref);
2462                                 }
2463                         }
2464                 }
2465
2466                 if (!delete && (!backref->found_dir_index &&
2467                                 !backref->found_dir_item &&
2468                                 backref->found_inode_ref)) {
2469                         struct btrfs_trans_handle *trans;
2470                         struct btrfs_key location;
2471
2472                         ret = check_dir_conflict(root, backref->name,
2473                                                  backref->namelen,
2474                                                  backref->dir,
2475                                                  backref->index);
2476                         if (ret) {
2477                                 /*
2478                                  * let nlink fixing routine to handle it,
2479                                  * which can do it better.
2480                                  */
2481                                 ret = 0;
2482                                 break;
2483                         }
2484                         location.objectid = rec->ino;
2485                         location.type = BTRFS_INODE_ITEM_KEY;
2486                         location.offset = 0;
2487
2488                         trans = btrfs_start_transaction(root, 1);
2489                         if (IS_ERR(trans)) {
2490                                 ret = PTR_ERR(trans);
2491                                 break;
2492                         }
2493                         fprintf(stderr, "adding missing dir index/item pair "
2494                                 "for inode %llu\n",
2495                                 (unsigned long long)rec->ino);
2496                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2497                                                     backref->namelen,
2498                                                     backref->dir, &location,
2499                                                     imode_to_type(rec->imode),
2500                                                     backref->index);
2501                         BUG_ON(ret);
2502                         btrfs_commit_transaction(trans, root);
2503                         repaired++;
2504                 }
2505
2506                 if (!delete && (backref->found_inode_ref &&
2507                                 backref->found_dir_index &&
2508                                 backref->found_dir_item &&
2509                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2510                                 !rec->found_inode_item)) {
2511                         ret = create_inode_item(root, rec, backref, 0);
2512                         if (ret)
2513                                 break;
2514                         repaired++;
2515                 }
2516
2517         }
2518         return ret ? ret : repaired;
2519 }
2520
2521 /*
2522  * To determine the file type for nlink/inode_item repair
2523  *
2524  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2525  * Return -ENOENT if file type is not found.
2526  */
2527 static int find_file_type(struct inode_record *rec, u8 *type)
2528 {
2529         struct inode_backref *backref;
2530
2531         /* For inode item recovered case */
2532         if (rec->found_inode_item) {
2533                 *type = imode_to_type(rec->imode);
2534                 return 0;
2535         }
2536
2537         list_for_each_entry(backref, &rec->backrefs, list) {
2538                 if (backref->found_dir_index || backref->found_dir_item) {
2539                         *type = backref->filetype;
2540                         return 0;
2541                 }
2542         }
2543         return -ENOENT;
2544 }
2545
2546 /*
2547  * To determine the file name for nlink repair
2548  *
2549  * Return 0 if file name is found, set name and namelen.
2550  * Return -ENOENT if file name is not found.
2551  */
2552 static int find_file_name(struct inode_record *rec,
2553                           char *name, int *namelen)
2554 {
2555         struct inode_backref *backref;
2556
2557         list_for_each_entry(backref, &rec->backrefs, list) {
2558                 if (backref->found_dir_index || backref->found_dir_item ||
2559                     backref->found_inode_ref) {
2560                         memcpy(name, backref->name, backref->namelen);
2561                         *namelen = backref->namelen;
2562                         return 0;
2563                 }
2564         }
2565         return -ENOENT;
2566 }
2567
2568 /* Reset the nlink of the inode to the correct one */
2569 static int reset_nlink(struct btrfs_trans_handle *trans,
2570                        struct btrfs_root *root,
2571                        struct btrfs_path *path,
2572                        struct inode_record *rec)
2573 {
2574         struct inode_backref *backref;
2575         struct inode_backref *tmp;
2576         struct btrfs_key key;
2577         struct btrfs_inode_item *inode_item;
2578         int ret = 0;
2579
2580         /* We don't believe this either, reset it and iterate backref */
2581         rec->found_link = 0;
2582
2583         /* Remove all backref including the valid ones */
2584         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2585                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2586                                    backref->index, backref->name,
2587                                    backref->namelen, 0);
2588                 if (ret < 0)
2589                         goto out;
2590
2591                 /* remove invalid backref, so it won't be added back */
2592                 if (!(backref->found_dir_index &&
2593                       backref->found_dir_item &&
2594                       backref->found_inode_ref)) {
2595                         list_del(&backref->list);
2596                         free(backref);
2597                 } else {
2598                         rec->found_link++;
2599                 }
2600         }
2601
2602         /* Set nlink to 0 */
2603         key.objectid = rec->ino;
2604         key.type = BTRFS_INODE_ITEM_KEY;
2605         key.offset = 0;
2606         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2607         if (ret < 0)
2608                 goto out;
2609         if (ret > 0) {
2610                 ret = -ENOENT;
2611                 goto out;
2612         }
2613         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2614                                     struct btrfs_inode_item);
2615         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2616         btrfs_mark_buffer_dirty(path->nodes[0]);
2617         btrfs_release_path(path);
2618
2619         /*
2620          * Add back valid inode_ref/dir_item/dir_index,
2621          * add_link() will handle the nlink inc, so new nlink must be correct
2622          */
2623         list_for_each_entry(backref, &rec->backrefs, list) {
2624                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2625                                      backref->name, backref->namelen,
2626                                      backref->filetype, &backref->index, 1);
2627                 if (ret < 0)
2628                         goto out;
2629         }
2630 out:
2631         btrfs_release_path(path);
2632         return ret;
2633 }
2634
2635 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2636                                struct btrfs_root *root,
2637                                struct btrfs_path *path,
2638                                struct inode_record *rec)
2639 {
2640         char *dir_name = "lost+found";
2641         char namebuf[BTRFS_NAME_LEN] = {0};
2642         u64 lost_found_ino;
2643         u32 mode = 0700;
2644         u8 type = 0;
2645         int namelen = 0;
2646         int name_recovered = 0;
2647         int type_recovered = 0;
2648         int ret = 0;
2649
2650         /*
2651          * Get file name and type first before these invalid inode ref
2652          * are deleted by remove_all_invalid_backref()
2653          */
2654         name_recovered = !find_file_name(rec, namebuf, &namelen);
2655         type_recovered = !find_file_type(rec, &type);
2656
2657         if (!name_recovered) {
2658                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2659                        rec->ino, rec->ino);
2660                 namelen = count_digits(rec->ino);
2661                 sprintf(namebuf, "%llu", rec->ino);
2662                 name_recovered = 1;
2663         }
2664         if (!type_recovered) {
2665                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2666                        rec->ino);
2667                 type = BTRFS_FT_REG_FILE;
2668                 type_recovered = 1;
2669         }
2670
2671         ret = reset_nlink(trans, root, path, rec);
2672         if (ret < 0) {
2673                 fprintf(stderr,
2674                         "Failed to reset nlink for inode %llu: %s\n",
2675                         rec->ino, strerror(-ret));
2676                 goto out;
2677         }
2678
2679         if (rec->found_link == 0) {
2680                 lost_found_ino = root->highest_inode;
2681                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2682                         ret = -EOVERFLOW;
2683                         goto out;
2684                 }
2685                 lost_found_ino++;
2686                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2687                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2688                                   mode);
2689                 if (ret < 0) {
2690                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2691                                 dir_name, strerror(-ret));
2692                         goto out;
2693                 }
2694                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2695                                      namebuf, namelen, type, NULL, 1);
2696                 /*
2697                  * Add ".INO" suffix several times to handle case where
2698                  * "FILENAME.INO" is already taken by another file.
2699                  */
2700                 while (ret == -EEXIST) {
2701                         /*
2702                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2703                          */
2704                         if (namelen + count_digits(rec->ino) + 1 >
2705                             BTRFS_NAME_LEN) {
2706                                 ret = -EFBIG;
2707                                 goto out;
2708                         }
2709                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2710                                  ".%llu", rec->ino);
2711                         namelen += count_digits(rec->ino) + 1;
2712                         ret = btrfs_add_link(trans, root, rec->ino,
2713                                              lost_found_ino, namebuf,
2714                                              namelen, type, NULL, 1);
2715                 }
2716                 if (ret < 0) {
2717                         fprintf(stderr,
2718                                 "Failed to link the inode %llu to %s dir: %s\n",
2719                                 rec->ino, dir_name, strerror(-ret));
2720                         goto out;
2721                 }
2722                 /*
2723                  * Just increase the found_link, don't actually add the
2724                  * backref. This will make things easier and this inode
2725                  * record will be freed after the repair is done.
2726                  * So fsck will not report problem about this inode.
2727                  */
2728                 rec->found_link++;
2729                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2730                        namelen, namebuf, dir_name);
2731         }
2732         printf("Fixed the nlink of inode %llu\n", rec->ino);
2733 out:
2734         /*
2735          * Clear the flag anyway, or we will loop forever for the same inode
2736          * as it will not be removed from the bad inode list and the dead loop
2737          * happens.
2738          */
2739         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2740         btrfs_release_path(path);
2741         return ret;
2742 }
2743
2744 /*
2745  * Check if there is any normal(reg or prealloc) file extent for given
2746  * ino.
2747  * This is used to determine the file type when neither its dir_index/item or
2748  * inode_item exists.
2749  *
2750  * This will *NOT* report error, if any error happens, just consider it does
2751  * not have any normal file extent.
2752  */
2753 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2754 {
2755         struct btrfs_path *path;
2756         struct btrfs_key key;
2757         struct btrfs_key found_key;
2758         struct btrfs_file_extent_item *fi;
2759         u8 type;
2760         int ret = 0;
2761
2762         path = btrfs_alloc_path();
2763         if (!path)
2764                 goto out;
2765         key.objectid = ino;
2766         key.type = BTRFS_EXTENT_DATA_KEY;
2767         key.offset = 0;
2768
2769         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2770         if (ret < 0) {
2771                 ret = 0;
2772                 goto out;
2773         }
2774         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2775                 ret = btrfs_next_leaf(root, path);
2776                 if (ret) {
2777                         ret = 0;
2778                         goto out;
2779                 }
2780         }
2781         while (1) {
2782                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2783                                       path->slots[0]);
2784                 if (found_key.objectid != ino ||
2785                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2786                         break;
2787                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2788                                     struct btrfs_file_extent_item);
2789                 type = btrfs_file_extent_type(path->nodes[0], fi);
2790                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2791                         ret = 1;
2792                         goto out;
2793                 }
2794         }
2795 out:
2796         btrfs_free_path(path);
2797         return ret;
2798 }
2799
2800 static u32 btrfs_type_to_imode(u8 type)
2801 {
2802         static u32 imode_by_btrfs_type[] = {
2803                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2804                 [BTRFS_FT_DIR]          = S_IFDIR,
2805                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2806                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2807                 [BTRFS_FT_FIFO]         = S_IFIFO,
2808                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2809                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2810         };
2811
2812         return imode_by_btrfs_type[(type)];
2813 }
2814
2815 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2816                                 struct btrfs_root *root,
2817                                 struct btrfs_path *path,
2818                                 struct inode_record *rec)
2819 {
2820         u8 filetype;
2821         u32 mode = 0700;
2822         int type_recovered = 0;
2823         int ret = 0;
2824
2825         printf("Trying to rebuild inode:%llu\n", rec->ino);
2826
2827         type_recovered = !find_file_type(rec, &filetype);
2828
2829         /*
2830          * Try to determine inode type if type not found.
2831          *
2832          * For found regular file extent, it must be FILE.
2833          * For found dir_item/index, it must be DIR.
2834          *
2835          * For undetermined one, use FILE as fallback.
2836          *
2837          * TODO:
2838          * 1. If found backref(inode_index/item is already handled) to it,
2839          *    it must be DIR.
2840          *    Need new inode-inode ref structure to allow search for that.
2841          */
2842         if (!type_recovered) {
2843                 if (rec->found_file_extent &&
2844                     find_normal_file_extent(root, rec->ino)) {
2845                         type_recovered = 1;
2846                         filetype = BTRFS_FT_REG_FILE;
2847                 } else if (rec->found_dir_item) {
2848                         type_recovered = 1;
2849                         filetype = BTRFS_FT_DIR;
2850                 } else if (!list_empty(&rec->orphan_extents)) {
2851                         type_recovered = 1;
2852                         filetype = BTRFS_FT_REG_FILE;
2853                 } else{
2854                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2855                                rec->ino);
2856                         type_recovered = 1;
2857                         filetype = BTRFS_FT_REG_FILE;
2858                 }
2859         }
2860
2861         ret = btrfs_new_inode(trans, root, rec->ino,
2862                               mode | btrfs_type_to_imode(filetype));
2863         if (ret < 0)
2864                 goto out;
2865
2866         /*
2867          * Here inode rebuild is done, we only rebuild the inode item,
2868          * don't repair the nlink(like move to lost+found).
2869          * That is the job of nlink repair.
2870          *
2871          * We just fill the record and return
2872          */
2873         rec->found_dir_item = 1;
2874         rec->imode = mode | btrfs_type_to_imode(filetype);
2875         rec->nlink = 0;
2876         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2877         /* Ensure the inode_nlinks repair function will be called */
2878         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2879 out:
2880         return ret;
2881 }
2882
2883 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2884                                       struct btrfs_root *root,
2885                                       struct btrfs_path *path,
2886                                       struct inode_record *rec)
2887 {
2888         struct orphan_data_extent *orphan;
2889         struct orphan_data_extent *tmp;
2890         int ret = 0;
2891
2892         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2893                 /*
2894                  * Check for conflicting file extents
2895                  *
2896                  * Here we don't know whether the extents is compressed or not,
2897                  * so we can only assume it not compressed nor data offset,
2898                  * and use its disk_len as extent length.
2899                  */
2900                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2901                                        orphan->offset, orphan->disk_len, 0);
2902                 btrfs_release_path(path);
2903                 if (ret < 0)
2904                         goto out;
2905                 if (!ret) {
2906                         fprintf(stderr,
2907                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2908                                 orphan->disk_bytenr, orphan->disk_len);
2909                         ret = btrfs_free_extent(trans,
2910                                         root->fs_info->extent_root,
2911                                         orphan->disk_bytenr, orphan->disk_len,
2912                                         0, root->objectid, orphan->objectid,
2913                                         orphan->offset);
2914                         if (ret < 0)
2915                                 goto out;
2916                 }
2917                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2918                                 orphan->offset, orphan->disk_bytenr,
2919                                 orphan->disk_len, orphan->disk_len);
2920                 if (ret < 0)
2921                         goto out;
2922
2923                 /* Update file size info */
2924                 rec->found_size += orphan->disk_len;
2925                 if (rec->found_size == rec->nbytes)
2926                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2927
2928                 /* Update the file extent hole info too */
2929                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2930                                            orphan->disk_len);
2931                 if (ret < 0)
2932                         goto out;
2933                 if (RB_EMPTY_ROOT(&rec->holes))
2934                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2935
2936                 list_del(&orphan->list);
2937                 free(orphan);
2938         }
2939         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2940 out:
2941         return ret;
2942 }
2943
2944 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2945                                         struct btrfs_root *root,
2946                                         struct btrfs_path *path,
2947                                         struct inode_record *rec)
2948 {
2949         struct rb_node *node;
2950         struct file_extent_hole *hole;
2951         int found = 0;
2952         int ret = 0;
2953
2954         node = rb_first(&rec->holes);
2955
2956         while (node) {
2957                 found = 1;
2958                 hole = rb_entry(node, struct file_extent_hole, node);
2959                 ret = btrfs_punch_hole(trans, root, rec->ino,
2960                                        hole->start, hole->len);
2961                 if (ret < 0)
2962                         goto out;
2963                 ret = del_file_extent_hole(&rec->holes, hole->start,
2964                                            hole->len);
2965                 if (ret < 0)
2966                         goto out;
2967                 if (RB_EMPTY_ROOT(&rec->holes))
2968                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2969                 node = rb_first(&rec->holes);
2970         }
2971         /* special case for a file losing all its file extent */
2972         if (!found) {
2973                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2974                                        round_up(rec->isize, root->sectorsize));
2975                 if (ret < 0)
2976                         goto out;
2977         }
2978         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2979                rec->ino, root->objectid);
2980 out:
2981         return ret;
2982 }
2983
2984 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2985 {
2986         struct btrfs_trans_handle *trans;
2987         struct btrfs_path *path;
2988         int ret = 0;
2989
2990         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2991                              I_ERR_NO_ORPHAN_ITEM |
2992                              I_ERR_LINK_COUNT_WRONG |
2993                              I_ERR_NO_INODE_ITEM |
2994                              I_ERR_FILE_EXTENT_ORPHAN |
2995                              I_ERR_FILE_EXTENT_DISCOUNT|
2996                              I_ERR_FILE_NBYTES_WRONG)))
2997                 return rec->errors;
2998
2999         path = btrfs_alloc_path();
3000         if (!path)
3001                 return -ENOMEM;
3002
3003         /*
3004          * For nlink repair, it may create a dir and add link, so
3005          * 2 for parent(256)'s dir_index and dir_item
3006          * 2 for lost+found dir's inode_item and inode_ref
3007          * 1 for the new inode_ref of the file
3008          * 2 for lost+found dir's dir_index and dir_item for the file
3009          */
3010         trans = btrfs_start_transaction(root, 7);
3011         if (IS_ERR(trans)) {
3012                 btrfs_free_path(path);
3013                 return PTR_ERR(trans);
3014         }
3015
3016         if (rec->errors & I_ERR_NO_INODE_ITEM)
3017                 ret = repair_inode_no_item(trans, root, path, rec);
3018         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3019                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3020         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3021                 ret = repair_inode_discount_extent(trans, root, path, rec);
3022         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3023                 ret = repair_inode_isize(trans, root, path, rec);
3024         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3025                 ret = repair_inode_orphan_item(trans, root, path, rec);
3026         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3027                 ret = repair_inode_nlinks(trans, root, path, rec);
3028         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3029                 ret = repair_inode_nbytes(trans, root, path, rec);
3030         btrfs_commit_transaction(trans, root);
3031         btrfs_free_path(path);
3032         return ret;
3033 }
3034
3035 static int check_inode_recs(struct btrfs_root *root,
3036                             struct cache_tree *inode_cache)
3037 {
3038         struct cache_extent *cache;
3039         struct ptr_node *node;
3040         struct inode_record *rec;
3041         struct inode_backref *backref;
3042         int stage = 0;
3043         int ret = 0;
3044         int err = 0;
3045         u64 error = 0;
3046         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3047
3048         if (btrfs_root_refs(&root->root_item) == 0) {
3049                 if (!cache_tree_empty(inode_cache))
3050                         fprintf(stderr, "warning line %d\n", __LINE__);
3051                 return 0;
3052         }
3053
3054         /*
3055          * We need to record the highest inode number for later 'lost+found'
3056          * dir creation.
3057          * We must select an ino not used/referred by any existing inode, or
3058          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3059          * this may cause 'lost+found' dir has wrong nlinks.
3060          */
3061         cache = last_cache_extent(inode_cache);
3062         if (cache) {
3063                 node = container_of(cache, struct ptr_node, cache);
3064                 rec = node->data;
3065                 if (rec->ino > root->highest_inode)
3066                         root->highest_inode = rec->ino;
3067         }
3068
3069         /*
3070          * We need to repair backrefs first because we could change some of the
3071          * errors in the inode recs.
3072          *
3073          * We also need to go through and delete invalid backrefs first and then
3074          * add the correct ones second.  We do this because we may get EEXIST
3075          * when adding back the correct index because we hadn't yet deleted the
3076          * invalid index.
3077          *
3078          * For example, if we were missing a dir index then the directories
3079          * isize would be wrong, so if we fixed the isize to what we thought it
3080          * would be and then fixed the backref we'd still have a invalid fs, so
3081          * we need to add back the dir index and then check to see if the isize
3082          * is still wrong.
3083          */
3084         while (stage < 3) {
3085                 stage++;
3086                 if (stage == 3 && !err)
3087                         break;
3088
3089                 cache = search_cache_extent(inode_cache, 0);
3090                 while (repair && cache) {
3091                         node = container_of(cache, struct ptr_node, cache);
3092                         rec = node->data;
3093                         cache = next_cache_extent(cache);
3094
3095                         /* Need to free everything up and rescan */
3096                         if (stage == 3) {
3097                                 remove_cache_extent(inode_cache, &node->cache);
3098                                 free(node);
3099                                 free_inode_rec(rec);
3100                                 continue;
3101                         }
3102
3103                         if (list_empty(&rec->backrefs))
3104                                 continue;
3105
3106                         ret = repair_inode_backrefs(root, rec, inode_cache,
3107                                                     stage == 1);
3108                         if (ret < 0) {
3109                                 err = ret;
3110                                 stage = 2;
3111                                 break;
3112                         } if (ret > 0) {
3113                                 err = -EAGAIN;
3114                         }
3115                 }
3116         }
3117         if (err)
3118                 return err;
3119
3120         rec = get_inode_rec(inode_cache, root_dirid, 0);
3121         BUG_ON(IS_ERR(rec));
3122         if (rec) {
3123                 ret = check_root_dir(rec);
3124                 if (ret) {
3125                         fprintf(stderr, "root %llu root dir %llu error\n",
3126                                 (unsigned long long)root->root_key.objectid,
3127                                 (unsigned long long)root_dirid);
3128                         print_inode_error(root, rec);
3129                         error++;
3130                 }
3131         } else {
3132                 if (repair) {
3133                         struct btrfs_trans_handle *trans;
3134
3135                         trans = btrfs_start_transaction(root, 1);
3136                         if (IS_ERR(trans)) {
3137                                 err = PTR_ERR(trans);
3138                                 return err;
3139                         }
3140
3141                         fprintf(stderr,
3142                                 "root %llu missing its root dir, recreating\n",
3143                                 (unsigned long long)root->objectid);
3144
3145                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3146                         BUG_ON(ret);
3147
3148                         btrfs_commit_transaction(trans, root);
3149                         return -EAGAIN;
3150                 }
3151
3152                 fprintf(stderr, "root %llu root dir %llu not found\n",
3153                         (unsigned long long)root->root_key.objectid,
3154                         (unsigned long long)root_dirid);
3155         }
3156
3157         while (1) {
3158                 cache = search_cache_extent(inode_cache, 0);
3159                 if (!cache)
3160                         break;
3161                 node = container_of(cache, struct ptr_node, cache);
3162                 rec = node->data;
3163                 remove_cache_extent(inode_cache, &node->cache);
3164                 free(node);
3165                 if (rec->ino == root_dirid ||
3166                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3167                         free_inode_rec(rec);
3168                         continue;
3169                 }
3170
3171                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3172                         ret = check_orphan_item(root, rec->ino);
3173                         if (ret == 0)
3174                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3175                         if (can_free_inode_rec(rec)) {
3176                                 free_inode_rec(rec);
3177                                 continue;
3178                         }
3179                 }
3180
3181                 if (!rec->found_inode_item)
3182                         rec->errors |= I_ERR_NO_INODE_ITEM;
3183                 if (rec->found_link != rec->nlink)
3184                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3185                 if (repair) {
3186                         ret = try_repair_inode(root, rec);
3187                         if (ret == 0 && can_free_inode_rec(rec)) {
3188                                 free_inode_rec(rec);
3189                                 continue;
3190                         }
3191                         ret = 0;
3192                 }
3193
3194                 if (!(repair && ret == 0))
3195                         error++;
3196                 print_inode_error(root, rec);
3197                 list_for_each_entry(backref, &rec->backrefs, list) {
3198                         if (!backref->found_dir_item)
3199                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3200                         if (!backref->found_dir_index)
3201                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3202                         if (!backref->found_inode_ref)
3203                                 backref->errors |= REF_ERR_NO_INODE_REF;
3204                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3205                                 " namelen %u name %s filetype %d errors %x",
3206                                 (unsigned long long)backref->dir,
3207                                 (unsigned long long)backref->index,
3208                                 backref->namelen, backref->name,
3209                                 backref->filetype, backref->errors);
3210                         print_ref_error(backref->errors);
3211                 }
3212                 free_inode_rec(rec);
3213         }
3214         return (error > 0) ? -1 : 0;
3215 }
3216
3217 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3218                                         u64 objectid)
3219 {
3220         struct cache_extent *cache;
3221         struct root_record *rec = NULL;
3222         int ret;
3223
3224         cache = lookup_cache_extent(root_cache, objectid, 1);
3225         if (cache) {
3226                 rec = container_of(cache, struct root_record, cache);
3227         } else {
3228                 rec = calloc(1, sizeof(*rec));
3229                 if (!rec)
3230                         return ERR_PTR(-ENOMEM);
3231                 rec->objectid = objectid;
3232                 INIT_LIST_HEAD(&rec->backrefs);
3233                 rec->cache.start = objectid;
3234                 rec->cache.size = 1;
3235
3236                 ret = insert_cache_extent(root_cache, &rec->cache);
3237                 if (ret)
3238                         return ERR_PTR(-EEXIST);
3239         }
3240         return rec;
3241 }
3242
3243 static struct root_backref *get_root_backref(struct root_record *rec,
3244                                              u64 ref_root, u64 dir, u64 index,
3245                                              const char *name, int namelen)
3246 {
3247         struct root_backref *backref;
3248
3249         list_for_each_entry(backref, &rec->backrefs, list) {
3250                 if (backref->ref_root != ref_root || backref->dir != dir ||
3251                     backref->namelen != namelen)
3252                         continue;
3253                 if (memcmp(name, backref->name, namelen))
3254                         continue;
3255                 return backref;
3256         }
3257
3258         backref = calloc(1, sizeof(*backref) + namelen + 1);
3259         if (!backref)
3260                 return NULL;
3261         backref->ref_root = ref_root;
3262         backref->dir = dir;
3263         backref->index = index;
3264         backref->namelen = namelen;
3265         memcpy(backref->name, name, namelen);
3266         backref->name[namelen] = '\0';
3267         list_add_tail(&backref->list, &rec->backrefs);
3268         return backref;
3269 }
3270
3271 static void free_root_record(struct cache_extent *cache)
3272 {
3273         struct root_record *rec;
3274         struct root_backref *backref;
3275
3276         rec = container_of(cache, struct root_record, cache);
3277         while (!list_empty(&rec->backrefs)) {
3278                 backref = to_root_backref(rec->backrefs.next);
3279                 list_del(&backref->list);
3280                 free(backref);
3281         }
3282
3283         kfree(rec);
3284 }
3285
3286 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3287
3288 static int add_root_backref(struct cache_tree *root_cache,
3289                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3290                             const char *name, int namelen,
3291                             int item_type, int errors)
3292 {
3293         struct root_record *rec;
3294         struct root_backref *backref;
3295
3296         rec = get_root_rec(root_cache, root_id);
3297         BUG_ON(IS_ERR(rec));
3298         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3299         BUG_ON(!backref);
3300
3301         backref->errors |= errors;
3302
3303         if (item_type != BTRFS_DIR_ITEM_KEY) {
3304                 if (backref->found_dir_index || backref->found_back_ref ||
3305                     backref->found_forward_ref) {
3306                         if (backref->index != index)
3307                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3308                 } else {
3309                         backref->index = index;
3310                 }
3311         }
3312
3313         if (item_type == BTRFS_DIR_ITEM_KEY) {
3314                 if (backref->found_forward_ref)
3315                         rec->found_ref++;
3316                 backref->found_dir_item = 1;
3317         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3318                 backref->found_dir_index = 1;
3319         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3320                 if (backref->found_forward_ref)
3321                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3322                 else if (backref->found_dir_item)
3323                         rec->found_ref++;
3324                 backref->found_forward_ref = 1;
3325         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3326                 if (backref->found_back_ref)
3327                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3328                 backref->found_back_ref = 1;
3329         } else {
3330                 BUG_ON(1);
3331         }
3332
3333         if (backref->found_forward_ref && backref->found_dir_item)
3334                 backref->reachable = 1;
3335         return 0;
3336 }
3337
3338 static int merge_root_recs(struct btrfs_root *root,
3339                            struct cache_tree *src_cache,
3340                            struct cache_tree *dst_cache)
3341 {
3342         struct cache_extent *cache;
3343         struct ptr_node *node;
3344         struct inode_record *rec;
3345         struct inode_backref *backref;
3346         int ret = 0;
3347
3348         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3349                 free_inode_recs_tree(src_cache);
3350                 return 0;
3351         }
3352
3353         while (1) {
3354                 cache = search_cache_extent(src_cache, 0);
3355                 if (!cache)
3356                         break;
3357                 node = container_of(cache, struct ptr_node, cache);
3358                 rec = node->data;
3359                 remove_cache_extent(src_cache, &node->cache);
3360                 free(node);
3361
3362                 ret = is_child_root(root, root->objectid, rec->ino);
3363                 if (ret < 0)
3364                         break;
3365                 else if (ret == 0)
3366                         goto skip;
3367
3368                 list_for_each_entry(backref, &rec->backrefs, list) {
3369                         BUG_ON(backref->found_inode_ref);
3370                         if (backref->found_dir_item)
3371                                 add_root_backref(dst_cache, rec->ino,
3372                                         root->root_key.objectid, backref->dir,
3373                                         backref->index, backref->name,
3374                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3375                                         backref->errors);
3376                         if (backref->found_dir_index)
3377                                 add_root_backref(dst_cache, rec->ino,
3378                                         root->root_key.objectid, backref->dir,
3379                                         backref->index, backref->name,
3380                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3381                                         backref->errors);
3382                 }
3383 skip:
3384                 free_inode_rec(rec);
3385         }
3386         if (ret < 0)
3387                 return ret;
3388         return 0;
3389 }
3390
3391 static int check_root_refs(struct btrfs_root *root,
3392                            struct cache_tree *root_cache)
3393 {
3394         struct root_record *rec;
3395         struct root_record *ref_root;
3396         struct root_backref *backref;
3397         struct cache_extent *cache;
3398         int loop = 1;
3399         int ret;
3400         int error;
3401         int errors = 0;
3402
3403         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3404         BUG_ON(IS_ERR(rec));
3405         rec->found_ref = 1;
3406
3407         /* fixme: this can not detect circular references */
3408         while (loop) {
3409                 loop = 0;
3410                 cache = search_cache_extent(root_cache, 0);
3411                 while (1) {
3412                         if (!cache)
3413                                 break;
3414                         rec = container_of(cache, struct root_record, cache);
3415                         cache = next_cache_extent(cache);
3416
3417                         if (rec->found_ref == 0)
3418                                 continue;
3419
3420                         list_for_each_entry(backref, &rec->backrefs, list) {
3421                                 if (!backref->reachable)
3422                                         continue;
3423
3424                                 ref_root = get_root_rec(root_cache,
3425                                                         backref->ref_root);
3426                                 BUG_ON(IS_ERR(ref_root));
3427                                 if (ref_root->found_ref > 0)
3428                                         continue;
3429
3430                                 backref->reachable = 0;
3431                                 rec->found_ref--;
3432                                 if (rec->found_ref == 0)
3433                                         loop = 1;
3434                         }
3435                 }
3436         }
3437
3438         cache = search_cache_extent(root_cache, 0);
3439         while (1) {
3440                 if (!cache)
3441                         break;
3442                 rec = container_of(cache, struct root_record, cache);
3443                 cache = next_cache_extent(cache);
3444
3445                 if (rec->found_ref == 0 &&
3446                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3447                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3448                         ret = check_orphan_item(root->fs_info->tree_root,
3449                                                 rec->objectid);
3450                         if (ret == 0)
3451                                 continue;
3452
3453                         /*
3454                          * If we don't have a root item then we likely just have
3455                          * a dir item in a snapshot for this root but no actual
3456                          * ref key or anything so it's meaningless.
3457                          */
3458                         if (!rec->found_root_item)
3459                                 continue;
3460                         errors++;
3461                         fprintf(stderr, "fs tree %llu not referenced\n",
3462                                 (unsigned long long)rec->objectid);
3463                 }
3464
3465                 error = 0;
3466                 if (rec->found_ref > 0 && !rec->found_root_item)
3467                         error = 1;
3468                 list_for_each_entry(backref, &rec->backrefs, list) {
3469                         if (!backref->found_dir_item)
3470                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3471                         if (!backref->found_dir_index)
3472                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3473                         if (!backref->found_back_ref)
3474                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3475                         if (!backref->found_forward_ref)
3476                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3477                         if (backref->reachable && backref->errors)
3478                                 error = 1;
3479                 }
3480                 if (!error)
3481                         continue;
3482
3483                 errors++;
3484                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3485                         (unsigned long long)rec->objectid, rec->found_ref,
3486                          rec->found_root_item ? "" : "not found");
3487
3488                 list_for_each_entry(backref, &rec->backrefs, list) {
3489                         if (!backref->reachable)
3490                                 continue;
3491                         if (!backref->errors && rec->found_root_item)
3492                                 continue;
3493                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3494                                 " index %llu namelen %u name %s errors %x\n",
3495                                 (unsigned long long)backref->ref_root,
3496                                 (unsigned long long)backref->dir,
3497                                 (unsigned long long)backref->index,
3498                                 backref->namelen, backref->name,
3499                                 backref->errors);
3500                         print_ref_error(backref->errors);
3501                 }
3502         }
3503         return errors > 0 ? 1 : 0;
3504 }
3505
3506 static int process_root_ref(struct extent_buffer *eb, int slot,
3507                             struct btrfs_key *key,
3508                             struct cache_tree *root_cache)
3509 {
3510         u64 dirid;
3511         u64 index;
3512         u32 len;
3513         u32 name_len;
3514         struct btrfs_root_ref *ref;
3515         char namebuf[BTRFS_NAME_LEN];
3516         int error;
3517
3518         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3519
3520         dirid = btrfs_root_ref_dirid(eb, ref);
3521         index = btrfs_root_ref_sequence(eb, ref);
3522         name_len = btrfs_root_ref_name_len(eb, ref);
3523
3524         if (name_len <= BTRFS_NAME_LEN) {
3525                 len = name_len;
3526                 error = 0;
3527         } else {
3528                 len = BTRFS_NAME_LEN;
3529                 error = REF_ERR_NAME_TOO_LONG;
3530         }
3531         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3532
3533         if (key->type == BTRFS_ROOT_REF_KEY) {
3534                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3535                                  index, namebuf, len, key->type, error);
3536         } else {
3537                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3538                                  index, namebuf, len, key->type, error);
3539         }
3540         return 0;
3541 }
3542
3543 static void free_corrupt_block(struct cache_extent *cache)
3544 {
3545         struct btrfs_corrupt_block *corrupt;
3546
3547         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3548         free(corrupt);
3549 }
3550
3551 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3552
3553 /*
3554  * Repair the btree of the given root.
3555  *
3556  * The fix is to remove the node key in corrupt_blocks cache_tree.
3557  * and rebalance the tree.
3558  * After the fix, the btree should be writeable.
3559  */
3560 static int repair_btree(struct btrfs_root *root,
3561                         struct cache_tree *corrupt_blocks)
3562 {
3563         struct btrfs_trans_handle *trans;
3564         struct btrfs_path *path;
3565         struct btrfs_corrupt_block *corrupt;
3566         struct cache_extent *cache;
3567         struct btrfs_key key;
3568         u64 offset;
3569         int level;
3570         int ret = 0;
3571
3572         if (cache_tree_empty(corrupt_blocks))
3573                 return 0;
3574
3575         path = btrfs_alloc_path();
3576         if (!path)
3577                 return -ENOMEM;
3578
3579         trans = btrfs_start_transaction(root, 1);
3580         if (IS_ERR(trans)) {
3581                 ret = PTR_ERR(trans);
3582                 fprintf(stderr, "Error starting transaction: %s\n",
3583                         strerror(-ret));
3584                 goto out_free_path;
3585         }
3586         cache = first_cache_extent(corrupt_blocks);
3587         while (cache) {
3588                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3589                                        cache);
3590                 level = corrupt->level;
3591                 path->lowest_level = level;
3592                 key.objectid = corrupt->key.objectid;
3593                 key.type = corrupt->key.type;
3594                 key.offset = corrupt->key.offset;
3595
3596                 /*
3597                  * Here we don't want to do any tree balance, since it may
3598                  * cause a balance with corrupted brother leaf/node,
3599                  * so ins_len set to 0 here.
3600                  * Balance will be done after all corrupt node/leaf is deleted.
3601                  */
3602                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3603                 if (ret < 0)
3604                         goto out;
3605                 offset = btrfs_node_blockptr(path->nodes[level],
3606                                              path->slots[level]);
3607
3608                 /* Remove the ptr */
3609                 ret = btrfs_del_ptr(trans, root, path, level,
3610                                     path->slots[level]);
3611                 if (ret < 0)
3612                         goto out;
3613                 /*
3614                  * Remove the corresponding extent
3615                  * return value is not concerned.
3616                  */
3617                 btrfs_release_path(path);
3618                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3619                                         0, root->root_key.objectid,
3620                                         level - 1, 0);
3621                 cache = next_cache_extent(cache);
3622         }
3623
3624         /* Balance the btree using btrfs_search_slot() */
3625         cache = first_cache_extent(corrupt_blocks);
3626         while (cache) {
3627                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3628                                        cache);
3629                 memcpy(&key, &corrupt->key, sizeof(key));
3630                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3631                 if (ret < 0)
3632                         goto out;
3633                 /* return will always >0 since it won't find the item */
3634                 ret = 0;
3635                 btrfs_release_path(path);
3636                 cache = next_cache_extent(cache);
3637         }
3638 out:
3639         btrfs_commit_transaction(trans, root);
3640 out_free_path:
3641         btrfs_free_path(path);
3642         return ret;
3643 }
3644
3645 static int check_fs_root(struct btrfs_root *root,
3646                          struct cache_tree *root_cache,
3647                          struct walk_control *wc)
3648 {
3649         int ret = 0;
3650         int err = 0;
3651         int wret;
3652         int level;
3653         struct btrfs_path path;
3654         struct shared_node root_node;
3655         struct root_record *rec;
3656         struct btrfs_root_item *root_item = &root->root_item;
3657         struct cache_tree corrupt_blocks;
3658         struct orphan_data_extent *orphan;
3659         struct orphan_data_extent *tmp;
3660         enum btrfs_tree_block_status status;
3661         struct node_refs nrefs;
3662
3663         /*
3664          * Reuse the corrupt_block cache tree to record corrupted tree block
3665          *
3666          * Unlike the usage in extent tree check, here we do it in a per
3667          * fs/subvol tree base.
3668          */
3669         cache_tree_init(&corrupt_blocks);
3670         root->fs_info->corrupt_blocks = &corrupt_blocks;
3671
3672         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3673                 rec = get_root_rec(root_cache, root->root_key.objectid);
3674                 BUG_ON(IS_ERR(rec));
3675                 if (btrfs_root_refs(root_item) > 0)
3676                         rec->found_root_item = 1;
3677         }
3678
3679         btrfs_init_path(&path);
3680         memset(&root_node, 0, sizeof(root_node));
3681         cache_tree_init(&root_node.root_cache);
3682         cache_tree_init(&root_node.inode_cache);
3683         memset(&nrefs, 0, sizeof(nrefs));
3684
3685         /* Move the orphan extent record to corresponding inode_record */
3686         list_for_each_entry_safe(orphan, tmp,
3687                                  &root->orphan_data_extents, list) {
3688                 struct inode_record *inode;
3689
3690                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3691                                       1);
3692                 BUG_ON(IS_ERR(inode));
3693                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3694                 list_move(&orphan->list, &inode->orphan_extents);
3695         }
3696
3697         level = btrfs_header_level(root->node);
3698         memset(wc->nodes, 0, sizeof(wc->nodes));
3699         wc->nodes[level] = &root_node;
3700         wc->active_node = level;
3701         wc->root_level = level;
3702
3703         /* We may not have checked the root block, lets do that now */
3704         if (btrfs_is_leaf(root->node))
3705                 status = btrfs_check_leaf(root, NULL, root->node);
3706         else
3707                 status = btrfs_check_node(root, NULL, root->node);
3708         if (status != BTRFS_TREE_BLOCK_CLEAN)
3709                 return -EIO;
3710
3711         if (btrfs_root_refs(root_item) > 0 ||
3712             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3713                 path.nodes[level] = root->node;
3714                 extent_buffer_get(root->node);
3715                 path.slots[level] = 0;
3716         } else {
3717                 struct btrfs_key key;
3718                 struct btrfs_disk_key found_key;
3719
3720                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3721                 level = root_item->drop_level;
3722                 path.lowest_level = level;
3723                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3724                 if (wret < 0)
3725                         goto skip_walking;
3726                 btrfs_node_key(path.nodes[level], &found_key,
3727                                 path.slots[level]);
3728                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3729                                         sizeof(found_key)));
3730         }
3731
3732         while (1) {
3733                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3734                 if (wret < 0)
3735                         ret = wret;
3736                 if (wret != 0)
3737                         break;
3738
3739                 wret = walk_up_tree(root, &path, wc, &level);
3740                 if (wret < 0)
3741                         ret = wret;
3742                 if (wret != 0)
3743                         break;
3744         }
3745 skip_walking:
3746         btrfs_release_path(&path);
3747
3748         if (!cache_tree_empty(&corrupt_blocks)) {
3749                 struct cache_extent *cache;
3750                 struct btrfs_corrupt_block *corrupt;
3751
3752                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3753                        root->root_key.objectid);
3754                 cache = first_cache_extent(&corrupt_blocks);
3755                 while (cache) {
3756                         corrupt = container_of(cache,
3757                                                struct btrfs_corrupt_block,
3758                                                cache);
3759                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3760                                cache->start, corrupt->level,
3761                                corrupt->key.objectid, corrupt->key.type,
3762                                corrupt->key.offset);
3763                         cache = next_cache_extent(cache);
3764                 }
3765                 if (repair) {
3766                         printf("Try to repair the btree for root %llu\n",
3767                                root->root_key.objectid);
3768                         ret = repair_btree(root, &corrupt_blocks);
3769                         if (ret < 0)
3770                                 fprintf(stderr, "Failed to repair btree: %s\n",
3771                                         strerror(-ret));
3772                         if (!ret)
3773                                 printf("Btree for root %llu is fixed\n",
3774                                        root->root_key.objectid);
3775                 }
3776         }
3777
3778         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3779         if (err < 0)
3780                 ret = err;
3781
3782         if (root_node.current) {
3783                 root_node.current->checked = 1;
3784                 maybe_free_inode_rec(&root_node.inode_cache,
3785                                 root_node.current);
3786         }
3787
3788         err = check_inode_recs(root, &root_node.inode_cache);
3789         if (!ret)
3790                 ret = err;
3791
3792         free_corrupt_blocks_tree(&corrupt_blocks);
3793         root->fs_info->corrupt_blocks = NULL;
3794         free_orphan_data_extents(&root->orphan_data_extents);
3795         return ret;
3796 }
3797
3798 static int fs_root_objectid(u64 objectid)
3799 {
3800         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3801             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3802                 return 1;
3803         return is_fstree(objectid);
3804 }
3805
3806 static int check_fs_roots(struct btrfs_root *root,
3807                           struct cache_tree *root_cache)
3808 {
3809         struct btrfs_path path;
3810         struct btrfs_key key;
3811         struct walk_control wc;
3812         struct extent_buffer *leaf, *tree_node;
3813         struct btrfs_root *tmp_root;
3814         struct btrfs_root *tree_root = root->fs_info->tree_root;
3815         int ret;
3816         int err = 0;
3817
3818         if (ctx.progress_enabled) {
3819                 ctx.tp = TASK_FS_ROOTS;
3820                 task_start(ctx.info);
3821         }
3822
3823         /*
3824          * Just in case we made any changes to the extent tree that weren't
3825          * reflected into the free space cache yet.
3826          */
3827         if (repair)
3828                 reset_cached_block_groups(root->fs_info);
3829         memset(&wc, 0, sizeof(wc));
3830         cache_tree_init(&wc.shared);
3831         btrfs_init_path(&path);
3832
3833 again:
3834         key.offset = 0;
3835         key.objectid = 0;
3836         key.type = BTRFS_ROOT_ITEM_KEY;
3837         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3838         if (ret < 0) {
3839                 err = 1;
3840                 goto out;
3841         }
3842         tree_node = tree_root->node;
3843         while (1) {
3844                 if (tree_node != tree_root->node) {
3845                         free_root_recs_tree(root_cache);
3846                         btrfs_release_path(&path);
3847                         goto again;
3848                 }
3849                 leaf = path.nodes[0];
3850                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3851                         ret = btrfs_next_leaf(tree_root, &path);
3852                         if (ret) {
3853                                 if (ret < 0)
3854                                         err = 1;
3855                                 break;
3856                         }
3857                         leaf = path.nodes[0];
3858                 }
3859                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3860                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3861                     fs_root_objectid(key.objectid)) {
3862                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3863                                 tmp_root = btrfs_read_fs_root_no_cache(
3864                                                 root->fs_info, &key);
3865                         } else {
3866                                 key.offset = (u64)-1;
3867                                 tmp_root = btrfs_read_fs_root(
3868                                                 root->fs_info, &key);
3869                         }
3870                         if (IS_ERR(tmp_root)) {
3871                                 err = 1;
3872                                 goto next;
3873                         }
3874                         ret = check_fs_root(tmp_root, root_cache, &wc);
3875                         if (ret == -EAGAIN) {
3876                                 free_root_recs_tree(root_cache);
3877                                 btrfs_release_path(&path);
3878                                 goto again;
3879                         }
3880                         if (ret)
3881                                 err = 1;
3882                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3883                                 btrfs_free_fs_root(tmp_root);
3884                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3885                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3886                         process_root_ref(leaf, path.slots[0], &key,
3887                                          root_cache);
3888                 }
3889 next:
3890                 path.slots[0]++;
3891         }
3892 out:
3893         btrfs_release_path(&path);
3894         if (err)
3895                 free_extent_cache_tree(&wc.shared);
3896         if (!cache_tree_empty(&wc.shared))
3897                 fprintf(stderr, "warning line %d\n", __LINE__);
3898
3899         task_stop(ctx.info);
3900
3901         return err;
3902 }
3903
3904 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3905 {
3906         struct rb_node *n;
3907         struct extent_backref *back;
3908         struct tree_backref *tback;
3909         struct data_backref *dback;
3910         u64 found = 0;
3911         int err = 0;
3912
3913         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3914                 back = rb_node_to_extent_backref(n);
3915                 if (!back->found_extent_tree) {
3916                         err = 1;
3917                         if (!print_errs)
3918                                 goto out;
3919                         if (back->is_data) {
3920                                 dback = to_data_backref(back);
3921                                 fprintf(stderr, "Backref %llu %s %llu"
3922                                         " owner %llu offset %llu num_refs %lu"
3923                                         " not found in extent tree\n",
3924                                         (unsigned long long)rec->start,
3925                                         back->full_backref ?
3926                                         "parent" : "root",
3927                                         back->full_backref ?
3928                                         (unsigned long long)dback->parent:
3929                                         (unsigned long long)dback->root,
3930                                         (unsigned long long)dback->owner,
3931                                         (unsigned long long)dback->offset,
3932                                         (unsigned long)dback->num_refs);
3933                         } else {
3934                                 tback = to_tree_backref(back);
3935                                 fprintf(stderr, "Backref %llu parent %llu"
3936                                         " root %llu not found in extent tree\n",
3937                                         (unsigned long long)rec->start,
3938                                         (unsigned long long)tback->parent,
3939                                         (unsigned long long)tback->root);
3940                         }
3941                 }
3942                 if (!back->is_data && !back->found_ref) {
3943                         err = 1;
3944                         if (!print_errs)
3945                                 goto out;
3946                         tback = to_tree_backref(back);
3947                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3948                                 (unsigned long long)rec->start,
3949                                 back->full_backref ? "parent" : "root",
3950                                 back->full_backref ?
3951                                 (unsigned long long)tback->parent :
3952                                 (unsigned long long)tback->root, back);
3953                 }
3954                 if (back->is_data) {
3955                         dback = to_data_backref(back);
3956                         if (dback->found_ref != dback->num_refs) {
3957                                 err = 1;
3958                                 if (!print_errs)
3959                                         goto out;
3960                                 fprintf(stderr, "Incorrect local backref count"
3961                                         " on %llu %s %llu owner %llu"
3962                                         " offset %llu found %u wanted %u back %p\n",
3963                                         (unsigned long long)rec->start,
3964                                         back->full_backref ?
3965                                         "parent" : "root",
3966                                         back->full_backref ?
3967                                         (unsigned long long)dback->parent:
3968                                         (unsigned long long)dback->root,
3969                                         (unsigned long long)dback->owner,
3970                                         (unsigned long long)dback->offset,
3971                                         dback->found_ref, dback->num_refs, back);
3972                         }
3973                         if (dback->disk_bytenr != rec->start) {
3974                                 err = 1;
3975                                 if (!print_errs)
3976                                         goto out;
3977                                 fprintf(stderr, "Backref disk bytenr does not"
3978                                         " match extent record, bytenr=%llu, "
3979                                         "ref bytenr=%llu\n",
3980                                         (unsigned long long)rec->start,
3981                                         (unsigned long long)dback->disk_bytenr);
3982                         }
3983
3984                         if (dback->bytes != rec->nr) {
3985                                 err = 1;
3986                                 if (!print_errs)
3987                                         goto out;
3988                                 fprintf(stderr, "Backref bytes do not match "
3989                                         "extent backref, bytenr=%llu, ref "
3990                                         "bytes=%llu, backref bytes=%llu\n",
3991                                         (unsigned long long)rec->start,
3992                                         (unsigned long long)rec->nr,
3993                                         (unsigned long long)dback->bytes);
3994                         }
3995                 }
3996                 if (!back->is_data) {
3997                         found += 1;
3998                 } else {
3999                         dback = to_data_backref(back);
4000                         found += dback->found_ref;
4001                 }
4002         }
4003         if (found != rec->refs) {
4004                 err = 1;
4005                 if (!print_errs)
4006                         goto out;
4007                 fprintf(stderr, "Incorrect global backref count "
4008                         "on %llu found %llu wanted %llu\n",
4009                         (unsigned long long)rec->start,
4010                         (unsigned long long)found,
4011                         (unsigned long long)rec->refs);
4012         }
4013 out:
4014         return err;
4015 }
4016
/*
 * rb_free_nodes() callback: release the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
4023
4024 static void free_all_extent_backrefs(struct extent_record *rec)
4025 {
4026         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4027 }
4028
4029 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4030                                      struct cache_tree *extent_cache)
4031 {
4032         struct cache_extent *cache;
4033         struct extent_record *rec;
4034
4035         while (1) {
4036                 cache = first_cache_extent(extent_cache);
4037                 if (!cache)
4038                         break;
4039                 rec = container_of(cache, struct extent_record, cache);
4040                 remove_cache_extent(extent_cache, cache);
4041                 free_all_extent_backrefs(rec);
4042                 free(rec);
4043         }
4044 }
4045
4046 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4047                                  struct extent_record *rec)
4048 {
4049         if (rec->content_checked && rec->owner_ref_checked &&
4050             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4051             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4052             !rec->bad_full_backref && !rec->crossing_stripes &&
4053             !rec->wrong_chunk_type) {
4054                 remove_cache_extent(extent_cache, &rec->cache);
4055                 free_all_extent_backrefs(rec);
4056                 list_del_init(&rec->list);
4057                 free(rec);
4058         }
4059         return 0;
4060 }
4061
4062 static int check_owner_ref(struct btrfs_root *root,
4063                             struct extent_record *rec,
4064                             struct extent_buffer *buf)
4065 {
4066         struct extent_backref *node, *tmp;
4067         struct tree_backref *back;
4068         struct btrfs_root *ref_root;
4069         struct btrfs_key key;
4070         struct btrfs_path path;
4071         struct extent_buffer *parent;
4072         int level;
4073         int found = 0;
4074         int ret;
4075
4076         rbtree_postorder_for_each_entry_safe(node, tmp,
4077                                              &rec->backref_tree, node) {
4078                 if (node->is_data)
4079                         continue;
4080                 if (!node->found_ref)
4081                         continue;
4082                 if (node->full_backref)
4083                         continue;
4084                 back = to_tree_backref(node);
4085                 if (btrfs_header_owner(buf) == back->root)
4086                         return 0;
4087         }
4088         BUG_ON(rec->is_root);
4089
4090         /* try to find the block by search corresponding fs tree */
4091         key.objectid = btrfs_header_owner(buf);
4092         key.type = BTRFS_ROOT_ITEM_KEY;
4093         key.offset = (u64)-1;
4094
4095         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4096         if (IS_ERR(ref_root))
4097                 return 1;
4098
4099         level = btrfs_header_level(buf);
4100         if (level == 0)
4101                 btrfs_item_key_to_cpu(buf, &key, 0);
4102         else
4103                 btrfs_node_key_to_cpu(buf, &key, 0);
4104
4105         btrfs_init_path(&path);
4106         path.lowest_level = level + 1;
4107         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4108         if (ret < 0)
4109                 return 0;
4110
4111         parent = path.nodes[level + 1];
4112         if (parent && buf->start == btrfs_node_blockptr(parent,
4113                                                         path.slots[level + 1]))
4114                 found = 1;
4115
4116         btrfs_release_path(&path);
4117         return found ? 0 : 1;
4118 }
4119
4120 static int is_extent_tree_record(struct extent_record *rec)
4121 {
4122         struct extent_backref *ref, *tmp;
4123         struct tree_backref *back;
4124         int is_extent = 0;
4125
4126         rbtree_postorder_for_each_entry_safe(ref, tmp,
4127                                              &rec->backref_tree, node) {
4128                 if (ref->is_data)
4129                         return 0;
4130                 back = to_tree_backref(ref);
4131                 if (ref->full_backref)
4132                         return 0;
4133                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4134                         is_extent = 1;
4135         }
4136         return is_extent;
4137 }
4138
4139
4140 static int record_bad_block_io(struct btrfs_fs_info *info,
4141                                struct cache_tree *extent_cache,
4142                                u64 start, u64 len)
4143 {
4144         struct extent_record *rec;
4145         struct cache_extent *cache;
4146         struct btrfs_key key;
4147
4148         cache = lookup_cache_extent(extent_cache, start, len);
4149         if (!cache)
4150                 return 0;
4151
4152         rec = container_of(cache, struct extent_record, cache);
4153         if (!is_extent_tree_record(rec))
4154                 return 0;
4155
4156         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4157         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4158 }
4159
4160 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4161                        struct extent_buffer *buf, int slot)
4162 {
4163         if (btrfs_header_level(buf)) {
4164                 struct btrfs_key_ptr ptr1, ptr2;
4165
4166                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4167                                    sizeof(struct btrfs_key_ptr));
4168                 read_extent_buffer(buf, &ptr2,
4169                                    btrfs_node_key_ptr_offset(slot + 1),
4170                                    sizeof(struct btrfs_key_ptr));
4171                 write_extent_buffer(buf, &ptr1,
4172                                     btrfs_node_key_ptr_offset(slot + 1),
4173                                     sizeof(struct btrfs_key_ptr));
4174                 write_extent_buffer(buf, &ptr2,
4175                                     btrfs_node_key_ptr_offset(slot),
4176                                     sizeof(struct btrfs_key_ptr));
4177                 if (slot == 0) {
4178                         struct btrfs_disk_key key;
4179                         btrfs_node_key(buf, &key, 0);
4180                         btrfs_fixup_low_keys(root, path, &key,
4181                                              btrfs_header_level(buf) + 1);
4182                 }
4183         } else {
4184                 struct btrfs_item *item1, *item2;
4185                 struct btrfs_key k1, k2;
4186                 char *item1_data, *item2_data;
4187                 u32 item1_offset, item2_offset, item1_size, item2_size;
4188
4189                 item1 = btrfs_item_nr(slot);
4190                 item2 = btrfs_item_nr(slot + 1);
4191                 btrfs_item_key_to_cpu(buf, &k1, slot);
4192                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4193                 item1_offset = btrfs_item_offset(buf, item1);
4194                 item2_offset = btrfs_item_offset(buf, item2);
4195                 item1_size = btrfs_item_size(buf, item1);
4196                 item2_size = btrfs_item_size(buf, item2);
4197
4198                 item1_data = malloc(item1_size);
4199                 if (!item1_data)
4200                         return -ENOMEM;
4201                 item2_data = malloc(item2_size);
4202                 if (!item2_data) {
4203                         free(item1_data);
4204                         return -ENOMEM;
4205                 }
4206
4207                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4208                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4209
4210                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4211                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4212                 free(item1_data);
4213                 free(item2_data);
4214
4215                 btrfs_set_item_offset(buf, item1, item2_offset);
4216                 btrfs_set_item_offset(buf, item2, item1_offset);
4217                 btrfs_set_item_size(buf, item1, item2_size);
4218                 btrfs_set_item_size(buf, item2, item1_size);
4219
4220                 path->slots[0] = slot;
4221                 btrfs_set_item_key_unsafe(root, path, &k2);
4222                 path->slots[0] = slot + 1;
4223                 btrfs_set_item_key_unsafe(root, path, &k1);
4224         }
4225         return 0;
4226 }
4227
4228 static int fix_key_order(struct btrfs_trans_handle *trans,
4229                          struct btrfs_root *root,
4230                          struct btrfs_path *path)
4231 {
4232         struct extent_buffer *buf;
4233         struct btrfs_key k1, k2;
4234         int i;
4235         int level = path->lowest_level;
4236         int ret = -EIO;
4237
4238         buf = path->nodes[level];
4239         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4240                 if (level) {
4241                         btrfs_node_key_to_cpu(buf, &k1, i);
4242                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4243                 } else {
4244                         btrfs_item_key_to_cpu(buf, &k1, i);
4245                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4246                 }
4247                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4248                         continue;
4249                 ret = swap_values(root, path, buf, i);
4250                 if (ret)
4251                         break;
4252                 btrfs_mark_buffer_dirty(buf);
4253                 i = 0;
4254         }
4255         return ret;
4256 }
4257
4258 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4259                              struct btrfs_root *root,
4260                              struct btrfs_path *path,
4261                              struct extent_buffer *buf, int slot)
4262 {
4263         struct btrfs_key key;
4264         int nritems = btrfs_header_nritems(buf);
4265
4266         btrfs_item_key_to_cpu(buf, &key, slot);
4267
4268         /* These are all the keys we can deal with missing. */
4269         if (key.type != BTRFS_DIR_INDEX_KEY &&
4270             key.type != BTRFS_EXTENT_ITEM_KEY &&
4271             key.type != BTRFS_METADATA_ITEM_KEY &&
4272             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4273             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4274                 return -1;
4275
4276         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4277                (unsigned long long)key.objectid, key.type,
4278                (unsigned long long)key.offset, slot, buf->start);
4279         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4280                               btrfs_item_nr_offset(slot + 1),
4281                               sizeof(struct btrfs_item) *
4282                               (nritems - slot - 1));
4283         btrfs_set_header_nritems(buf, nritems - 1);
4284         if (slot == 0) {
4285                 struct btrfs_disk_key disk_key;
4286
4287                 btrfs_item_key(buf, &disk_key, 0);
4288                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4289         }
4290         btrfs_mark_buffer_dirty(buf);
4291         return 0;
4292 }
4293
4294 static int fix_item_offset(struct btrfs_trans_handle *trans,
4295                            struct btrfs_root *root,
4296                            struct btrfs_path *path)
4297 {
4298         struct extent_buffer *buf;
4299         int i;
4300         int ret = 0;
4301
4302         /* We should only get this for leaves */
4303         BUG_ON(path->lowest_level);
4304         buf = path->nodes[0];
4305 again:
4306         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4307                 unsigned int shift = 0, offset;
4308
4309                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4310                     BTRFS_LEAF_DATA_SIZE(root)) {
4311                         if (btrfs_item_end_nr(buf, i) >
4312                             BTRFS_LEAF_DATA_SIZE(root)) {
4313                                 ret = delete_bogus_item(trans, root, path,
4314                                                         buf, i);
4315                                 if (!ret)
4316                                         goto again;
4317                                 fprintf(stderr, "item is off the end of the "
4318                                         "leaf, can't fix\n");
4319                                 ret = -EIO;
4320                                 break;
4321                         }
4322                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4323                                 btrfs_item_end_nr(buf, i);
4324                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4325                            btrfs_item_offset_nr(buf, i - 1)) {
4326                         if (btrfs_item_end_nr(buf, i) >
4327                             btrfs_item_offset_nr(buf, i - 1)) {
4328                                 ret = delete_bogus_item(trans, root, path,
4329                                                         buf, i);
4330                                 if (!ret)
4331                                         goto again;
4332                                 fprintf(stderr, "items overlap, can't fix\n");
4333                                 ret = -EIO;
4334                                 break;
4335                         }
4336                         shift = btrfs_item_offset_nr(buf, i - 1) -
4337                                 btrfs_item_end_nr(buf, i);
4338                 }
4339                 if (!shift)
4340                         continue;
4341
4342                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4343                        i, shift, (unsigned long long)buf->start);
4344                 offset = btrfs_item_offset_nr(buf, i);
4345                 memmove_extent_buffer(buf,
4346                                       btrfs_leaf_data(buf) + offset + shift,
4347                                       btrfs_leaf_data(buf) + offset,
4348                                       btrfs_item_size_nr(buf, i));
4349                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4350                                       offset + shift);
4351                 btrfs_mark_buffer_dirty(buf);
4352         }
4353
4354         /*
4355          * We may have moved things, in which case we want to exit so we don't
4356          * write those changes out.  Once we have proper abort functionality in
4357          * progs this can be changed to something nicer.
4358          */
4359         BUG_ON(ret);
4360         return ret;
4361 }
4362
4363 /*
4364  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4365  * then just return -EIO.
4366  */
4367 static int try_to_fix_bad_block(struct btrfs_root *root,
4368                                 struct extent_buffer *buf,
4369                                 enum btrfs_tree_block_status status)
4370 {
4371         struct btrfs_trans_handle *trans;
4372         struct ulist *roots;
4373         struct ulist_node *node;
4374         struct btrfs_root *search_root;
4375         struct btrfs_path *path;
4376         struct ulist_iterator iter;
4377         struct btrfs_key root_key, key;
4378         int ret;
4379
4380         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4381             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4382                 return -EIO;
4383
4384         path = btrfs_alloc_path();
4385         if (!path)
4386                 return -EIO;
4387
4388         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4389                                    0, &roots);
4390         if (ret) {
4391                 btrfs_free_path(path);
4392                 return -EIO;
4393         }
4394
4395         ULIST_ITER_INIT(&iter);
4396         while ((node = ulist_next(roots, &iter))) {
4397                 root_key.objectid = node->val;
4398                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4399                 root_key.offset = (u64)-1;
4400
4401                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4402                 if (IS_ERR(root)) {
4403                         ret = -EIO;
4404                         break;
4405                 }
4406
4407
4408                 trans = btrfs_start_transaction(search_root, 0);
4409                 if (IS_ERR(trans)) {
4410                         ret = PTR_ERR(trans);
4411                         break;
4412                 }
4413
4414                 path->lowest_level = btrfs_header_level(buf);
4415                 path->skip_check_block = 1;
4416                 if (path->lowest_level)
4417                         btrfs_node_key_to_cpu(buf, &key, 0);
4418                 else
4419                         btrfs_item_key_to_cpu(buf, &key, 0);
4420                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4421                 if (ret) {
4422                         ret = -EIO;
4423                         btrfs_commit_transaction(trans, search_root);
4424                         break;
4425                 }
4426                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4427                         ret = fix_key_order(trans, search_root, path);
4428                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4429                         ret = fix_item_offset(trans, search_root, path);
4430                 if (ret) {
4431                         btrfs_commit_transaction(trans, search_root);
4432                         break;
4433                 }
4434                 btrfs_release_path(path);
4435                 btrfs_commit_transaction(trans, search_root);
4436         }
4437         ulist_free(roots);
4438         btrfs_free_path(path);
4439         return ret;
4440 }
4441
4442 static int check_block(struct btrfs_root *root,
4443                        struct cache_tree *extent_cache,
4444                        struct extent_buffer *buf, u64 flags)
4445 {
4446         struct extent_record *rec;
4447         struct cache_extent *cache;
4448         struct btrfs_key key;
4449         enum btrfs_tree_block_status status;
4450         int ret = 0;
4451         int level;
4452
4453         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4454         if (!cache)
4455                 return 1;
4456         rec = container_of(cache, struct extent_record, cache);
4457         rec->generation = btrfs_header_generation(buf);
4458
4459         level = btrfs_header_level(buf);
4460         if (btrfs_header_nritems(buf) > 0) {
4461
4462                 if (level == 0)
4463                         btrfs_item_key_to_cpu(buf, &key, 0);
4464                 else
4465                         btrfs_node_key_to_cpu(buf, &key, 0);
4466
4467                 rec->info_objectid = key.objectid;
4468         }
4469         rec->info_level = level;
4470
4471         if (btrfs_is_leaf(buf))
4472                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4473         else
4474                 status = btrfs_check_node(root, &rec->parent_key, buf);
4475
4476         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4477                 if (repair)
4478                         status = try_to_fix_bad_block(root, buf, status);
4479                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4480                         ret = -EIO;
4481                         fprintf(stderr, "bad block %llu\n",
4482                                 (unsigned long long)buf->start);
4483                 } else {
4484                         /*
4485                          * Signal to callers we need to start the scan over
4486                          * again since we'll have cowed blocks.
4487                          */
4488                         ret = -EAGAIN;
4489                 }
4490         } else {
4491                 rec->content_checked = 1;
4492                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4493                         rec->owner_ref_checked = 1;
4494                 else {
4495                         ret = check_owner_ref(root, rec, buf);
4496                         if (!ret)
4497                                 rec->owner_ref_checked = 1;
4498                 }
4499         }
4500         if (!ret)
4501                 maybe_free_extent_rec(extent_cache, rec);
4502         return ret;
4503 }
4504
4505
4506 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4507                                                 u64 parent, u64 root)
4508 {
4509         struct rb_node *node;
4510         struct tree_backref *back = NULL;
4511         struct tree_backref match = {
4512                 .node = {
4513                         .is_data = 0,
4514                 },
4515         };
4516
4517         if (parent) {
4518                 match.parent = parent;
4519                 match.node.full_backref = 1;
4520         } else {
4521                 match.root = root;
4522         }
4523
4524         node = rb_search(&rec->backref_tree, &match.node.node,
4525                          (rb_compare_keys)compare_extent_backref, NULL);
4526         if (node)
4527                 back = to_tree_backref(rb_node_to_extent_backref(node));
4528
4529         return back;
4530 }
4531
4532 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4533                                                 u64 parent, u64 root)
4534 {
4535         struct tree_backref *ref = malloc(sizeof(*ref));
4536
4537         if (!ref)
4538                 return NULL;
4539         memset(&ref->node, 0, sizeof(ref->node));
4540         if (parent > 0) {
4541                 ref->parent = parent;
4542                 ref->node.full_backref = 1;
4543         } else {
4544                 ref->root = root;
4545                 ref->node.full_backref = 0;
4546         }
4547         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4548
4549         return ref;
4550 }
4551
4552 static struct data_backref *find_data_backref(struct extent_record *rec,
4553                                                 u64 parent, u64 root,
4554                                                 u64 owner, u64 offset,
4555                                                 int found_ref,
4556                                                 u64 disk_bytenr, u64 bytes)
4557 {
4558         struct rb_node *node;
4559         struct data_backref *back = NULL;
4560         struct data_backref match = {
4561                 .node = {
4562                         .is_data = 1,
4563                 },
4564                 .owner = owner,
4565                 .offset = offset,
4566                 .bytes = bytes,
4567                 .found_ref = found_ref,
4568                 .disk_bytenr = disk_bytenr,
4569         };
4570
4571         if (parent) {
4572                 match.parent = parent;
4573                 match.node.full_backref = 1;
4574         } else {
4575                 match.root = root;
4576         }
4577
4578         node = rb_search(&rec->backref_tree, &match.node.node,
4579                          (rb_compare_keys)compare_extent_backref, NULL);
4580         if (node)
4581                 back = to_data_backref(rb_node_to_extent_backref(node));
4582
4583         return back;
4584 }
4585
4586 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4587                                                 u64 parent, u64 root,
4588                                                 u64 owner, u64 offset,
4589                                                 u64 max_size)
4590 {
4591         struct data_backref *ref = malloc(sizeof(*ref));
4592
4593         if (!ref)
4594                 return NULL;
4595         memset(&ref->node, 0, sizeof(ref->node));
4596         ref->node.is_data = 1;
4597
4598         if (parent > 0) {
4599                 ref->parent = parent;
4600                 ref->owner = 0;
4601                 ref->offset = 0;
4602                 ref->node.full_backref = 1;
4603         } else {
4604                 ref->root = root;
4605                 ref->owner = owner;
4606                 ref->offset = offset;
4607                 ref->node.full_backref = 0;
4608         }
4609         ref->bytes = max_size;
4610         ref->found_ref = 0;
4611         ref->num_refs = 0;
4612         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4613         if (max_size > rec->max_size)
4614                 rec->max_size = max_size;
4615         return ref;
4616 }
4617
4618 /* Check if the type of extent matches with its chunk */
4619 static void check_extent_type(struct extent_record *rec)
4620 {
4621         struct btrfs_block_group_cache *bg_cache;
4622
4623         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4624         if (!bg_cache)
4625                 return;
4626
4627         /* data extent, check chunk directly*/
4628         if (!rec->metadata) {
4629                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4630                         rec->wrong_chunk_type = 1;
4631                 return;
4632         }
4633
4634         /* metadata extent, check the obvious case first */
4635         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4636                                  BTRFS_BLOCK_GROUP_METADATA))) {
4637                 rec->wrong_chunk_type = 1;
4638                 return;
4639         }
4640
4641         /*
4642          * Check SYSTEM extent, as it's also marked as metadata, we can only
4643          * make sure it's a SYSTEM extent by its backref
4644          */
4645         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4646                 struct extent_backref *node;
4647                 struct tree_backref *tback;
4648                 u64 bg_type;
4649
4650                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4651                 if (node->is_data) {
4652                         /* tree block shouldn't have data backref */
4653                         rec->wrong_chunk_type = 1;
4654                         return;
4655                 }
4656                 tback = container_of(node, struct tree_backref, node);
4657
4658                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4659                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4660                 else
4661                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4662                 if (!(bg_cache->flags & bg_type))
4663                         rec->wrong_chunk_type = 1;
4664         }
4665 }
4666
4667 /*
4668  * Allocate a new extent record, fill default values from @tmpl and insert int
4669  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4670  * the cache, otherwise it fails.
4671  */
4672 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4673                 struct extent_record *tmpl)
4674 {
4675         struct extent_record *rec;
4676         int ret = 0;
4677
4678         rec = malloc(sizeof(*rec));
4679         if (!rec)
4680                 return -ENOMEM;
4681         rec->start = tmpl->start;
4682         rec->max_size = tmpl->max_size;
4683         rec->nr = max(tmpl->nr, tmpl->max_size);
4684         rec->found_rec = tmpl->found_rec;
4685         rec->content_checked = tmpl->content_checked;
4686         rec->owner_ref_checked = tmpl->owner_ref_checked;
4687         rec->num_duplicates = 0;
4688         rec->metadata = tmpl->metadata;
4689         rec->flag_block_full_backref = FLAG_UNSET;
4690         rec->bad_full_backref = 0;
4691         rec->crossing_stripes = 0;
4692         rec->wrong_chunk_type = 0;
4693         rec->is_root = tmpl->is_root;
4694         rec->refs = tmpl->refs;
4695         rec->extent_item_refs = tmpl->extent_item_refs;
4696         rec->parent_generation = tmpl->parent_generation;
4697         INIT_LIST_HEAD(&rec->backrefs);
4698         INIT_LIST_HEAD(&rec->dups);
4699         INIT_LIST_HEAD(&rec->list);
4700         rec->backref_tree = RB_ROOT;
4701         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4702         rec->cache.start = tmpl->start;
4703         rec->cache.size = tmpl->nr;
4704         ret = insert_cache_extent(extent_cache, &rec->cache);
4705         BUG_ON(ret);
4706         bytes_used += rec->nr;
4707
4708         if (tmpl->metadata)
4709                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4710                                 global_info->tree_root->nodesize);
4711         check_extent_type(rec);
4712         return ret;
4713 }
4714
4715 /*
4716  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4717  * some are hints:
4718  * - refs              - if found, increase refs
4719  * - is_root           - if found, set
4720  * - content_checked   - if found, set
4721  * - owner_ref_checked - if found, set
4722  *
4723  * If not found, create a new one, initialize and insert.
4724  */
4725 static int add_extent_rec(struct cache_tree *extent_cache,
4726                 struct extent_record *tmpl)
4727 {
4728         struct extent_record *rec;
4729         struct cache_extent *cache;
4730         int ret = 0;
4731         int dup = 0;
4732
4733         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4734         if (cache) {
4735                 rec = container_of(cache, struct extent_record, cache);
4736                 if (tmpl->refs)
4737                         rec->refs++;
4738                 if (rec->nr == 1)
4739                         rec->nr = max(tmpl->nr, tmpl->max_size);
4740
4741                 /*
4742                  * We need to make sure to reset nr to whatever the extent
4743                  * record says was the real size, this way we can compare it to
4744                  * the backrefs.
4745                  */
4746                 if (tmpl->found_rec) {
4747                         if (tmpl->start != rec->start || rec->found_rec) {
4748                                 struct extent_record *tmp;
4749
4750                                 dup = 1;
4751                                 if (list_empty(&rec->list))
4752                                         list_add_tail(&rec->list,
4753                                                       &duplicate_extents);
4754
4755                                 /*
4756                                  * We have to do this song and dance in case we
4757                                  * find an extent record that falls inside of
4758                                  * our current extent record but does not have
4759                                  * the same objectid.
4760                                  */
4761                                 tmp = malloc(sizeof(*tmp));
4762                                 if (!tmp)
4763                                         return -ENOMEM;
4764                                 tmp->start = tmpl->start;
4765                                 tmp->max_size = tmpl->max_size;
4766                                 tmp->nr = tmpl->nr;
4767                                 tmp->found_rec = 1;
4768                                 tmp->metadata = tmpl->metadata;
4769                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4770                                 INIT_LIST_HEAD(&tmp->list);
4771                                 list_add_tail(&tmp->list, &rec->dups);
4772                                 rec->num_duplicates++;
4773                         } else {
4774                                 rec->nr = tmpl->nr;
4775                                 rec->found_rec = 1;
4776                         }
4777                 }
4778
4779                 if (tmpl->extent_item_refs && !dup) {
4780                         if (rec->extent_item_refs) {
4781                                 fprintf(stderr, "block %llu rec "
4782                                         "extent_item_refs %llu, passed %llu\n",
4783                                         (unsigned long long)tmpl->start,
4784                                         (unsigned long long)
4785                                                         rec->extent_item_refs,
4786                                         (unsigned long long)tmpl->extent_item_refs);
4787                         }
4788                         rec->extent_item_refs = tmpl->extent_item_refs;
4789                 }
4790                 if (tmpl->is_root)
4791                         rec->is_root = 1;
4792                 if (tmpl->content_checked)
4793                         rec->content_checked = 1;
4794                 if (tmpl->owner_ref_checked)
4795                         rec->owner_ref_checked = 1;
4796                 memcpy(&rec->parent_key, &tmpl->parent_key,
4797                                 sizeof(tmpl->parent_key));
4798                 if (tmpl->parent_generation)
4799                         rec->parent_generation = tmpl->parent_generation;
4800                 if (rec->max_size < tmpl->max_size)
4801                         rec->max_size = tmpl->max_size;
4802
4803                 /*
4804                  * A metadata extent can't cross stripe_len boundary, otherwise
4805                  * kernel scrub won't be able to handle it.
4806                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4807                  * it.
4808                  */
4809                 if (tmpl->metadata)
4810                         rec->crossing_stripes = check_crossing_stripes(
4811                                 rec->start, global_info->tree_root->nodesize);
4812                 check_extent_type(rec);
4813                 maybe_free_extent_rec(extent_cache, rec);
4814                 return ret;
4815         }
4816
4817         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4818
4819         return ret;
4820 }
4821
4822 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4823                             u64 parent, u64 root, int found_ref)
4824 {
4825         struct extent_record *rec;
4826         struct tree_backref *back;
4827         struct cache_extent *cache;
4828
4829         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4830         if (!cache) {
4831                 struct extent_record tmpl;
4832
4833                 memset(&tmpl, 0, sizeof(tmpl));
4834                 tmpl.start = bytenr;
4835                 tmpl.nr = 1;
4836                 tmpl.metadata = 1;
4837
4838                 add_extent_rec_nolookup(extent_cache, &tmpl);
4839
4840                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4841                 if (!cache)
4842                         abort();
4843         }
4844
4845         rec = container_of(cache, struct extent_record, cache);
4846         if (rec->start != bytenr) {
4847                 abort();
4848         }
4849
4850         back = find_tree_backref(rec, parent, root);
4851         if (!back) {
4852                 back = alloc_tree_backref(rec, parent, root);
4853                 BUG_ON(!back);
4854         }
4855
4856         if (found_ref) {
4857                 if (back->node.found_ref) {
4858                         fprintf(stderr, "Extent back ref already exists "
4859                                 "for %llu parent %llu root %llu \n",
4860                                 (unsigned long long)bytenr,
4861                                 (unsigned long long)parent,
4862                                 (unsigned long long)root);
4863                 }
4864                 back->node.found_ref = 1;
4865         } else {
4866                 if (back->node.found_extent_tree) {
4867                         fprintf(stderr, "Extent back ref already exists "
4868                                 "for %llu parent %llu root %llu \n",
4869                                 (unsigned long long)bytenr,
4870                                 (unsigned long long)parent,
4871                                 (unsigned long long)root);
4872                 }
4873                 back->node.found_extent_tree = 1;
4874         }
4875         check_extent_type(rec);
4876         maybe_free_extent_rec(extent_cache, rec);
4877         return 0;
4878 }
4879
4880 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4881                             u64 parent, u64 root, u64 owner, u64 offset,
4882                             u32 num_refs, int found_ref, u64 max_size)
4883 {
4884         struct extent_record *rec;
4885         struct data_backref *back;
4886         struct cache_extent *cache;
4887
4888         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4889         if (!cache) {
4890                 struct extent_record tmpl;
4891
4892                 memset(&tmpl, 0, sizeof(tmpl));
4893                 tmpl.start = bytenr;
4894                 tmpl.nr = 1;
4895                 tmpl.max_size = max_size;
4896
4897                 add_extent_rec_nolookup(extent_cache, &tmpl);
4898
4899                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4900                 if (!cache)
4901                         abort();
4902         }
4903
4904         rec = container_of(cache, struct extent_record, cache);
4905         if (rec->max_size < max_size)
4906                 rec->max_size = max_size;
4907
4908         /*
4909          * If found_ref is set then max_size is the real size and must match the
4910          * existing refs.  So if we have already found a ref then we need to
4911          * make sure that this ref matches the existing one, otherwise we need
4912          * to add a new backref so we can notice that the backrefs don't match
4913          * and we need to figure out who is telling the truth.  This is to
4914          * account for that awful fsync bug I introduced where we'd end up with
4915          * a btrfs_file_extent_item that would have its length include multiple
4916          * prealloc extents or point inside of a prealloc extent.
4917          */
4918         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4919                                  bytenr, max_size);
4920         if (!back) {
4921                 back = alloc_data_backref(rec, parent, root, owner, offset,
4922                                           max_size);
4923                 BUG_ON(!back);
4924         }
4925
4926         if (found_ref) {
4927                 BUG_ON(num_refs != 1);
4928                 if (back->node.found_ref)
4929                         BUG_ON(back->bytes != max_size);
4930                 back->node.found_ref = 1;
4931                 back->found_ref += 1;
4932                 back->bytes = max_size;
4933                 back->disk_bytenr = bytenr;
4934                 rec->refs += 1;
4935                 rec->content_checked = 1;
4936                 rec->owner_ref_checked = 1;
4937         } else {
4938                 if (back->node.found_extent_tree) {
4939                         fprintf(stderr, "Extent back ref already exists "
4940                                 "for %llu parent %llu root %llu "
4941                                 "owner %llu offset %llu num_refs %lu\n",
4942                                 (unsigned long long)bytenr,
4943                                 (unsigned long long)parent,
4944                                 (unsigned long long)root,
4945                                 (unsigned long long)owner,
4946                                 (unsigned long long)offset,
4947                                 (unsigned long)num_refs);
4948                 }
4949                 back->num_refs = num_refs;
4950                 back->node.found_extent_tree = 1;
4951         }
4952         maybe_free_extent_rec(extent_cache, rec);
4953         return 0;
4954 }
4955
4956 static int add_pending(struct cache_tree *pending,
4957                        struct cache_tree *seen, u64 bytenr, u32 size)
4958 {
4959         int ret;
4960         ret = add_cache_extent(seen, bytenr, size);
4961         if (ret)
4962                 return ret;
4963         add_cache_extent(pending, bytenr, size);
4964         return 0;
4965 }
4966
4967 static int pick_next_pending(struct cache_tree *pending,
4968                         struct cache_tree *reada,
4969                         struct cache_tree *nodes,
4970                         u64 last, struct block_info *bits, int bits_nr,
4971                         int *reada_bits)
4972 {
4973         unsigned long node_start = last;
4974         struct cache_extent *cache;
4975         int ret;
4976
4977         cache = search_cache_extent(reada, 0);
4978         if (cache) {
4979                 bits[0].start = cache->start;
4980                 bits[0].size = cache->size;
4981                 *reada_bits = 1;
4982                 return 1;
4983         }
4984         *reada_bits = 0;
4985         if (node_start > 32768)
4986                 node_start -= 32768;
4987
4988         cache = search_cache_extent(nodes, node_start);
4989         if (!cache)
4990                 cache = search_cache_extent(nodes, 0);
4991
4992         if (!cache) {
4993                  cache = search_cache_extent(pending, 0);
4994                  if (!cache)
4995                          return 0;
4996                  ret = 0;
4997                  do {
4998                          bits[ret].start = cache->start;
4999                          bits[ret].size = cache->size;
5000                          cache = next_cache_extent(cache);
5001                          ret++;
5002                  } while (cache && ret < bits_nr);
5003                  return ret;
5004         }
5005
5006         ret = 0;
5007         do {
5008                 bits[ret].start = cache->start;
5009                 bits[ret].size = cache->size;
5010                 cache = next_cache_extent(cache);
5011                 ret++;
5012         } while (cache && ret < bits_nr);
5013
5014         if (bits_nr - ret > 8) {
5015                 u64 lookup = bits[0].start + bits[0].size;
5016                 struct cache_extent *next;
5017                 next = search_cache_extent(pending, lookup);
5018                 while(next) {
5019                         if (next->start - lookup > 32768)
5020                                 break;
5021                         bits[ret].start = next->start;
5022                         bits[ret].size = next->size;
5023                         lookup = next->start + next->size;
5024                         ret++;
5025                         if (ret == bits_nr)
5026                                 break;
5027                         next = next_cache_extent(next);
5028                         if (!next)
5029                                 break;
5030                 }
5031         }
5032         return ret;
5033 }
5034
5035 static void free_chunk_record(struct cache_extent *cache)
5036 {
5037         struct chunk_record *rec;
5038
5039         rec = container_of(cache, struct chunk_record, cache);
5040         list_del_init(&rec->list);
5041         list_del_init(&rec->dextents);
5042         free(rec);
5043 }
5044
5045 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5046 {
5047         cache_tree_free_extents(chunk_cache, free_chunk_record);
5048 }
5049
5050 static void free_device_record(struct rb_node *node)
5051 {
5052         struct device_record *rec;
5053
5054         rec = container_of(node, struct device_record, node);
5055         free(rec);
5056 }
5057
5058 FREE_RB_BASED_TREE(device_cache, free_device_record);
5059
5060 int insert_block_group_record(struct block_group_tree *tree,
5061                               struct block_group_record *bg_rec)
5062 {
5063         int ret;
5064
5065         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5066         if (ret)
5067                 return ret;
5068
5069         list_add_tail(&bg_rec->list, &tree->block_groups);
5070         return 0;
5071 }
5072
5073 static void free_block_group_record(struct cache_extent *cache)
5074 {
5075         struct block_group_record *rec;
5076
5077         rec = container_of(cache, struct block_group_record, cache);
5078         list_del_init(&rec->list);
5079         free(rec);
5080 }
5081
5082 void free_block_group_tree(struct block_group_tree *tree)
5083 {
5084         cache_tree_free_extents(&tree->tree, free_block_group_record);
5085 }
5086
5087 int insert_device_extent_record(struct device_extent_tree *tree,
5088                                 struct device_extent_record *de_rec)
5089 {
5090         int ret;
5091
5092         /*
5093          * Device extent is a bit different from the other extents, because
5094          * the extents which belong to the different devices may have the
5095          * same start and size, so we need use the special extent cache
5096          * search/insert functions.
5097          */
5098         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5099         if (ret)
5100                 return ret;
5101
5102         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5103         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5104         return 0;
5105 }
5106
5107 static void free_device_extent_record(struct cache_extent *cache)
5108 {
5109         struct device_extent_record *rec;
5110
5111         rec = container_of(cache, struct device_extent_record, cache);
5112         if (!list_empty(&rec->chunk_list))
5113                 list_del_init(&rec->chunk_list);
5114         if (!list_empty(&rec->device_list))
5115                 list_del_init(&rec->device_list);
5116         free(rec);
5117 }
5118
5119 void free_device_extent_tree(struct device_extent_tree *tree)
5120 {
5121         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5122 }
5123
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  The key's objectid is used as the extent bytenr and its
 * offset as the parent.  A ref objectid below BTRFS_FIRST_FREE_OBJECTID gives
 * a tree backref; anything else gives a data backref carrying the item's ref
 * count.
 *
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
		return 0;
	}

	add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	return 0;
}
#endif
5142
5143 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5144                                             struct btrfs_key *key,
5145                                             int slot)
5146 {
5147         struct btrfs_chunk *ptr;
5148         struct chunk_record *rec;
5149         int num_stripes, i;
5150
5151         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5152         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5153
5154         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5155         if (!rec) {
5156                 fprintf(stderr, "memory allocation failed\n");
5157                 exit(-1);
5158         }
5159
5160         INIT_LIST_HEAD(&rec->list);
5161         INIT_LIST_HEAD(&rec->dextents);
5162         rec->bg_rec = NULL;
5163
5164         rec->cache.start = key->offset;
5165         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5166
5167         rec->generation = btrfs_header_generation(leaf);
5168
5169         rec->objectid = key->objectid;
5170         rec->type = key->type;
5171         rec->offset = key->offset;
5172
5173         rec->length = rec->cache.size;
5174         rec->owner = btrfs_chunk_owner(leaf, ptr);
5175         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5176         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5177         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5178         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5179         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5180         rec->num_stripes = num_stripes;
5181         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5182
5183         for (i = 0; i < rec->num_stripes; ++i) {
5184                 rec->stripes[i].devid =
5185                         btrfs_stripe_devid_nr(leaf, ptr, i);
5186                 rec->stripes[i].offset =
5187                         btrfs_stripe_offset_nr(leaf, ptr, i);
5188                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5189                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5190                                 BTRFS_UUID_SIZE);
5191         }
5192
5193         return rec;
5194 }
5195
5196 static int process_chunk_item(struct cache_tree *chunk_cache,
5197                               struct btrfs_key *key, struct extent_buffer *eb,
5198                               int slot)
5199 {
5200         struct chunk_record *rec;
5201         int ret = 0;
5202
5203         rec = btrfs_new_chunk_record(eb, key, slot);
5204         ret = insert_cache_extent(chunk_cache, &rec->cache);
5205         if (ret) {
5206                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5207                         rec->offset, rec->length);
5208                 free(rec);
5209         }
5210
5211         return ret;
5212 }
5213
5214 static int process_device_item(struct rb_root *dev_cache,
5215                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5216 {
5217         struct btrfs_dev_item *ptr;
5218         struct device_record *rec;
5219         int ret = 0;
5220
5221         ptr = btrfs_item_ptr(eb,
5222                 slot, struct btrfs_dev_item);
5223
5224         rec = malloc(sizeof(*rec));
5225         if (!rec) {
5226                 fprintf(stderr, "memory allocation failed\n");
5227                 return -ENOMEM;
5228         }
5229
5230         rec->devid = key->offset;
5231         rec->generation = btrfs_header_generation(eb);
5232
5233         rec->objectid = key->objectid;
5234         rec->type = key->type;
5235         rec->offset = key->offset;
5236
5237         rec->devid = btrfs_device_id(eb, ptr);
5238         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5239         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5240
5241         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5242         if (ret) {
5243                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5244                 free(rec);
5245         }
5246
5247         return ret;
5248 }
5249
5250 struct block_group_record *
5251 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5252                              int slot)
5253 {
5254         struct btrfs_block_group_item *ptr;
5255         struct block_group_record *rec;
5256
5257         rec = calloc(1, sizeof(*rec));
5258         if (!rec) {
5259                 fprintf(stderr, "memory allocation failed\n");
5260                 exit(-1);
5261         }
5262
5263         rec->cache.start = key->objectid;
5264         rec->cache.size = key->offset;
5265
5266         rec->generation = btrfs_header_generation(leaf);
5267
5268         rec->objectid = key->objectid;
5269         rec->type = key->type;
5270         rec->offset = key->offset;
5271
5272         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5273         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5274
5275         INIT_LIST_HEAD(&rec->list);
5276
5277         return rec;
5278 }
5279
5280 static int process_block_group_item(struct block_group_tree *block_group_cache,
5281                                     struct btrfs_key *key,
5282                                     struct extent_buffer *eb, int slot)
5283 {
5284         struct block_group_record *rec;
5285         int ret = 0;
5286
5287         rec = btrfs_new_block_group_record(eb, key, slot);
5288         ret = insert_block_group_record(block_group_cache, rec);
5289         if (ret) {
5290                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5291                         rec->objectid, rec->offset);
5292                 free(rec);
5293         }
5294
5295         return ret;
5296 }
5297
5298 struct device_extent_record *
5299 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5300                                struct btrfs_key *key, int slot)
5301 {
5302         struct device_extent_record *rec;
5303         struct btrfs_dev_extent *ptr;
5304
5305         rec = calloc(1, sizeof(*rec));
5306         if (!rec) {
5307                 fprintf(stderr, "memory allocation failed\n");
5308                 exit(-1);
5309         }
5310
5311         rec->cache.objectid = key->objectid;
5312         rec->cache.start = key->offset;
5313
5314         rec->generation = btrfs_header_generation(leaf);
5315
5316         rec->objectid = key->objectid;
5317         rec->type = key->type;
5318         rec->offset = key->offset;
5319
5320         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5321         rec->chunk_objecteid =
5322                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5323         rec->chunk_offset =
5324                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5325         rec->length = btrfs_dev_extent_length(leaf, ptr);
5326         rec->cache.size = rec->length;
5327
5328         INIT_LIST_HEAD(&rec->chunk_list);
5329         INIT_LIST_HEAD(&rec->device_list);
5330
5331         return rec;
5332 }
5333
5334 static int
5335 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5336                            struct btrfs_key *key, struct extent_buffer *eb,
5337                            int slot)
5338 {
5339         struct device_extent_record *rec;
5340         int ret;
5341
5342         rec = btrfs_new_device_extent_record(eb, key, slot);
5343         ret = insert_device_extent_record(dev_extent_cache, rec);
5344         if (ret) {
5345                 fprintf(stderr,
5346                         "Device extent[%llu, %llu, %llu] existed.\n",
5347                         rec->objectid, rec->offset, rec->length);
5348                 free(rec);
5349         }
5350
5351         return ret;
5352 }
5353
5354 static int process_extent_item(struct btrfs_root *root,
5355                                struct cache_tree *extent_cache,
5356                                struct extent_buffer *eb, int slot)
5357 {
5358         struct btrfs_extent_item *ei;
5359         struct btrfs_extent_inline_ref *iref;
5360         struct btrfs_extent_data_ref *dref;
5361         struct btrfs_shared_data_ref *sref;
5362         struct btrfs_key key;
5363         struct extent_record tmpl;
5364         unsigned long end;
5365         unsigned long ptr;
5366         int type;
5367         u32 item_size = btrfs_item_size_nr(eb, slot);
5368         u64 refs = 0;
5369         u64 offset;
5370         u64 num_bytes;
5371         int metadata = 0;
5372
5373         btrfs_item_key_to_cpu(eb, &key, slot);
5374
5375         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5376                 metadata = 1;
5377                 num_bytes = root->nodesize;
5378         } else {
5379                 num_bytes = key.offset;
5380         }
5381
5382         if (item_size < sizeof(*ei)) {
5383 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5384                 struct btrfs_extent_item_v0 *ei0;
5385                 BUG_ON(item_size != sizeof(*ei0));
5386                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5387                 refs = btrfs_extent_refs_v0(eb, ei0);
5388 #else
5389                 BUG();
5390 #endif
5391                 memset(&tmpl, 0, sizeof(tmpl));
5392                 tmpl.start = key.objectid;
5393                 tmpl.nr = num_bytes;
5394                 tmpl.extent_item_refs = refs;
5395                 tmpl.metadata = metadata;
5396                 tmpl.found_rec = 1;
5397                 tmpl.max_size = num_bytes;
5398
5399                 return add_extent_rec(extent_cache, &tmpl);
5400         }
5401
5402         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5403         refs = btrfs_extent_refs(eb, ei);
5404         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5405                 metadata = 1;
5406         else
5407                 metadata = 0;
5408
5409         memset(&tmpl, 0, sizeof(tmpl));
5410         tmpl.start = key.objectid;
5411         tmpl.nr = num_bytes;
5412         tmpl.extent_item_refs = refs;
5413         tmpl.metadata = metadata;
5414         tmpl.found_rec = 1;
5415         tmpl.max_size = num_bytes;
5416         add_extent_rec(extent_cache, &tmpl);
5417
5418         ptr = (unsigned long)(ei + 1);
5419         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5420             key.type == BTRFS_EXTENT_ITEM_KEY)
5421                 ptr += sizeof(struct btrfs_tree_block_info);
5422
5423         end = (unsigned long)ei + item_size;
5424         while (ptr < end) {
5425                 iref = (struct btrfs_extent_inline_ref *)ptr;
5426                 type = btrfs_extent_inline_ref_type(eb, iref);
5427                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5428                 switch (type) {
5429                 case BTRFS_TREE_BLOCK_REF_KEY:
5430                         add_tree_backref(extent_cache, key.objectid,
5431                                          0, offset, 0);
5432                         break;
5433                 case BTRFS_SHARED_BLOCK_REF_KEY:
5434                         add_tree_backref(extent_cache, key.objectid,
5435                                          offset, 0, 0);
5436                         break;
5437                 case BTRFS_EXTENT_DATA_REF_KEY:
5438                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5439                         add_data_backref(extent_cache, key.objectid, 0,
5440                                         btrfs_extent_data_ref_root(eb, dref),
5441                                         btrfs_extent_data_ref_objectid(eb,
5442                                                                        dref),
5443                                         btrfs_extent_data_ref_offset(eb, dref),
5444                                         btrfs_extent_data_ref_count(eb, dref),
5445                                         0, num_bytes);
5446                         break;
5447                 case BTRFS_SHARED_DATA_REF_KEY:
5448                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5449                         add_data_backref(extent_cache, key.objectid, offset,
5450                                         0, 0, 0,
5451                                         btrfs_shared_data_ref_count(eb, sref),
5452                                         0, num_bytes);
5453                         break;
5454                 default:
5455                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5456                                 key.objectid, key.type, num_bytes);
5457                         goto out;
5458                 }
5459                 ptr += btrfs_extent_inline_ref_size(type);
5460         }
5461         WARN_ON(ptr > end);
5462 out:
5463         return 0;
5464 }
5465
5466 static int check_cache_range(struct btrfs_root *root,
5467                              struct btrfs_block_group_cache *cache,
5468                              u64 offset, u64 bytes)
5469 {
5470         struct btrfs_free_space *entry;
5471         u64 *logical;
5472         u64 bytenr;
5473         int stripe_len;
5474         int i, nr, ret;
5475
5476         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5477                 bytenr = btrfs_sb_offset(i);
5478                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5479                                        cache->key.objectid, bytenr, 0,
5480                                        &logical, &nr, &stripe_len);
5481                 if (ret)
5482                         return ret;
5483
5484                 while (nr--) {
5485                         if (logical[nr] + stripe_len <= offset)
5486                                 continue;
5487                         if (offset + bytes <= logical[nr])
5488                                 continue;
5489                         if (logical[nr] == offset) {
5490                                 if (stripe_len >= bytes) {
5491                                         kfree(logical);
5492                                         return 0;
5493                                 }
5494                                 bytes -= stripe_len;
5495                                 offset += stripe_len;
5496                         } else if (logical[nr] < offset) {
5497                                 if (logical[nr] + stripe_len >=
5498                                     offset + bytes) {
5499                                         kfree(logical);
5500                                         return 0;
5501                                 }
5502                                 bytes = (offset + bytes) -
5503                                         (logical[nr] + stripe_len);
5504                                 offset = logical[nr] + stripe_len;
5505                         } else {
5506                                 /*
5507                                  * Could be tricky, the super may land in the
5508                                  * middle of the area we're checking.  First
5509                                  * check the easiest case, it's at the end.
5510                                  */
5511                                 if (logical[nr] + stripe_len >=
5512                                     bytes + offset) {
5513                                         bytes = logical[nr] - offset;
5514                                         continue;
5515                                 }
5516
5517                                 /* Check the left side */
5518                                 ret = check_cache_range(root, cache,
5519                                                         offset,
5520                                                         logical[nr] - offset);
5521                                 if (ret) {
5522                                         kfree(logical);
5523                                         return ret;
5524                                 }
5525
5526                                 /* Now we continue with the right side */
5527                                 bytes = (offset + bytes) -
5528                                         (logical[nr] + stripe_len);
5529                                 offset = logical[nr] + stripe_len;
5530                         }
5531                 }
5532
5533                 kfree(logical);
5534         }
5535
5536         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5537         if (!entry) {
5538                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5539                         offset, offset+bytes);
5540                 return -EINVAL;
5541         }
5542
5543         if (entry->offset != offset) {
5544                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5545                         entry->offset);
5546                 return -EINVAL;
5547         }
5548
5549         if (entry->bytes != bytes) {
5550                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5551                         bytes, entry->bytes, offset);
5552                 return -EINVAL;
5553         }
5554
5555         unlink_free_space(cache->free_space_ctl, entry);
5556         free(entry);
5557         return 0;
5558 }
5559
5560 static int verify_space_cache(struct btrfs_root *root,
5561                               struct btrfs_block_group_cache *cache)
5562 {
5563         struct btrfs_path *path;
5564         struct extent_buffer *leaf;
5565         struct btrfs_key key;
5566         u64 last;
5567         int ret = 0;
5568
5569         path = btrfs_alloc_path();
5570         if (!path)
5571                 return -ENOMEM;
5572
5573         root = root->fs_info->extent_root;
5574
5575         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5576
5577         key.objectid = last;
5578         key.offset = 0;
5579         key.type = BTRFS_EXTENT_ITEM_KEY;
5580
5581         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5582         if (ret < 0)
5583                 goto out;
5584         ret = 0;
5585         while (1) {
5586                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5587                         ret = btrfs_next_leaf(root, path);
5588                         if (ret < 0)
5589                                 goto out;
5590                         if (ret > 0) {
5591                                 ret = 0;
5592                                 break;
5593                         }
5594                 }
5595                 leaf = path->nodes[0];
5596                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5597                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5598                         break;
5599                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5600                     key.type != BTRFS_METADATA_ITEM_KEY) {
5601                         path->slots[0]++;
5602                         continue;
5603                 }
5604
5605                 if (last == key.objectid) {
5606                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5607                                 last = key.objectid + key.offset;
5608                         else
5609                                 last = key.objectid + root->nodesize;
5610                         path->slots[0]++;
5611                         continue;
5612                 }
5613
5614                 ret = check_cache_range(root, cache, last,
5615                                         key.objectid - last);
5616                 if (ret)
5617                         break;
5618                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5619                         last = key.objectid + key.offset;
5620                 else
5621                         last = key.objectid + root->nodesize;
5622                 path->slots[0]++;
5623         }
5624
5625         if (last < cache->key.objectid + cache->key.offset)
5626                 ret = check_cache_range(root, cache, last,
5627                                         cache->key.objectid +
5628                                         cache->key.offset - last);
5629
5630 out:
5631         btrfs_free_path(path);
5632
5633         if (!ret &&
5634             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5635                 fprintf(stderr, "There are still entries left in the space "
5636                         "cache\n");
5637                 ret = -EINVAL;
5638         }
5639
5640         return ret;
5641 }
5642
5643 static int check_space_cache(struct btrfs_root *root)
5644 {
5645         struct btrfs_block_group_cache *cache;
5646         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5647         int ret;
5648         int error = 0;
5649
5650         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5651             btrfs_super_generation(root->fs_info->super_copy) !=
5652             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5653                 printf("cache and super generation don't match, space cache "
5654                        "will be invalidated\n");
5655                 return 0;
5656         }
5657
5658         if (ctx.progress_enabled) {
5659                 ctx.tp = TASK_FREE_SPACE;
5660                 task_start(ctx.info);
5661         }
5662
5663         while (1) {
5664                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5665                 if (!cache)
5666                         break;
5667
5668                 start = cache->key.objectid + cache->key.offset;
5669                 if (!cache->free_space_ctl) {
5670                         if (btrfs_init_free_space_ctl(cache,
5671                                                       root->sectorsize)) {
5672                                 ret = -ENOMEM;
5673                                 break;
5674                         }
5675                 } else {
5676                         btrfs_remove_free_space_cache(cache);
5677                 }
5678
5679                 if (btrfs_fs_compat_ro(root->fs_info,
5680                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5681                         ret = exclude_super_stripes(root, cache);
5682                         if (ret) {
5683                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5684                                         strerror(-ret));
5685                                 error++;
5686                                 continue;
5687                         }
5688                         ret = load_free_space_tree(root->fs_info, cache);
5689                         free_excluded_extents(root, cache);
5690                         if (ret < 0) {
5691                                 fprintf(stderr, "could not load free space tree: %s\n",
5692                                         strerror(-ret));
5693                                 error++;
5694                                 continue;
5695                         }
5696                         error += ret;
5697                 } else {
5698                         ret = load_free_space_cache(root->fs_info, cache);
5699                         if (!ret)
5700                                 continue;
5701                 }
5702
5703                 ret = verify_space_cache(root, cache);
5704                 if (ret) {
5705                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5706                                 cache->key.objectid);
5707                         error++;
5708                 }
5709         }
5710
5711         task_stop(ctx.info);
5712
5713         return error ? -EINVAL : 0;
5714 }
5715
5716 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5717                         u64 num_bytes, unsigned long leaf_offset,
5718                         struct extent_buffer *eb) {
5719
5720         u64 offset = 0;
5721         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5722         char *data;
5723         unsigned long csum_offset;
5724         u32 csum;
5725         u32 csum_expected;
5726         u64 read_len;
5727         u64 data_checked = 0;
5728         u64 tmp;
5729         int ret = 0;
5730         int mirror;
5731         int num_copies;
5732
5733         if (num_bytes % root->sectorsize)
5734                 return -EINVAL;
5735
5736         data = malloc(num_bytes);
5737         if (!data)
5738                 return -ENOMEM;
5739
5740         while (offset < num_bytes) {
5741                 mirror = 0;
5742 again:
5743                 read_len = num_bytes - offset;
5744                 /* read as much space once a time */
5745                 ret = read_extent_data(root, data + offset,
5746                                 bytenr + offset, &read_len, mirror);
5747                 if (ret)
5748                         goto out;
5749                 data_checked = 0;
5750                 /* verify every 4k data's checksum */
5751                 while (data_checked < read_len) {
5752                         csum = ~(u32)0;
5753                         tmp = offset + data_checked;
5754
5755                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5756                                                csum, root->sectorsize);
5757                         btrfs_csum_final(csum, (char *)&csum);
5758
5759                         csum_offset = leaf_offset +
5760                                  tmp / root->sectorsize * csum_size;
5761                         read_extent_buffer(eb, (char *)&csum_expected,
5762                                            csum_offset, csum_size);
5763                         /* try another mirror */
5764                         if (csum != csum_expected) {
5765                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5766                                                 mirror, bytenr + tmp,
5767                                                 csum, csum_expected);
5768                                 num_copies = btrfs_num_copies(
5769                                                 &root->fs_info->mapping_tree,
5770                                                 bytenr, num_bytes);
5771                                 if (mirror < num_copies - 1) {
5772                                         mirror += 1;
5773                                         goto again;
5774                                 }
5775                         }
5776                         data_checked += root->sectorsize;
5777                 }
5778                 offset += read_len;
5779         }
5780 out:
5781         free(data);
5782         return ret;
5783 }
5784
5785 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5786                                u64 num_bytes)
5787 {
5788         struct btrfs_path *path;
5789         struct extent_buffer *leaf;
5790         struct btrfs_key key;
5791         int ret;
5792
5793         path = btrfs_alloc_path();
5794         if (!path) {
5795                 fprintf(stderr, "Error allocating path\n");
5796                 return -ENOMEM;
5797         }
5798
5799         key.objectid = bytenr;
5800         key.type = BTRFS_EXTENT_ITEM_KEY;
5801         key.offset = (u64)-1;
5802
5803 again:
5804         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5805                                 0, 0);
5806         if (ret < 0) {
5807                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5808                 btrfs_free_path(path);
5809                 return ret;
5810         } else if (ret) {
5811                 if (path->slots[0] > 0) {
5812                         path->slots[0]--;
5813                 } else {
5814                         ret = btrfs_prev_leaf(root, path);
5815                         if (ret < 0) {
5816                                 goto out;
5817                         } else if (ret > 0) {
5818                                 ret = 0;
5819                                 goto out;
5820                         }
5821                 }
5822         }
5823
5824         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5825
5826         /*
5827          * Block group items come before extent items if they have the same
5828          * bytenr, so walk back one more just in case.  Dear future traveller,
5829          * first congrats on mastering time travel.  Now if it's not too much
5830          * trouble could you go back to 2006 and tell Chris to make the
5831          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5832          * EXTENT_ITEM_KEY please?
5833          */
5834         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5835                 if (path->slots[0] > 0) {
5836                         path->slots[0]--;
5837                 } else {
5838                         ret = btrfs_prev_leaf(root, path);
5839                         if (ret < 0) {
5840                                 goto out;
5841                         } else if (ret > 0) {
5842                                 ret = 0;
5843                                 goto out;
5844                         }
5845                 }
5846                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5847         }
5848
5849         while (num_bytes) {
5850                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5851                         ret = btrfs_next_leaf(root, path);
5852                         if (ret < 0) {
5853                                 fprintf(stderr, "Error going to next leaf "
5854                                         "%d\n", ret);
5855                                 btrfs_free_path(path);
5856                                 return ret;
5857                         } else if (ret) {
5858                                 break;
5859                         }
5860                 }
5861                 leaf = path->nodes[0];
5862                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5863                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5864                         path->slots[0]++;
5865                         continue;
5866                 }
5867                 if (key.objectid + key.offset < bytenr) {
5868                         path->slots[0]++;
5869                         continue;
5870                 }
5871                 if (key.objectid > bytenr + num_bytes)
5872                         break;
5873
5874                 if (key.objectid == bytenr) {
5875                         if (key.offset >= num_bytes) {
5876                                 num_bytes = 0;
5877                                 break;
5878                         }
5879                         num_bytes -= key.offset;
5880                         bytenr += key.offset;
5881                 } else if (key.objectid < bytenr) {
5882                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5883                                 num_bytes = 0;
5884                                 break;
5885                         }
5886                         num_bytes = (bytenr + num_bytes) -
5887                                 (key.objectid + key.offset);
5888                         bytenr = key.objectid + key.offset;
5889                 } else {
5890                         if (key.objectid + key.offset < bytenr + num_bytes) {
5891                                 u64 new_start = key.objectid + key.offset;
5892                                 u64 new_bytes = bytenr + num_bytes - new_start;
5893
5894                                 /*
5895                                  * Weird case, the extent is in the middle of
5896                                  * our range, we'll have to search one side
5897                                  * and then the other.  Not sure if this happens
5898                                  * in real life, but no harm in coding it up
5899                                  * anyway just in case.
5900                                  */
5901                                 btrfs_release_path(path);
5902                                 ret = check_extent_exists(root, new_start,
5903                                                           new_bytes);
5904                                 if (ret) {
5905                                         fprintf(stderr, "Right section didn't "
5906                                                 "have a record\n");
5907                                         break;
5908                                 }
5909                                 num_bytes = key.objectid - bytenr;
5910                                 goto again;
5911                         }
5912                         num_bytes = key.objectid - bytenr;
5913                 }
5914                 path->slots[0]++;
5915         }
5916         ret = 0;
5917
5918 out:
5919         if (num_bytes && !ret) {
5920                 fprintf(stderr, "There are no extents for csum range "
5921                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5922                 ret = 1;
5923         }
5924
5925         btrfs_free_path(path);
5926         return ret;
5927 }
5928
5929 static int check_csums(struct btrfs_root *root)
5930 {
5931         struct btrfs_path *path;
5932         struct extent_buffer *leaf;
5933         struct btrfs_key key;
5934         u64 offset = 0, num_bytes = 0;
5935         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5936         int errors = 0;
5937         int ret;
5938         u64 data_len;
5939         unsigned long leaf_offset;
5940
5941         root = root->fs_info->csum_root;
5942         if (!extent_buffer_uptodate(root->node)) {
5943                 fprintf(stderr, "No valid csum tree found\n");
5944                 return -ENOENT;
5945         }
5946
5947         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5948         key.type = BTRFS_EXTENT_CSUM_KEY;
5949         key.offset = 0;
5950
5951         path = btrfs_alloc_path();
5952         if (!path)
5953                 return -ENOMEM;
5954
5955         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5956         if (ret < 0) {
5957                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5958                 btrfs_free_path(path);
5959                 return ret;
5960         }
5961
5962         if (ret > 0 && path->slots[0])
5963                 path->slots[0]--;
5964         ret = 0;
5965
5966         while (1) {
5967                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5968                         ret = btrfs_next_leaf(root, path);
5969                         if (ret < 0) {
5970                                 fprintf(stderr, "Error going to next leaf "
5971                                         "%d\n", ret);
5972                                 break;
5973                         }
5974                         if (ret)
5975                                 break;
5976                 }
5977                 leaf = path->nodes[0];
5978
5979                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5980                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5981                         path->slots[0]++;
5982                         continue;
5983                 }
5984
5985                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5986                               csum_size) * root->sectorsize;
5987                 if (!check_data_csum)
5988                         goto skip_csum_check;
5989                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5990                 ret = check_extent_csums(root, key.offset, data_len,
5991                                          leaf_offset, leaf);
5992                 if (ret)
5993                         break;
5994 skip_csum_check:
5995                 if (!num_bytes) {
5996                         offset = key.offset;
5997                 } else if (key.offset != offset + num_bytes) {
5998                         ret = check_extent_exists(root, offset, num_bytes);
5999                         if (ret) {
6000                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6001                                         "there is no extent record\n",
6002                                         offset, offset+num_bytes);
6003                                 errors++;
6004                         }
6005                         offset = key.offset;
6006                         num_bytes = 0;
6007                 }
6008                 num_bytes += data_len;
6009                 path->slots[0]++;
6010         }
6011
6012         btrfs_free_path(path);
6013         return errors;
6014 }
6015
6016 static int is_dropped_key(struct btrfs_key *key,
6017                           struct btrfs_key *drop_key) {
6018         if (key->objectid < drop_key->objectid)
6019                 return 1;
6020         else if (key->objectid == drop_key->objectid) {
6021                 if (key->type < drop_key->type)
6022                         return 1;
6023                 else if (key->type == drop_key->type) {
6024                         if (key->offset < drop_key->offset)
6025                                 return 1;
6026                 }
6027         }
6028         return 0;
6029 }
6030
6031 /*
6032  * Here are the rules for FULL_BACKREF.
6033  *
6034  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6035  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6036  *      FULL_BACKREF set.
6037  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6038  *    if it happened after the relocation occurred since we'll have dropped the
6039  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6040  *    have no real way to know for sure.
6041  *
6042  * We process the blocks one root at a time, and we start from the lowest root
6043  * objectid and go to the highest.  So we can just lookup the owner backref for
6044  * the record and if we don't find it then we know it doesn't exist and we have
6045  * a FULL BACKREF.
6046  *
6047  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6048  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6049  * be set or not and then we can check later once we've gathered all the refs.
6050  */
6051 static int calc_extent_flag(struct btrfs_root *root,
6052                            struct cache_tree *extent_cache,
6053                            struct extent_buffer *buf,
6054                            struct root_item_record *ri,
6055                            u64 *flags)
6056 {
6057         struct extent_record *rec;
6058         struct cache_extent *cache;
6059         struct tree_backref *tback;
6060         u64 owner = 0;
6061
6062         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6063         /* we have added this extent before */
6064         BUG_ON(!cache);
6065         rec = container_of(cache, struct extent_record, cache);
6066
6067         /*
6068          * Except file/reloc tree, we can not have
6069          * FULL BACKREF MODE
6070          */
6071         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6072                 goto normal;
6073         /*
6074          * root node
6075          */
6076         if (buf->start == ri->bytenr)
6077                 goto normal;
6078
6079         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6080                 goto full_backref;
6081
6082         owner = btrfs_header_owner(buf);
6083         if (owner == ri->objectid)
6084                 goto normal;
6085
6086         tback = find_tree_backref(rec, 0, owner);
6087         if (!tback)
6088                 goto full_backref;
6089 normal:
6090         *flags = 0;
6091         if (rec->flag_block_full_backref != FLAG_UNSET &&
6092             rec->flag_block_full_backref != 0)
6093                 rec->bad_full_backref = 1;
6094         return 0;
6095 full_backref:
6096         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6097         if (rec->flag_block_full_backref != FLAG_UNSET &&
6098             rec->flag_block_full_backref != 1)
6099                 rec->bad_full_backref = 1;
6100         return 0;
6101 }
6102
6103 static int run_next_block(struct btrfs_root *root,
6104                           struct block_info *bits,
6105                           int bits_nr,
6106                           u64 *last,
6107                           struct cache_tree *pending,
6108                           struct cache_tree *seen,
6109                           struct cache_tree *reada,
6110                           struct cache_tree *nodes,
6111                           struct cache_tree *extent_cache,
6112                           struct cache_tree *chunk_cache,
6113                           struct rb_root *dev_cache,
6114                           struct block_group_tree *block_group_cache,
6115                           struct device_extent_tree *dev_extent_cache,
6116                           struct root_item_record *ri)
6117 {
6118         struct extent_buffer *buf;
6119         struct extent_record *rec = NULL;
6120         u64 bytenr;
6121         u32 size;
6122         u64 parent;
6123         u64 owner;
6124         u64 flags;
6125         u64 ptr;
6126         u64 gen = 0;
6127         int ret = 0;
6128         int i;
6129         int nritems;
6130         struct btrfs_key key;
6131         struct cache_extent *cache;
6132         int reada_bits;
6133
6134         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6135                                     bits_nr, &reada_bits);
6136         if (nritems == 0)
6137                 return 1;
6138
6139         if (!reada_bits) {
6140                 for(i = 0; i < nritems; i++) {
6141                         ret = add_cache_extent(reada, bits[i].start,
6142                                                bits[i].size);
6143                         if (ret == -EEXIST)
6144                                 continue;
6145
6146                         /* fixme, get the parent transid */
6147                         readahead_tree_block(root, bits[i].start,
6148                                              bits[i].size, 0);
6149                 }
6150         }
6151         *last = bits[0].start;
6152         bytenr = bits[0].start;
6153         size = bits[0].size;
6154
6155         cache = lookup_cache_extent(pending, bytenr, size);
6156         if (cache) {
6157                 remove_cache_extent(pending, cache);
6158                 free(cache);
6159         }
6160         cache = lookup_cache_extent(reada, bytenr, size);
6161         if (cache) {
6162                 remove_cache_extent(reada, cache);
6163                 free(cache);
6164         }
6165         cache = lookup_cache_extent(nodes, bytenr, size);
6166         if (cache) {
6167                 remove_cache_extent(nodes, cache);
6168                 free(cache);
6169         }
6170         cache = lookup_cache_extent(extent_cache, bytenr, size);
6171         if (cache) {
6172                 rec = container_of(cache, struct extent_record, cache);
6173                 gen = rec->parent_generation;
6174         }
6175
6176         /* fixme, get the real parent transid */
6177         buf = read_tree_block(root, bytenr, size, gen);
6178         if (!extent_buffer_uptodate(buf)) {
6179                 record_bad_block_io(root->fs_info,
6180                                     extent_cache, bytenr, size);
6181                 goto out;
6182         }
6183
6184         nritems = btrfs_header_nritems(buf);
6185
6186         flags = 0;
6187         if (!init_extent_tree) {
6188                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6189                                        btrfs_header_level(buf), 1, NULL,
6190                                        &flags);
6191                 if (ret < 0) {
6192                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6193                         if (ret < 0) {
6194                                 fprintf(stderr, "Couldn't calc extent flags\n");
6195                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6196                         }
6197                 }
6198         } else {
6199                 flags = 0;
6200                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6201                 if (ret < 0) {
6202                         fprintf(stderr, "Couldn't calc extent flags\n");
6203                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6204                 }
6205         }
6206
6207         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6208                 if (ri != NULL &&
6209                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6210                     ri->objectid == btrfs_header_owner(buf)) {
6211                         /*
6212                          * Ok we got to this block from it's original owner and
6213                          * we have FULL_BACKREF set.  Relocation can leave
6214                          * converted blocks over so this is altogether possible,
6215                          * however it's not possible if the generation > the
6216                          * last snapshot, so check for this case.
6217                          */
6218                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6219                             btrfs_header_generation(buf) > ri->last_snapshot) {
6220                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6221                                 rec->bad_full_backref = 1;
6222                         }
6223                 }
6224         } else {
6225                 if (ri != NULL &&
6226                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6227                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6228                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6229                         rec->bad_full_backref = 1;
6230                 }
6231         }
6232
6233         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6234                 rec->flag_block_full_backref = 1;
6235                 parent = bytenr;
6236                 owner = 0;
6237         } else {
6238                 rec->flag_block_full_backref = 0;
6239                 parent = 0;
6240                 owner = btrfs_header_owner(buf);
6241         }
6242
6243         ret = check_block(root, extent_cache, buf, flags);
6244         if (ret)
6245                 goto out;
6246
6247         if (btrfs_is_leaf(buf)) {
6248                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6249                 for (i = 0; i < nritems; i++) {
6250                         struct btrfs_file_extent_item *fi;
6251                         btrfs_item_key_to_cpu(buf, &key, i);
6252                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6253                                 process_extent_item(root, extent_cache, buf,
6254                                                     i);
6255                                 continue;
6256                         }
6257                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6258                                 process_extent_item(root, extent_cache, buf,
6259                                                     i);
6260                                 continue;
6261                         }
6262                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6263                                 total_csum_bytes +=
6264                                         btrfs_item_size_nr(buf, i);
6265                                 continue;
6266                         }
6267                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6268                                 process_chunk_item(chunk_cache, &key, buf, i);
6269                                 continue;
6270                         }
6271                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6272                                 process_device_item(dev_cache, &key, buf, i);
6273                                 continue;
6274                         }
6275                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6276                                 process_block_group_item(block_group_cache,
6277                                         &key, buf, i);
6278                                 continue;
6279                         }
6280                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6281                                 process_device_extent_item(dev_extent_cache,
6282                                         &key, buf, i);
6283                                 continue;
6284
6285                         }
6286                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6287 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6288                                 process_extent_ref_v0(extent_cache, buf, i);
6289 #else
6290                                 BUG();
6291 #endif
6292                                 continue;
6293                         }
6294
6295                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6296                                 add_tree_backref(extent_cache, key.objectid, 0,
6297                                                  key.offset, 0);
6298                                 continue;
6299                         }
6300                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6301                                 add_tree_backref(extent_cache, key.objectid,
6302                                                  key.offset, 0, 0);
6303                                 continue;
6304                         }
6305                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6306                                 struct btrfs_extent_data_ref *ref;
6307                                 ref = btrfs_item_ptr(buf, i,
6308                                                 struct btrfs_extent_data_ref);
6309                                 add_data_backref(extent_cache,
6310                                         key.objectid, 0,
6311                                         btrfs_extent_data_ref_root(buf, ref),
6312                                         btrfs_extent_data_ref_objectid(buf,
6313                                                                        ref),
6314                                         btrfs_extent_data_ref_offset(buf, ref),
6315                                         btrfs_extent_data_ref_count(buf, ref),
6316                                         0, root->sectorsize);
6317                                 continue;
6318                         }
6319                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6320                                 struct btrfs_shared_data_ref *ref;
6321                                 ref = btrfs_item_ptr(buf, i,
6322                                                 struct btrfs_shared_data_ref);
6323                                 add_data_backref(extent_cache,
6324                                         key.objectid, key.offset, 0, 0, 0,
6325                                         btrfs_shared_data_ref_count(buf, ref),
6326                                         0, root->sectorsize);
6327                                 continue;
6328                         }
6329                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6330                                 struct bad_item *bad;
6331
6332                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6333                                         continue;
6334                                 if (!owner)
6335                                         continue;
6336                                 bad = malloc(sizeof(struct bad_item));
6337                                 if (!bad)
6338                                         continue;
6339                                 INIT_LIST_HEAD(&bad->list);
6340                                 memcpy(&bad->key, &key,
6341                                        sizeof(struct btrfs_key));
6342                                 bad->root_id = owner;
6343                                 list_add_tail(&bad->list, &delete_items);
6344                                 continue;
6345                         }
6346                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6347                                 continue;
6348                         fi = btrfs_item_ptr(buf, i,
6349                                             struct btrfs_file_extent_item);
6350                         if (btrfs_file_extent_type(buf, fi) ==
6351                             BTRFS_FILE_EXTENT_INLINE)
6352                                 continue;
6353                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6354                                 continue;
6355
6356                         data_bytes_allocated +=
6357                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6358                         if (data_bytes_allocated < root->sectorsize) {
6359                                 abort();
6360                         }
6361                         data_bytes_referenced +=
6362                                 btrfs_file_extent_num_bytes(buf, fi);
6363                         add_data_backref(extent_cache,
6364                                 btrfs_file_extent_disk_bytenr(buf, fi),
6365                                 parent, owner, key.objectid, key.offset -
6366                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6367                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6368                 }
6369         } else {
6370                 int level;
6371                 struct btrfs_key first_key;
6372
6373                 first_key.objectid = 0;
6374
6375                 if (nritems > 0)
6376                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6377                 level = btrfs_header_level(buf);
6378                 for (i = 0; i < nritems; i++) {
6379                         struct extent_record tmpl;
6380
6381                         ptr = btrfs_node_blockptr(buf, i);
6382                         size = root->nodesize;
6383                         btrfs_node_key_to_cpu(buf, &key, i);
6384                         if (ri != NULL) {
6385                                 if ((level == ri->drop_level)
6386                                     && is_dropped_key(&key, &ri->drop_key)) {
6387                                         continue;
6388                                 }
6389                         }
6390
6391                         memset(&tmpl, 0, sizeof(tmpl));
6392                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6393                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6394                         tmpl.start = ptr;
6395                         tmpl.nr = size;
6396                         tmpl.refs = 1;
6397                         tmpl.metadata = 1;
6398                         tmpl.max_size = size;
6399                         ret = add_extent_rec(extent_cache, &tmpl);
6400                         BUG_ON(ret);
6401
6402                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6403
6404                         if (level > 1) {
6405                                 add_pending(nodes, seen, ptr, size);
6406                         } else {
6407                                 add_pending(pending, seen, ptr, size);
6408                         }
6409                 }
6410                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6411                                       nritems) * sizeof(struct btrfs_key_ptr);
6412         }
6413         total_btree_bytes += buf->len;
6414         if (fs_root_objectid(btrfs_header_owner(buf)))
6415                 total_fs_tree_bytes += buf->len;
6416         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6417                 total_extent_tree_bytes += buf->len;
6418         if (!found_old_backref &&
6419             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6420             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6421             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6422                 found_old_backref = 1;
6423 out:
6424         free_extent_buffer(buf);
6425         return ret;
6426 }
6427
6428 static int add_root_to_pending(struct extent_buffer *buf,
6429                                struct cache_tree *extent_cache,
6430                                struct cache_tree *pending,
6431                                struct cache_tree *seen,
6432                                struct cache_tree *nodes,
6433                                u64 objectid)
6434 {
6435         struct extent_record tmpl;
6436
6437         if (btrfs_header_level(buf) > 0)
6438                 add_pending(nodes, seen, buf->start, buf->len);
6439         else
6440                 add_pending(pending, seen, buf->start, buf->len);
6441
6442         memset(&tmpl, 0, sizeof(tmpl));
6443         tmpl.start = buf->start;
6444         tmpl.nr = buf->len;
6445         tmpl.is_root = 1;
6446         tmpl.refs = 1;
6447         tmpl.metadata = 1;
6448         tmpl.max_size = buf->len;
6449         add_extent_rec(extent_cache, &tmpl);
6450
6451         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6452             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6453                 add_tree_backref(extent_cache, buf->start, buf->start,
6454                                  0, 1);
6455         else
6456                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6457         return 0;
6458 }
6459
6460 /* as we fix the tree, we might be deleting blocks that
6461  * we're tracking for repair.  This hook makes sure we
6462  * remove any backrefs for blocks as we are fixing them.
6463  */
6464 static int free_extent_hook(struct btrfs_trans_handle *trans,
6465                             struct btrfs_root *root,
6466                             u64 bytenr, u64 num_bytes, u64 parent,
6467                             u64 root_objectid, u64 owner, u64 offset,
6468                             int refs_to_drop)
6469 {
6470         struct extent_record *rec;
6471         struct cache_extent *cache;
6472         int is_data;
6473         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6474
6475         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6476         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6477         if (!cache)
6478                 return 0;
6479
6480         rec = container_of(cache, struct extent_record, cache);
6481         if (is_data) {
6482                 struct data_backref *back;
6483                 back = find_data_backref(rec, parent, root_objectid, owner,
6484                                          offset, 1, bytenr, num_bytes);
6485                 if (!back)
6486                         goto out;
6487                 if (back->node.found_ref) {
6488                         back->found_ref -= refs_to_drop;
6489                         if (rec->refs)
6490                                 rec->refs -= refs_to_drop;
6491                 }
6492                 if (back->node.found_extent_tree) {
6493                         back->num_refs -= refs_to_drop;
6494                         if (rec->extent_item_refs)
6495                                 rec->extent_item_refs -= refs_to_drop;
6496                 }
6497                 if (back->found_ref == 0)
6498                         back->node.found_ref = 0;
6499                 if (back->num_refs == 0)
6500                         back->node.found_extent_tree = 0;
6501
6502                 if (!back->node.found_extent_tree && back->node.found_ref) {
6503                         rb_erase(&back->node.node, &rec->backref_tree);
6504                         free(back);
6505                 }
6506         } else {
6507                 struct tree_backref *back;
6508                 back = find_tree_backref(rec, parent, root_objectid);
6509                 if (!back)
6510                         goto out;
6511                 if (back->node.found_ref) {
6512                         if (rec->refs)
6513                                 rec->refs--;
6514                         back->node.found_ref = 0;
6515                 }
6516                 if (back->node.found_extent_tree) {
6517                         if (rec->extent_item_refs)
6518                                 rec->extent_item_refs--;
6519                         back->node.found_extent_tree = 0;
6520                 }
6521                 if (!back->node.found_extent_tree && back->node.found_ref) {
6522                         rb_erase(&back->node.node, &rec->backref_tree);
6523                         free(back);
6524                 }
6525         }
6526         maybe_free_extent_rec(extent_cache, rec);
6527 out:
6528         return 0;
6529 }
6530
6531 static int delete_extent_records(struct btrfs_trans_handle *trans,
6532                                  struct btrfs_root *root,
6533                                  struct btrfs_path *path,
6534                                  u64 bytenr, u64 new_len)
6535 {
6536         struct btrfs_key key;
6537         struct btrfs_key found_key;
6538         struct extent_buffer *leaf;
6539         int ret;
6540         int slot;
6541
6542
6543         key.objectid = bytenr;
6544         key.type = (u8)-1;
6545         key.offset = (u64)-1;
6546
6547         while(1) {
6548                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6549                                         &key, path, 0, 1);
6550                 if (ret < 0)
6551                         break;
6552
6553                 if (ret > 0) {
6554                         ret = 0;
6555                         if (path->slots[0] == 0)
6556                                 break;
6557                         path->slots[0]--;
6558                 }
6559                 ret = 0;
6560
6561                 leaf = path->nodes[0];
6562                 slot = path->slots[0];
6563
6564                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6565                 if (found_key.objectid != bytenr)
6566                         break;
6567
6568                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6569                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6570                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6571                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6572                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6573                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6574                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6575                         btrfs_release_path(path);
6576                         if (found_key.type == 0) {
6577                                 if (found_key.offset == 0)
6578                                         break;
6579                                 key.offset = found_key.offset - 1;
6580                                 key.type = found_key.type;
6581                         }
6582                         key.type = found_key.type - 1;
6583                         key.offset = (u64)-1;
6584                         continue;
6585                 }
6586
6587                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6588                         found_key.objectid, found_key.type, found_key.offset);
6589
6590                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6591                 if (ret)
6592                         break;
6593                 btrfs_release_path(path);
6594
6595                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6596                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6597                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6598                                 found_key.offset : root->nodesize;
6599
6600                         ret = btrfs_update_block_group(trans, root, bytenr,
6601                                                        bytes, 0, 0);
6602                         if (ret)
6603                                 break;
6604                 }
6605         }
6606
6607         btrfs_release_path(path);
6608         return ret;
6609 }
6610
6611 /*
6612  * for a single backref, this will allocate a new extent
6613  * and add the backref to it.
6614  */
6615 static int record_extent(struct btrfs_trans_handle *trans,
6616                          struct btrfs_fs_info *info,
6617                          struct btrfs_path *path,
6618                          struct extent_record *rec,
6619                          struct extent_backref *back,
6620                          int allocated, u64 flags)
6621 {
6622         int ret;
6623         struct btrfs_root *extent_root = info->extent_root;
6624         struct extent_buffer *leaf;
6625         struct btrfs_key ins_key;
6626         struct btrfs_extent_item *ei;
6627         struct tree_backref *tback;
6628         struct data_backref *dback;
6629         struct btrfs_tree_block_info *bi;
6630
6631         if (!back->is_data)
6632                 rec->max_size = max_t(u64, rec->max_size,
6633                                     info->extent_root->nodesize);
6634
6635         if (!allocated) {
6636                 u32 item_size = sizeof(*ei);
6637
6638                 if (!back->is_data)
6639                         item_size += sizeof(*bi);
6640
6641                 ins_key.objectid = rec->start;
6642                 ins_key.offset = rec->max_size;
6643                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6644
6645                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6646                                         &ins_key, item_size);
6647                 if (ret)
6648                         goto fail;
6649
6650                 leaf = path->nodes[0];
6651                 ei = btrfs_item_ptr(leaf, path->slots[0],
6652                                     struct btrfs_extent_item);
6653
6654                 btrfs_set_extent_refs(leaf, ei, 0);
6655                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6656
6657                 if (back->is_data) {
6658                         btrfs_set_extent_flags(leaf, ei,
6659                                                BTRFS_EXTENT_FLAG_DATA);
6660                 } else {
6661                         struct btrfs_disk_key copy_key;;
6662
6663                         tback = to_tree_backref(back);
6664                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6665                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6666                                              sizeof(*bi));
6667
6668                         btrfs_set_disk_key_objectid(&copy_key,
6669                                                     rec->info_objectid);
6670                         btrfs_set_disk_key_type(&copy_key, 0);
6671                         btrfs_set_disk_key_offset(&copy_key, 0);
6672
6673                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6674                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6675
6676                         btrfs_set_extent_flags(leaf, ei,
6677                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6678                 }
6679
6680                 btrfs_mark_buffer_dirty(leaf);
6681                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6682                                                rec->max_size, 1, 0);
6683                 if (ret)
6684                         goto fail;
6685                 btrfs_release_path(path);
6686         }
6687
6688         if (back->is_data) {
6689                 u64 parent;
6690                 int i;
6691
6692                 dback = to_data_backref(back);
6693                 if (back->full_backref)
6694                         parent = dback->parent;
6695                 else
6696                         parent = 0;
6697
6698                 for (i = 0; i < dback->found_ref; i++) {
6699                         /* if parent != 0, we're doing a full backref
6700                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6701                          * just makes the backref allocator create a data
6702                          * backref
6703                          */
6704                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6705                                                    rec->start, rec->max_size,
6706                                                    parent,
6707                                                    dback->root,
6708                                                    parent ?
6709                                                    BTRFS_FIRST_FREE_OBJECTID :
6710                                                    dback->owner,
6711                                                    dback->offset);
6712                         if (ret)
6713                                 break;
6714                 }
6715                 fprintf(stderr, "adding new data backref"
6716                                 " on %llu %s %llu owner %llu"
6717                                 " offset %llu found %d\n",
6718                                 (unsigned long long)rec->start,
6719                                 back->full_backref ?
6720                                 "parent" : "root",
6721                                 back->full_backref ?
6722                                 (unsigned long long)parent :
6723                                 (unsigned long long)dback->root,
6724                                 (unsigned long long)dback->owner,
6725                                 (unsigned long long)dback->offset,
6726                                 dback->found_ref);
6727         } else {
6728                 u64 parent;
6729
6730                 tback = to_tree_backref(back);
6731                 if (back->full_backref)
6732                         parent = tback->parent;
6733                 else
6734                         parent = 0;
6735
6736                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6737                                            rec->start, rec->max_size,
6738                                            parent, tback->root, 0, 0);
6739                 fprintf(stderr, "adding new tree backref on "
6740                         "start %llu len %llu parent %llu root %llu\n",
6741                         rec->start, rec->max_size, parent, tback->root);
6742         }
6743 fail:
6744         btrfs_release_path(path);
6745         return ret;
6746 }
6747
6748 static struct extent_entry *find_entry(struct list_head *entries,
6749                                        u64 bytenr, u64 bytes)
6750 {
6751         struct extent_entry *entry = NULL;
6752
6753         list_for_each_entry(entry, entries, list) {
6754                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6755                         return entry;
6756         }
6757
6758         return NULL;
6759 }
6760
6761 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6762 {
6763         struct extent_entry *entry, *best = NULL, *prev = NULL;
6764
6765         list_for_each_entry(entry, entries, list) {
6766                 if (!prev) {
6767                         prev = entry;
6768                         continue;
6769                 }
6770
6771                 /*
6772                  * If there are as many broken entries as entries then we know
6773                  * not to trust this particular entry.
6774                  */
6775                 if (entry->broken == entry->count)
6776                         continue;
6777
6778                 /*
6779                  * If our current entry == best then we can't be sure our best
6780                  * is really the best, so we need to keep searching.
6781                  */
6782                 if (best && best->count == entry->count) {
6783                         prev = entry;
6784                         best = NULL;
6785                         continue;
6786                 }
6787
6788                 /* Prev == entry, not good enough, have to keep searching */
6789                 if (!prev->broken && prev->count == entry->count)
6790                         continue;
6791
6792                 if (!best)
6793                         best = (prev->count > entry->count) ? prev : entry;
6794                 else if (best->count < entry->count)
6795                         best = entry;
6796                 prev = entry;
6797         }
6798
6799         return best;
6800 }
6801
6802 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6803                       struct data_backref *dback, struct extent_entry *entry)
6804 {
6805         struct btrfs_trans_handle *trans;
6806         struct btrfs_root *root;
6807         struct btrfs_file_extent_item *fi;
6808         struct extent_buffer *leaf;
6809         struct btrfs_key key;
6810         u64 bytenr, bytes;
6811         int ret, err;
6812
6813         key.objectid = dback->root;
6814         key.type = BTRFS_ROOT_ITEM_KEY;
6815         key.offset = (u64)-1;
6816         root = btrfs_read_fs_root(info, &key);
6817         if (IS_ERR(root)) {
6818                 fprintf(stderr, "Couldn't find root for our ref\n");
6819                 return -EINVAL;
6820         }
6821
6822         /*
6823          * The backref points to the original offset of the extent if it was
6824          * split, so we need to search down to the offset we have and then walk
6825          * forward until we find the backref we're looking for.
6826          */
6827         key.objectid = dback->owner;
6828         key.type = BTRFS_EXTENT_DATA_KEY;
6829         key.offset = dback->offset;
6830         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6831         if (ret < 0) {
6832                 fprintf(stderr, "Error looking up ref %d\n", ret);
6833                 return ret;
6834         }
6835
6836         while (1) {
6837                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6838                         ret = btrfs_next_leaf(root, path);
6839                         if (ret) {
6840                                 fprintf(stderr, "Couldn't find our ref, next\n");
6841                                 return -EINVAL;
6842                         }
6843                 }
6844                 leaf = path->nodes[0];
6845                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6846                 if (key.objectid != dback->owner ||
6847                     key.type != BTRFS_EXTENT_DATA_KEY) {
6848                         fprintf(stderr, "Couldn't find our ref, search\n");
6849                         return -EINVAL;
6850                 }
6851                 fi = btrfs_item_ptr(leaf, path->slots[0],
6852                                     struct btrfs_file_extent_item);
6853                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6854                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6855
6856                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6857                         break;
6858                 path->slots[0]++;
6859         }
6860
6861         btrfs_release_path(path);
6862
6863         trans = btrfs_start_transaction(root, 1);
6864         if (IS_ERR(trans))
6865                 return PTR_ERR(trans);
6866
6867         /*
6868          * Ok we have the key of the file extent we want to fix, now we can cow
6869          * down to the thing and fix it.
6870          */
6871         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6872         if (ret < 0) {
6873                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6874                         key.objectid, key.type, key.offset, ret);
6875                 goto out;
6876         }
6877         if (ret > 0) {
6878                 fprintf(stderr, "Well that's odd, we just found this key "
6879                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6880                         key.offset);
6881                 ret = -EINVAL;
6882                 goto out;
6883         }
6884         leaf = path->nodes[0];
6885         fi = btrfs_item_ptr(leaf, path->slots[0],
6886                             struct btrfs_file_extent_item);
6887
6888         if (btrfs_file_extent_compression(leaf, fi) &&
6889             dback->disk_bytenr != entry->bytenr) {
6890                 fprintf(stderr, "Ref doesn't match the record start and is "
6891                         "compressed, please take a btrfs-image of this file "
6892                         "system and send it to a btrfs developer so they can "
6893                         "complete this functionality for bytenr %Lu\n",
6894                         dback->disk_bytenr);
6895                 ret = -EINVAL;
6896                 goto out;
6897         }
6898
6899         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6900                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6901         } else if (dback->disk_bytenr > entry->bytenr) {
6902                 u64 off_diff, offset;
6903
6904                 off_diff = dback->disk_bytenr - entry->bytenr;
6905                 offset = btrfs_file_extent_offset(leaf, fi);
6906                 if (dback->disk_bytenr + offset +
6907                     btrfs_file_extent_num_bytes(leaf, fi) >
6908                     entry->bytenr + entry->bytes) {
6909                         fprintf(stderr, "Ref is past the entry end, please "
6910                                 "take a btrfs-image of this file system and "
6911                                 "send it to a btrfs developer, ref %Lu\n",
6912                                 dback->disk_bytenr);
6913                         ret = -EINVAL;
6914                         goto out;
6915                 }
6916                 offset += off_diff;
6917                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6918                 btrfs_set_file_extent_offset(leaf, fi, offset);
6919         } else if (dback->disk_bytenr < entry->bytenr) {
6920                 u64 offset;
6921
6922                 offset = btrfs_file_extent_offset(leaf, fi);
6923                 if (dback->disk_bytenr + offset < entry->bytenr) {
6924                         fprintf(stderr, "Ref is before the entry start, please"
6925                                 " take a btrfs-image of this file system and "
6926                                 "send it to a btrfs developer, ref %Lu\n",
6927                                 dback->disk_bytenr);
6928                         ret = -EINVAL;
6929                         goto out;
6930                 }
6931
6932                 offset += dback->disk_bytenr;
6933                 offset -= entry->bytenr;
6934                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6935                 btrfs_set_file_extent_offset(leaf, fi, offset);
6936         }
6937
6938         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6939
6940         /*
6941          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6942          * only do this if we aren't using compression, otherwise it's a
6943          * trickier case.
6944          */
6945         if (!btrfs_file_extent_compression(leaf, fi))
6946                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6947         else
6948                 printf("ram bytes may be wrong?\n");
6949         btrfs_mark_buffer_dirty(leaf);
6950 out:
6951         err = btrfs_commit_transaction(trans, root);
6952         btrfs_release_path(path);
6953         return ret ? ret : err;
6954 }
6955
6956 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6957                            struct extent_record *rec)
6958 {
6959         struct extent_backref *back, *tmp;
6960         struct data_backref *dback;
6961         struct extent_entry *entry, *best = NULL;
6962         LIST_HEAD(entries);
6963         int nr_entries = 0;
6964         int broken_entries = 0;
6965         int ret = 0;
6966         short mismatch = 0;
6967
6968         /*
6969          * Metadata is easy and the backrefs should always agree on bytenr and
6970          * size, if not we've got bigger issues.
6971          */
6972         if (rec->metadata)
6973                 return 0;
6974
6975         rbtree_postorder_for_each_entry_safe(back, tmp,
6976                                              &rec->backref_tree, node) {
6977                 if (back->full_backref || !back->is_data)
6978                         continue;
6979
6980                 dback = to_data_backref(back);
6981
6982                 /*
6983                  * We only pay attention to backrefs that we found a real
6984                  * backref for.
6985                  */
6986                 if (dback->found_ref == 0)
6987                         continue;
6988
6989                 /*
6990                  * For now we only catch when the bytes don't match, not the
6991                  * bytenr.  We can easily do this at the same time, but I want
6992                  * to have a fs image to test on before we just add repair
6993                  * functionality willy-nilly so we know we won't screw up the
6994                  * repair.
6995                  */
6996
6997                 entry = find_entry(&entries, dback->disk_bytenr,
6998                                    dback->bytes);
6999                 if (!entry) {
7000                         entry = malloc(sizeof(struct extent_entry));
7001                         if (!entry) {
7002                                 ret = -ENOMEM;
7003                                 goto out;
7004                         }
7005                         memset(entry, 0, sizeof(*entry));
7006                         entry->bytenr = dback->disk_bytenr;
7007                         entry->bytes = dback->bytes;
7008                         list_add_tail(&entry->list, &entries);
7009                         nr_entries++;
7010                 }
7011
7012                 /*
7013                  * If we only have on entry we may think the entries agree when
7014                  * in reality they don't so we have to do some extra checking.
7015                  */
7016                 if (dback->disk_bytenr != rec->start ||
7017                     dback->bytes != rec->nr || back->broken)
7018                         mismatch = 1;
7019
7020                 if (back->broken) {
7021                         entry->broken++;
7022                         broken_entries++;
7023                 }
7024
7025                 entry->count++;
7026         }
7027
7028         /* Yay all the backrefs agree, carry on good sir */
7029         if (nr_entries <= 1 && !mismatch)
7030                 goto out;
7031
7032         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7033                 "%Lu\n", rec->start);
7034
7035         /*
7036          * First we want to see if the backrefs can agree amongst themselves who
7037          * is right, so figure out which one of the entries has the highest
7038          * count.
7039          */
7040         best = find_most_right_entry(&entries);
7041
7042         /*
7043          * Ok so we may have an even split between what the backrefs think, so
7044          * this is where we use the extent ref to see what it thinks.
7045          */
7046         if (!best) {
7047                 entry = find_entry(&entries, rec->start, rec->nr);
7048                 if (!entry && (!broken_entries || !rec->found_rec)) {
7049                         fprintf(stderr, "Backrefs don't agree with each other "
7050                                 "and extent record doesn't agree with anybody,"
7051                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7052                                 rec->start, rec->nr);
7053                         ret = -EINVAL;
7054                         goto out;
7055                 } else if (!entry) {
7056                         /*
7057                          * Ok our backrefs were broken, we'll assume this is the
7058                          * correct value and add an entry for this range.
7059                          */
7060                         entry = malloc(sizeof(struct extent_entry));
7061                         if (!entry) {
7062                                 ret = -ENOMEM;
7063                                 goto out;
7064                         }
7065                         memset(entry, 0, sizeof(*entry));
7066                         entry->bytenr = rec->start;
7067                         entry->bytes = rec->nr;
7068                         list_add_tail(&entry->list, &entries);
7069                         nr_entries++;
7070                 }
7071                 entry->count++;
7072                 best = find_most_right_entry(&entries);
7073                 if (!best) {
7074                         fprintf(stderr, "Backrefs and extent record evenly "
7075                                 "split on who is right, this is going to "
7076                                 "require user input to fix bytenr %Lu bytes "
7077                                 "%Lu\n", rec->start, rec->nr);
7078                         ret = -EINVAL;
7079                         goto out;
7080                 }
7081         }
7082
7083         /*
7084          * I don't think this can happen currently as we'll abort() if we catch
7085          * this case higher up, but in case somebody removes that we still can't
7086          * deal with it properly here yet, so just bail out of that's the case.
7087          */
7088         if (best->bytenr != rec->start) {
7089                 fprintf(stderr, "Extent start and backref starts don't match, "
7090                         "please use btrfs-image on this file system and send "
7091                         "it to a btrfs developer so they can make fsck fix "
7092                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7093                         rec->start, rec->nr);
7094                 ret = -EINVAL;
7095                 goto out;
7096         }
7097
7098         /*
7099          * Ok great we all agreed on an extent record, let's go find the real
7100          * references and fix up the ones that don't match.
7101          */
7102         rbtree_postorder_for_each_entry_safe(back, tmp,
7103                                              &rec->backref_tree, node) {
7104                 if (back->full_backref || !back->is_data)
7105                         continue;
7106
7107                 dback = to_data_backref(back);
7108
7109                 /*
7110                  * Still ignoring backrefs that don't have a real ref attached
7111                  * to them.
7112                  */
7113                 if (dback->found_ref == 0)
7114                         continue;
7115
7116                 if (dback->bytes == best->bytes &&
7117                     dback->disk_bytenr == best->bytenr)
7118                         continue;
7119
7120                 ret = repair_ref(info, path, dback, best);
7121                 if (ret)
7122                         goto out;
7123         }
7124
7125         /*
7126          * Ok we messed with the actual refs, which means we need to drop our
7127          * entire cache and go back and rescan.  I know this is a huge pain and
7128          * adds a lot of extra work, but it's the only way to be safe.  Once all
7129          * the backrefs agree we may not need to do anything to the extent
7130          * record itself.
7131          */
7132         ret = -EAGAIN;
7133 out:
7134         while (!list_empty(&entries)) {
7135                 entry = list_entry(entries.next, struct extent_entry, list);
7136                 list_del_init(&entry->list);
7137                 free(entry);
7138         }
7139         return ret;
7140 }
7141
7142 static int process_duplicates(struct btrfs_root *root,
7143                               struct cache_tree *extent_cache,
7144                               struct extent_record *rec)
7145 {
7146         struct extent_record *good, *tmp;
7147         struct cache_extent *cache;
7148         int ret;
7149
7150         /*
7151          * If we found a extent record for this extent then return, or if we
7152          * have more than one duplicate we are likely going to need to delete
7153          * something.
7154          */
7155         if (rec->found_rec || rec->num_duplicates > 1)
7156                 return 0;
7157
7158         /* Shouldn't happen but just in case */
7159         BUG_ON(!rec->num_duplicates);
7160
7161         /*
7162          * So this happens if we end up with a backref that doesn't match the
7163          * actual extent entry.  So either the backref is bad or the extent
7164          * entry is bad.  Either way we want to have the extent_record actually
7165          * reflect what we found in the extent_tree, so we need to take the
7166          * duplicate out and use that as the extent_record since the only way we
7167          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7168          */
7169         remove_cache_extent(extent_cache, &rec->cache);
7170
7171         good = to_extent_record(rec->dups.next);
7172         list_del_init(&good->list);
7173         INIT_LIST_HEAD(&good->backrefs);
7174         INIT_LIST_HEAD(&good->dups);
7175         good->cache.start = good->start;
7176         good->cache.size = good->nr;
7177         good->content_checked = 0;
7178         good->owner_ref_checked = 0;
7179         good->num_duplicates = 0;
7180         good->refs = rec->refs;
7181         list_splice_init(&rec->backrefs, &good->backrefs);
7182         while (1) {
7183                 cache = lookup_cache_extent(extent_cache, good->start,
7184                                             good->nr);
7185                 if (!cache)
7186                         break;
7187                 tmp = container_of(cache, struct extent_record, cache);
7188
7189                 /*
7190                  * If we find another overlapping extent and it's found_rec is
7191                  * set then it's a duplicate and we need to try and delete
7192                  * something.
7193                  */
7194                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7195                         if (list_empty(&good->list))
7196                                 list_add_tail(&good->list,
7197                                               &duplicate_extents);
7198                         good->num_duplicates += tmp->num_duplicates + 1;
7199                         list_splice_init(&tmp->dups, &good->dups);
7200                         list_del_init(&tmp->list);
7201                         list_add_tail(&tmp->list, &good->dups);
7202                         remove_cache_extent(extent_cache, &tmp->cache);
7203                         continue;
7204                 }
7205
7206                 /*
7207                  * Ok we have another non extent item backed extent rec, so lets
7208                  * just add it to this extent and carry on like we did above.
7209                  */
7210                 good->refs += tmp->refs;
7211                 list_splice_init(&tmp->backrefs, &good->backrefs);
7212                 remove_cache_extent(extent_cache, &tmp->cache);
7213                 free(tmp);
7214         }
7215         ret = insert_cache_extent(extent_cache, &good->cache);
7216         BUG_ON(ret);
7217         free(rec);
7218         return good->num_duplicates ? 0 : 1;
7219 }
7220
7221 static int delete_duplicate_records(struct btrfs_root *root,
7222                                     struct extent_record *rec)
7223 {
7224         struct btrfs_trans_handle *trans;
7225         LIST_HEAD(delete_list);
7226         struct btrfs_path *path;
7227         struct extent_record *tmp, *good, *n;
7228         int nr_del = 0;
7229         int ret = 0, err;
7230         struct btrfs_key key;
7231
7232         path = btrfs_alloc_path();
7233         if (!path) {
7234                 ret = -ENOMEM;
7235                 goto out;
7236         }
7237
7238         good = rec;
7239         /* Find the record that covers all of the duplicates. */
7240         list_for_each_entry(tmp, &rec->dups, list) {
7241                 if (good->start < tmp->start)
7242                         continue;
7243                 if (good->nr > tmp->nr)
7244                         continue;
7245
7246                 if (tmp->start + tmp->nr < good->start + good->nr) {
7247                         fprintf(stderr, "Ok we have overlapping extents that "
7248                                 "aren't completely covered by each other, this "
7249                                 "is going to require more careful thought.  "
7250                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7251                                 tmp->start, tmp->nr, good->start, good->nr);
7252                         abort();
7253                 }
7254                 good = tmp;
7255         }
7256
7257         if (good != rec)
7258                 list_add_tail(&rec->list, &delete_list);
7259
7260         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7261                 if (tmp == good)
7262                         continue;
7263                 list_move_tail(&tmp->list, &delete_list);
7264         }
7265
7266         root = root->fs_info->extent_root;
7267         trans = btrfs_start_transaction(root, 1);
7268         if (IS_ERR(trans)) {
7269                 ret = PTR_ERR(trans);
7270                 goto out;
7271         }
7272
7273         list_for_each_entry(tmp, &delete_list, list) {
7274                 if (tmp->found_rec == 0)
7275                         continue;
7276                 key.objectid = tmp->start;
7277                 key.type = BTRFS_EXTENT_ITEM_KEY;
7278                 key.offset = tmp->nr;
7279
7280                 /* Shouldn't happen but just in case */
7281                 if (tmp->metadata) {
7282                         fprintf(stderr, "Well this shouldn't happen, extent "
7283                                 "record overlaps but is metadata? "
7284                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7285                         abort();
7286                 }
7287
7288                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7289                 if (ret) {
7290                         if (ret > 0)
7291                                 ret = -EINVAL;
7292                         break;
7293                 }
7294                 ret = btrfs_del_item(trans, root, path);
7295                 if (ret)
7296                         break;
7297                 btrfs_release_path(path);
7298                 nr_del++;
7299         }
7300         err = btrfs_commit_transaction(trans, root);
7301         if (err && !ret)
7302                 ret = err;
7303 out:
7304         while (!list_empty(&delete_list)) {
7305                 tmp = to_extent_record(delete_list.next);
7306                 list_del_init(&tmp->list);
7307                 if (tmp == rec)
7308                         continue;
7309                 free(tmp);
7310         }
7311
7312         while (!list_empty(&rec->dups)) {
7313                 tmp = to_extent_record(rec->dups.next);
7314                 list_del_init(&tmp->list);
7315                 free(tmp);
7316         }
7317
7318         btrfs_free_path(path);
7319
7320         if (!ret && !nr_del)
7321                 rec->num_duplicates = 0;
7322
7323         return ret ? ret : nr_del;
7324 }
7325
7326 static int find_possible_backrefs(struct btrfs_fs_info *info,
7327                                   struct btrfs_path *path,
7328                                   struct cache_tree *extent_cache,
7329                                   struct extent_record *rec)
7330 {
7331         struct btrfs_root *root;
7332         struct extent_backref *back, *tmp;
7333         struct data_backref *dback;
7334         struct cache_extent *cache;
7335         struct btrfs_file_extent_item *fi;
7336         struct btrfs_key key;
7337         u64 bytenr, bytes;
7338         int ret;
7339
7340         rbtree_postorder_for_each_entry_safe(back, tmp,
7341                                              &rec->backref_tree, node) {
7342                 /* Don't care about full backrefs (poor unloved backrefs) */
7343                 if (back->full_backref || !back->is_data)
7344                         continue;
7345
7346                 dback = to_data_backref(back);
7347
7348                 /* We found this one, we don't need to do a lookup */
7349                 if (dback->found_ref)
7350                         continue;
7351
7352                 key.objectid = dback->root;
7353                 key.type = BTRFS_ROOT_ITEM_KEY;
7354                 key.offset = (u64)-1;
7355
7356                 root = btrfs_read_fs_root(info, &key);
7357
7358                 /* No root, definitely a bad ref, skip */
7359                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7360                         continue;
7361                 /* Other err, exit */
7362                 if (IS_ERR(root))
7363                         return PTR_ERR(root);
7364
7365                 key.objectid = dback->owner;
7366                 key.type = BTRFS_EXTENT_DATA_KEY;
7367                 key.offset = dback->offset;
7368                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7369                 if (ret) {
7370                         btrfs_release_path(path);
7371                         if (ret < 0)
7372                                 return ret;
7373                         /* Didn't find it, we can carry on */
7374                         ret = 0;
7375                         continue;
7376                 }
7377
7378                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7379                                     struct btrfs_file_extent_item);
7380                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7381                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7382                 btrfs_release_path(path);
7383                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7384                 if (cache) {
7385                         struct extent_record *tmp;
7386                         tmp = container_of(cache, struct extent_record, cache);
7387
7388                         /*
7389                          * If we found an extent record for the bytenr for this
7390                          * particular backref then we can't add it to our
7391                          * current extent record.  We only want to add backrefs
7392                          * that don't have a corresponding extent item in the
7393                          * extent tree since they likely belong to this record
7394                          * and we need to fix it if it doesn't match bytenrs.
7395                          */
7396                         if  (tmp->found_rec)
7397                                 continue;
7398                 }
7399
7400                 dback->found_ref += 1;
7401                 dback->disk_bytenr = bytenr;
7402                 dback->bytes = bytes;
7403
7404                 /*
7405                  * Set this so the verify backref code knows not to trust the
7406                  * values in this backref.
7407                  */
7408                 back->broken = 1;
7409         }
7410
7411         return 0;
7412 }
7413
7414 /*
7415  * Record orphan data ref into corresponding root.
7416  *
7417  * Return 0 if the extent item contains data ref and recorded.
7418  * Return 1 if the extent item contains no useful data ref
7419  *   On that case, it may contains only shared_dataref or metadata backref
7420  *   or the file extent exists(this should be handled by the extent bytenr
7421  *   recovery routine)
7422  * Return <0 if something goes wrong.
7423  */
7424 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7425                                       struct extent_record *rec)
7426 {
7427         struct btrfs_key key;
7428         struct btrfs_root *dest_root;
7429         struct extent_backref *back, *tmp;
7430         struct data_backref *dback;
7431         struct orphan_data_extent *orphan;
7432         struct btrfs_path *path;
7433         int recorded_data_ref = 0;
7434         int ret = 0;
7435
7436         if (rec->metadata)
7437                 return 1;
7438         path = btrfs_alloc_path();
7439         if (!path)
7440                 return -ENOMEM;
7441         rbtree_postorder_for_each_entry_safe(back, tmp,
7442                                              &rec->backref_tree, node) {
7443                 if (back->full_backref || !back->is_data ||
7444                     !back->found_extent_tree)
7445                         continue;
7446                 dback = to_data_backref(back);
7447                 if (dback->found_ref)
7448                         continue;
7449                 key.objectid = dback->root;
7450                 key.type = BTRFS_ROOT_ITEM_KEY;
7451                 key.offset = (u64)-1;
7452
7453                 dest_root = btrfs_read_fs_root(fs_info, &key);
7454
7455                 /* For non-exist root we just skip it */
7456                 if (IS_ERR(dest_root) || !dest_root)
7457                         continue;
7458
7459                 key.objectid = dback->owner;
7460                 key.type = BTRFS_EXTENT_DATA_KEY;
7461                 key.offset = dback->offset;
7462
7463                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7464                 /*
7465                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7466                  * we need to record it for inode/file extent rebuild.
7467                  * For ret > 0, we record it only for file extent rebuild.
7468                  * For ret == 0, the file extent exists but only bytenr
7469                  * mismatch, let the original bytenr fix routine to handle,
7470                  * don't record it.
7471                  */
7472                 if (ret == 0)
7473                         continue;
7474                 ret = 0;
7475                 orphan = malloc(sizeof(*orphan));
7476                 if (!orphan) {
7477                         ret = -ENOMEM;
7478                         goto out;
7479                 }
7480                 INIT_LIST_HEAD(&orphan->list);
7481                 orphan->root = dback->root;
7482                 orphan->objectid = dback->owner;
7483                 orphan->offset = dback->offset;
7484                 orphan->disk_bytenr = rec->cache.start;
7485                 orphan->disk_len = rec->cache.size;
7486                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7487                 recorded_data_ref = 1;
7488         }
7489 out:
7490         btrfs_free_path(path);
7491         if (!ret)
7492                 return !recorded_data_ref;
7493         else
7494                 return ret;
7495 }
7496
7497 /*
7498  * when an incorrect extent item is found, this will delete
7499  * all of the existing entries for it and recreate them
7500  * based on what the tree scan found.
7501  */
7502 static int fixup_extent_refs(struct btrfs_fs_info *info,
7503                              struct cache_tree *extent_cache,
7504                              struct extent_record *rec)
7505 {
7506         struct btrfs_trans_handle *trans = NULL;
7507         int ret;
7508         struct btrfs_path *path;
7509         struct cache_extent *cache;
7510         struct extent_backref *back, *tmp;
7511         int allocated = 0;
7512         u64 flags = 0;
7513
7514         if (rec->flag_block_full_backref)
7515                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7516
7517         path = btrfs_alloc_path();
7518         if (!path)
7519                 return -ENOMEM;
7520
7521         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7522                 /*
7523                  * Sometimes the backrefs themselves are so broken they don't
7524                  * get attached to any meaningful rec, so first go back and
7525                  * check any of our backrefs that we couldn't find and throw
7526                  * them into the list if we find the backref so that
7527                  * verify_backrefs can figure out what to do.
7528                  */
7529                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7530                 if (ret < 0)
7531                         goto out;
7532         }
7533
7534         /* step one, make sure all of the backrefs agree */
7535         ret = verify_backrefs(info, path, rec);
7536         if (ret < 0)
7537                 goto out;
7538
7539         trans = btrfs_start_transaction(info->extent_root, 1);
7540         if (IS_ERR(trans)) {
7541                 ret = PTR_ERR(trans);
7542                 goto out;
7543         }
7544
7545         /* step two, delete all the existing records */
7546         ret = delete_extent_records(trans, info->extent_root, path,
7547                                     rec->start, rec->max_size);
7548
7549         if (ret < 0)
7550                 goto out;
7551
7552         /* was this block corrupt?  If so, don't add references to it */
7553         cache = lookup_cache_extent(info->corrupt_blocks,
7554                                     rec->start, rec->max_size);
7555         if (cache) {
7556                 ret = 0;
7557                 goto out;
7558         }
7559
7560         /* step three, recreate all the refs we did find */
7561         rbtree_postorder_for_each_entry_safe(back, tmp,
7562                                              &rec->backref_tree, node) {
7563                 /*
7564                  * if we didn't find any references, don't create a
7565                  * new extent record
7566                  */
7567                 if (!back->found_ref)
7568                         continue;
7569
7570                 rec->bad_full_backref = 0;
7571                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7572                 allocated = 1;
7573
7574                 if (ret)
7575                         goto out;
7576         }
7577 out:
7578         if (trans) {
7579                 int err = btrfs_commit_transaction(trans, info->extent_root);
7580                 if (!ret)
7581                         ret = err;
7582         }
7583
7584         btrfs_free_path(path);
7585         return ret;
7586 }
7587
7588 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7589                               struct extent_record *rec)
7590 {
7591         struct btrfs_trans_handle *trans;
7592         struct btrfs_root *root = fs_info->extent_root;
7593         struct btrfs_path *path;
7594         struct btrfs_extent_item *ei;
7595         struct btrfs_key key;
7596         u64 flags;
7597         int ret = 0;
7598
7599         key.objectid = rec->start;
7600         if (rec->metadata) {
7601                 key.type = BTRFS_METADATA_ITEM_KEY;
7602                 key.offset = rec->info_level;
7603         } else {
7604                 key.type = BTRFS_EXTENT_ITEM_KEY;
7605                 key.offset = rec->max_size;
7606         }
7607
7608         path = btrfs_alloc_path();
7609         if (!path)
7610                 return -ENOMEM;
7611
7612         trans = btrfs_start_transaction(root, 0);
7613         if (IS_ERR(trans)) {
7614                 btrfs_free_path(path);
7615                 return PTR_ERR(trans);
7616         }
7617
7618         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7619         if (ret < 0) {
7620                 btrfs_free_path(path);
7621                 btrfs_commit_transaction(trans, root);
7622                 return ret;
7623         } else if (ret) {
7624                 fprintf(stderr, "Didn't find extent for %llu\n",
7625                         (unsigned long long)rec->start);
7626                 btrfs_free_path(path);
7627                 btrfs_commit_transaction(trans, root);
7628                 return -ENOENT;
7629         }
7630
7631         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7632                             struct btrfs_extent_item);
7633         flags = btrfs_extent_flags(path->nodes[0], ei);
7634         if (rec->flag_block_full_backref) {
7635                 fprintf(stderr, "setting full backref on %llu\n",
7636                         (unsigned long long)key.objectid);
7637                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7638         } else {
7639                 fprintf(stderr, "clearing full backref on %llu\n",
7640                         (unsigned long long)key.objectid);
7641                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7642         }
7643         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7644         btrfs_mark_buffer_dirty(path->nodes[0]);
7645         btrfs_free_path(path);
7646         return btrfs_commit_transaction(trans, root);
7647 }
7648
7649 /* right now we only prune from the extent allocation tree */
7650 static int prune_one_block(struct btrfs_trans_handle *trans,
7651                            struct btrfs_fs_info *info,
7652                            struct btrfs_corrupt_block *corrupt)
7653 {
7654         int ret;
7655         struct btrfs_path path;
7656         struct extent_buffer *eb;
7657         u64 found;
7658         int slot;
7659         int nritems;
7660         int level = corrupt->level + 1;
7661
7662         btrfs_init_path(&path);
7663 again:
7664         /* we want to stop at the parent to our busted block */
7665         path.lowest_level = level;
7666
7667         ret = btrfs_search_slot(trans, info->extent_root,
7668                                 &corrupt->key, &path, -1, 1);
7669
7670         if (ret < 0)
7671                 goto out;
7672
7673         eb = path.nodes[level];
7674         if (!eb) {
7675                 ret = -ENOENT;
7676                 goto out;
7677         }
7678
7679         /*
7680          * hopefully the search gave us the block we want to prune,
7681          * lets try that first
7682          */
7683         slot = path.slots[level];
7684         found =  btrfs_node_blockptr(eb, slot);
7685         if (found == corrupt->cache.start)
7686                 goto del_ptr;
7687
7688         nritems = btrfs_header_nritems(eb);
7689
7690         /* the search failed, lets scan this node and hope we find it */
7691         for (slot = 0; slot < nritems; slot++) {
7692                 found =  btrfs_node_blockptr(eb, slot);
7693                 if (found == corrupt->cache.start)
7694                         goto del_ptr;
7695         }
7696         /*
7697          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7698          * to this block
7699          */
7700         if (eb == info->extent_root->node) {
7701                 ret = -ENOENT;
7702                 goto out;
7703         } else {
7704                 level++;
7705                 btrfs_release_path(&path);
7706                 goto again;
7707         }
7708
7709 del_ptr:
7710         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7711         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7712
7713 out:
7714         btrfs_release_path(&path);
7715         return ret;
7716 }
7717
7718 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7719 {
7720         struct btrfs_trans_handle *trans = NULL;
7721         struct cache_extent *cache;
7722         struct btrfs_corrupt_block *corrupt;
7723
7724         while (1) {
7725                 cache = search_cache_extent(info->corrupt_blocks, 0);
7726                 if (!cache)
7727                         break;
7728                 if (!trans) {
7729                         trans = btrfs_start_transaction(info->extent_root, 1);
7730                         if (IS_ERR(trans))
7731                                 return PTR_ERR(trans);
7732                 }
7733                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7734                 prune_one_block(trans, info, corrupt);
7735                 remove_cache_extent(info->corrupt_blocks, cache);
7736         }
7737         if (trans)
7738                 return btrfs_commit_transaction(trans, info->extent_root);
7739         return 0;
7740 }
7741
7742 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7743 {
7744         struct btrfs_block_group_cache *cache;
7745         u64 start, end;
7746         int ret;
7747
7748         while (1) {
7749                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7750                                             &start, &end, EXTENT_DIRTY);
7751                 if (ret)
7752                         break;
7753                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7754                                    GFP_NOFS);
7755         }
7756
7757         start = 0;
7758         while (1) {
7759                 cache = btrfs_lookup_first_block_group(fs_info, start);
7760                 if (!cache)
7761                         break;
7762                 if (cache->cached)
7763                         cache->cached = 0;
7764                 start = cache->key.objectid + cache->key.offset;
7765         }
7766 }
7767
7768 static int check_extent_refs(struct btrfs_root *root,
7769                              struct cache_tree *extent_cache)
7770 {
7771         struct extent_record *rec;
7772         struct cache_extent *cache;
7773         int err = 0;
7774         int ret = 0;
7775         int fixed = 0;
7776         int had_dups = 0;
7777         int recorded = 0;
7778
7779         if (repair) {
7780                 /*
7781                  * if we're doing a repair, we have to make sure
7782                  * we don't allocate from the problem extents.
7783                  * In the worst case, this will be all the
7784                  * extents in the FS
7785                  */
7786                 cache = search_cache_extent(extent_cache, 0);
7787                 while(cache) {
7788                         rec = container_of(cache, struct extent_record, cache);
7789                         set_extent_dirty(root->fs_info->excluded_extents,
7790                                          rec->start,
7791                                          rec->start + rec->max_size - 1,
7792                                          GFP_NOFS);
7793                         cache = next_cache_extent(cache);
7794                 }
7795
7796                 /* pin down all the corrupted blocks too */
7797                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7798                 while(cache) {
7799                         set_extent_dirty(root->fs_info->excluded_extents,
7800                                          cache->start,
7801                                          cache->start + cache->size - 1,
7802                                          GFP_NOFS);
7803                         cache = next_cache_extent(cache);
7804                 }
7805                 prune_corrupt_blocks(root->fs_info);
7806                 reset_cached_block_groups(root->fs_info);
7807         }
7808
7809         reset_cached_block_groups(root->fs_info);
7810
7811         /*
7812          * We need to delete any duplicate entries we find first otherwise we
7813          * could mess up the extent tree when we have backrefs that actually
7814          * belong to a different extent item and not the weird duplicate one.
7815          */
7816         while (repair && !list_empty(&duplicate_extents)) {
7817                 rec = to_extent_record(duplicate_extents.next);
7818                 list_del_init(&rec->list);
7819
7820                 /* Sometimes we can find a backref before we find an actual
7821                  * extent, so we need to process it a little bit to see if there
7822                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7823                  * if this is a backref screwup.  If we need to delete stuff
7824                  * process_duplicates() will return 0, otherwise it will return
7825                  * 1 and we
7826                  */
7827                 if (process_duplicates(root, extent_cache, rec))
7828                         continue;
7829                 ret = delete_duplicate_records(root, rec);
7830                 if (ret < 0)
7831                         return ret;
7832                 /*
7833                  * delete_duplicate_records will return the number of entries
7834                  * deleted, so if it's greater than 0 then we know we actually
7835                  * did something and we need to remove.
7836                  */
7837                 if (ret)
7838                         had_dups = 1;
7839         }
7840
7841         if (had_dups)
7842                 return -EAGAIN;
7843
7844         while(1) {
7845                 int cur_err = 0;
7846
7847                 fixed = 0;
7848                 recorded = 0;
7849                 cache = search_cache_extent(extent_cache, 0);
7850                 if (!cache)
7851                         break;
7852                 rec = container_of(cache, struct extent_record, cache);
7853                 if (rec->num_duplicates) {
7854                         fprintf(stderr, "extent item %llu has multiple extent "
7855                                 "items\n", (unsigned long long)rec->start);
7856                         err = 1;
7857                         cur_err = 1;
7858                 }
7859
7860                 if (rec->refs != rec->extent_item_refs) {
7861                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7862                                 (unsigned long long)rec->start,
7863                                 (unsigned long long)rec->nr);
7864                         fprintf(stderr, "extent item %llu, found %llu\n",
7865                                 (unsigned long long)rec->extent_item_refs,
7866                                 (unsigned long long)rec->refs);
7867                         ret = record_orphan_data_extents(root->fs_info, rec);
7868                         if (ret < 0)
7869                                 goto repair_abort;
7870                         if (ret == 0) {
7871                                 recorded = 1;
7872                         } else {
7873                                 /*
7874                                  * we can't use the extent to repair file
7875                                  * extent, let the fallback method handle it.
7876                                  */
7877                                 if (!fixed && repair) {
7878                                         ret = fixup_extent_refs(
7879                                                         root->fs_info,
7880                                                         extent_cache, rec);
7881                                         if (ret)
7882                                                 goto repair_abort;
7883                                         fixed = 1;
7884                                 }
7885                         }
7886                         err = 1;
7887                         cur_err = 1;
7888                 }
7889                 if (all_backpointers_checked(rec, 1)) {
7890                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7891                                 (unsigned long long)rec->start,
7892                                 (unsigned long long)rec->nr);
7893
7894                         if (!fixed && !recorded && repair) {
7895                                 ret = fixup_extent_refs(root->fs_info,
7896                                                         extent_cache, rec);
7897                                 if (ret)
7898                                         goto repair_abort;
7899                                 fixed = 1;
7900                         }
7901                         cur_err = 1;
7902                         err = 1;
7903                 }
7904                 if (!rec->owner_ref_checked) {
7905                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7906                                 (unsigned long long)rec->start,
7907                                 (unsigned long long)rec->nr);
7908                         if (!fixed && !recorded && repair) {
7909                                 ret = fixup_extent_refs(root->fs_info,
7910                                                         extent_cache, rec);
7911                                 if (ret)
7912                                         goto repair_abort;
7913                                 fixed = 1;
7914                         }
7915                         err = 1;
7916                         cur_err = 1;
7917                 }
7918                 if (rec->bad_full_backref) {
7919                         fprintf(stderr, "bad full backref, on [%llu]\n",
7920                                 (unsigned long long)rec->start);
7921                         if (repair) {
7922                                 ret = fixup_extent_flags(root->fs_info, rec);
7923                                 if (ret)
7924                                         goto repair_abort;
7925                                 fixed = 1;
7926                         }
7927                         err = 1;
7928                         cur_err = 1;
7929                 }
7930                 /*
7931                  * Although it's not a extent ref's problem, we reuse this
7932                  * routine for error reporting.
7933                  * No repair function yet.
7934                  */
7935                 if (rec->crossing_stripes) {
7936                         fprintf(stderr,
7937                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7938                                 rec->start, rec->start + rec->max_size);
7939                         err = 1;
7940                         cur_err = 1;
7941                 }
7942
7943                 if (rec->wrong_chunk_type) {
7944                         fprintf(stderr,
7945                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7946                                 rec->start, rec->start + rec->max_size);
7947                         err = 1;
7948                         cur_err = 1;
7949                 }
7950
7951                 remove_cache_extent(extent_cache, cache);
7952                 free_all_extent_backrefs(rec);
7953                 if (!init_extent_tree && repair && (!cur_err || fixed))
7954                         clear_extent_dirty(root->fs_info->excluded_extents,
7955                                            rec->start,
7956                                            rec->start + rec->max_size - 1,
7957                                            GFP_NOFS);
7958                 free(rec);
7959         }
7960 repair_abort:
7961         if (repair) {
7962                 if (ret && ret != -EAGAIN) {
7963                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7964                         exit(1);
7965                 } else if (!ret) {
7966                         struct btrfs_trans_handle *trans;
7967
7968                         root = root->fs_info->extent_root;
7969                         trans = btrfs_start_transaction(root, 1);
7970                         if (IS_ERR(trans)) {
7971                                 ret = PTR_ERR(trans);
7972                                 goto repair_abort;
7973                         }
7974
7975                         btrfs_fix_block_accounting(trans, root);
7976                         ret = btrfs_commit_transaction(trans, root);
7977                         if (ret)
7978                                 goto repair_abort;
7979                 }
7980                 if (err)
7981                         fprintf(stderr, "repaired damaged extent references\n");
7982                 return ret;
7983         }
7984         return err;
7985 }
7986
7987 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7988 {
7989         u64 stripe_size;
7990
7991         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7992                 stripe_size = length;
7993                 stripe_size /= num_stripes;
7994         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7995                 stripe_size = length * 2;
7996                 stripe_size /= num_stripes;
7997         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7998                 stripe_size = length;
7999                 stripe_size /= (num_stripes - 1);
8000         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8001                 stripe_size = length;
8002                 stripe_size /= (num_stripes - 2);
8003         } else {
8004                 stripe_size = length;
8005         }
8006         return stripe_size;
8007 }
8008
8009 /*
8010  * Check the chunk with its block group/dev list ref:
8011  * Return 0 if all refs seems valid.
8012  * Return 1 if part of refs seems valid, need later check for rebuild ref
8013  * like missing block group and needs to search extent tree to rebuild them.
8014  * Return -1 if essential refs are missing and unable to rebuild.
8015  */
8016 static int check_chunk_refs(struct chunk_record *chunk_rec,
8017                             struct block_group_tree *block_group_cache,
8018                             struct device_extent_tree *dev_extent_cache,
8019                             int silent)
8020 {
8021         struct cache_extent *block_group_item;
8022         struct block_group_record *block_group_rec;
8023         struct cache_extent *dev_extent_item;
8024         struct device_extent_record *dev_extent_rec;
8025         u64 devid;
8026         u64 offset;
8027         u64 length;
8028         int metadump_v2 = 0;
8029         int i;
8030         int ret = 0;
8031
8032         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8033                                                chunk_rec->offset,
8034                                                chunk_rec->length);
8035         if (block_group_item) {
8036                 block_group_rec = container_of(block_group_item,
8037                                                struct block_group_record,
8038                                                cache);
8039                 if (chunk_rec->length != block_group_rec->offset ||
8040                     chunk_rec->offset != block_group_rec->objectid ||
8041                     (!metadump_v2 &&
8042                      chunk_rec->type_flags != block_group_rec->flags)) {
8043                         if (!silent)
8044                                 fprintf(stderr,
8045                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8046                                         chunk_rec->objectid,
8047                                         chunk_rec->type,
8048                                         chunk_rec->offset,
8049                                         chunk_rec->length,
8050                                         chunk_rec->offset,
8051                                         chunk_rec->type_flags,
8052                                         block_group_rec->objectid,
8053                                         block_group_rec->type,
8054                                         block_group_rec->offset,
8055                                         block_group_rec->offset,
8056                                         block_group_rec->objectid,
8057                                         block_group_rec->flags);
8058                         ret = -1;
8059                 } else {
8060                         list_del_init(&block_group_rec->list);
8061                         chunk_rec->bg_rec = block_group_rec;
8062                 }
8063         } else {
8064                 if (!silent)
8065                         fprintf(stderr,
8066                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8067                                 chunk_rec->objectid,
8068                                 chunk_rec->type,
8069                                 chunk_rec->offset,
8070                                 chunk_rec->length,
8071                                 chunk_rec->offset,
8072                                 chunk_rec->type_flags);
8073                 ret = 1;
8074         }
8075
8076         if (metadump_v2)
8077                 return ret;
8078
8079         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8080                                     chunk_rec->num_stripes);
8081         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8082                 devid = chunk_rec->stripes[i].devid;
8083                 offset = chunk_rec->stripes[i].offset;
8084                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8085                                                        devid, offset, length);
8086                 if (dev_extent_item) {
8087                         dev_extent_rec = container_of(dev_extent_item,
8088                                                 struct device_extent_record,
8089                                                 cache);
8090                         if (dev_extent_rec->objectid != devid ||
8091                             dev_extent_rec->offset != offset ||
8092                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8093                             dev_extent_rec->length != length) {
8094                                 if (!silent)
8095                                         fprintf(stderr,
8096                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8097                                                 chunk_rec->objectid,
8098                                                 chunk_rec->type,
8099                                                 chunk_rec->offset,
8100                                                 chunk_rec->stripes[i].devid,
8101                                                 chunk_rec->stripes[i].offset,
8102                                                 dev_extent_rec->objectid,
8103                                                 dev_extent_rec->offset,
8104                                                 dev_extent_rec->length);
8105                                 ret = -1;
8106                         } else {
8107                                 list_move(&dev_extent_rec->chunk_list,
8108                                           &chunk_rec->dextents);
8109                         }
8110                 } else {
8111                         if (!silent)
8112                                 fprintf(stderr,
8113                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8114                                         chunk_rec->objectid,
8115                                         chunk_rec->type,
8116                                         chunk_rec->offset,
8117                                         chunk_rec->stripes[i].devid,
8118                                         chunk_rec->stripes[i].offset);
8119                         ret = -1;
8120                 }
8121         }
8122         return ret;
8123 }
8124
8125 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8126 int check_chunks(struct cache_tree *chunk_cache,
8127                  struct block_group_tree *block_group_cache,
8128                  struct device_extent_tree *dev_extent_cache,
8129                  struct list_head *good, struct list_head *bad,
8130                  struct list_head *rebuild, int silent)
8131 {
8132         struct cache_extent *chunk_item;
8133         struct chunk_record *chunk_rec;
8134         struct block_group_record *bg_rec;
8135         struct device_extent_record *dext_rec;
8136         int err;
8137         int ret = 0;
8138
8139         chunk_item = first_cache_extent(chunk_cache);
8140         while (chunk_item) {
8141                 chunk_rec = container_of(chunk_item, struct chunk_record,
8142                                          cache);
8143                 err = check_chunk_refs(chunk_rec, block_group_cache,
8144                                        dev_extent_cache, silent);
8145                 if (err < 0)
8146                         ret = err;
8147                 if (err == 0 && good)
8148                         list_add_tail(&chunk_rec->list, good);
8149                 if (err > 0 && rebuild)
8150                         list_add_tail(&chunk_rec->list, rebuild);
8151                 if (err < 0 && bad)
8152                         list_add_tail(&chunk_rec->list, bad);
8153                 chunk_item = next_cache_extent(chunk_item);
8154         }
8155
8156         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8157                 if (!silent)
8158                         fprintf(stderr,
8159                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8160                                 bg_rec->objectid,
8161                                 bg_rec->offset,
8162                                 bg_rec->flags);
8163                 if (!ret)
8164                         ret = 1;
8165         }
8166
8167         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8168                             chunk_list) {
8169                 if (!silent)
8170                         fprintf(stderr,
8171                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8172                                 dext_rec->objectid,
8173                                 dext_rec->offset,
8174                                 dext_rec->length);
8175                 if (!ret)
8176                         ret = 1;
8177         }
8178         return ret;
8179 }
8180
8181
8182 static int check_device_used(struct device_record *dev_rec,
8183                              struct device_extent_tree *dext_cache)
8184 {
8185         struct cache_extent *cache;
8186         struct device_extent_record *dev_extent_rec;
8187         u64 total_byte = 0;
8188
8189         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8190         while (cache) {
8191                 dev_extent_rec = container_of(cache,
8192                                               struct device_extent_record,
8193                                               cache);
8194                 if (dev_extent_rec->objectid != dev_rec->devid)
8195                         break;
8196
8197                 list_del_init(&dev_extent_rec->device_list);
8198                 total_byte += dev_extent_rec->length;
8199                 cache = next_cache_extent(cache);
8200         }
8201
8202         if (total_byte != dev_rec->byte_used) {
8203                 fprintf(stderr,
8204                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8205                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8206                         dev_rec->type, dev_rec->offset);
8207                 return -1;
8208         } else {
8209                 return 0;
8210         }
8211 }
8212
8213 /* check btrfs_dev_item -> btrfs_dev_extent */
8214 static int check_devices(struct rb_root *dev_cache,
8215                          struct device_extent_tree *dev_extent_cache)
8216 {
8217         struct rb_node *dev_node;
8218         struct device_record *dev_rec;
8219         struct device_extent_record *dext_rec;
8220         int err;
8221         int ret = 0;
8222
8223         dev_node = rb_first(dev_cache);
8224         while (dev_node) {
8225                 dev_rec = container_of(dev_node, struct device_record, node);
8226                 err = check_device_used(dev_rec, dev_extent_cache);
8227                 if (err)
8228                         ret = err;
8229
8230                 dev_node = rb_next(dev_node);
8231         }
8232         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8233                             device_list) {
8234                 fprintf(stderr,
8235                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8236                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8237                 if (!ret)
8238                         ret = 1;
8239         }
8240         return ret;
8241 }
8242
8243 static int add_root_item_to_list(struct list_head *head,
8244                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8245                                   u8 level, u8 drop_level,
8246                                   int level_size, struct btrfs_key *drop_key)
8247 {
8248
8249         struct root_item_record *ri_rec;
8250         ri_rec = malloc(sizeof(*ri_rec));
8251         if (!ri_rec)
8252                 return -ENOMEM;
8253         ri_rec->bytenr = bytenr;
8254         ri_rec->objectid = objectid;
8255         ri_rec->level = level;
8256         ri_rec->level_size = level_size;
8257         ri_rec->drop_level = drop_level;
8258         ri_rec->last_snapshot = last_snapshot;
8259         if (drop_key)
8260                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8261         list_add_tail(&ri_rec->list, head);
8262
8263         return 0;
8264 }
8265
8266 static void free_root_item_list(struct list_head *list)
8267 {
8268         struct root_item_record *ri_rec;
8269
8270         while (!list_empty(list)) {
8271                 ri_rec = list_first_entry(list, struct root_item_record,
8272                                           list);
8273                 list_del_init(&ri_rec->list);
8274                 free(ri_rec);
8275         }
8276 }
8277
8278 static int deal_root_from_list(struct list_head *list,
8279                                struct btrfs_root *root,
8280                                struct block_info *bits,
8281                                int bits_nr,
8282                                struct cache_tree *pending,
8283                                struct cache_tree *seen,
8284                                struct cache_tree *reada,
8285                                struct cache_tree *nodes,
8286                                struct cache_tree *extent_cache,
8287                                struct cache_tree *chunk_cache,
8288                                struct rb_root *dev_cache,
8289                                struct block_group_tree *block_group_cache,
8290                                struct device_extent_tree *dev_extent_cache)
8291 {
8292         int ret = 0;
8293         u64 last;
8294
8295         while (!list_empty(list)) {
8296                 struct root_item_record *rec;
8297                 struct extent_buffer *buf;
8298                 rec = list_entry(list->next,
8299                                  struct root_item_record, list);
8300                 last = 0;
8301                 buf = read_tree_block(root->fs_info->tree_root,
8302                                       rec->bytenr, rec->level_size, 0);
8303                 if (!extent_buffer_uptodate(buf)) {
8304                         free_extent_buffer(buf);
8305                         ret = -EIO;
8306                         break;
8307                 }
8308                 add_root_to_pending(buf, extent_cache, pending,
8309                                     seen, nodes, rec->objectid);
8310                 /*
8311                  * To rebuild extent tree, we need deal with snapshot
8312                  * one by one, otherwise we deal with node firstly which
8313                  * can maximize readahead.
8314                  */
8315                 while (1) {
8316                         ret = run_next_block(root, bits, bits_nr, &last,
8317                                              pending, seen, reada, nodes,
8318                                              extent_cache, chunk_cache,
8319                                              dev_cache, block_group_cache,
8320                                              dev_extent_cache, rec);
8321                         if (ret != 0)
8322                                 break;
8323                 }
8324                 free_extent_buffer(buf);
8325                 list_del(&rec->list);
8326                 free(rec);
8327                 if (ret < 0)
8328                         break;
8329         }
8330         while (ret >= 0) {
8331                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8332                                      reada, nodes, extent_cache, chunk_cache,
8333                                      dev_cache, block_group_cache,
8334                                      dev_extent_cache, NULL);
8335                 if (ret != 0) {
8336                         if (ret > 0)
8337                                 ret = 0;
8338                         break;
8339                 }
8340         }
8341         return ret;
8342 }
8343
8344 static int check_chunks_and_extents(struct btrfs_root *root)
8345 {
8346         struct rb_root dev_cache;
8347         struct cache_tree chunk_cache;
8348         struct block_group_tree block_group_cache;
8349         struct device_extent_tree dev_extent_cache;
8350         struct cache_tree extent_cache;
8351         struct cache_tree seen;
8352         struct cache_tree pending;
8353         struct cache_tree reada;
8354         struct cache_tree nodes;
8355         struct extent_io_tree excluded_extents;
8356         struct cache_tree corrupt_blocks;
8357         struct btrfs_path path;
8358         struct btrfs_key key;
8359         struct btrfs_key found_key;
8360         int ret, err = 0;
8361         struct block_info *bits;
8362         int bits_nr;
8363         struct extent_buffer *leaf;
8364         int slot;
8365         struct btrfs_root_item ri;
8366         struct list_head dropping_trees;
8367         struct list_head normal_trees;
8368         struct btrfs_root *root1;
8369         u64 objectid;
8370         u32 level_size;
8371         u8 level;
8372
8373         dev_cache = RB_ROOT;
8374         cache_tree_init(&chunk_cache);
8375         block_group_tree_init(&block_group_cache);
8376         device_extent_tree_init(&dev_extent_cache);
8377
8378         cache_tree_init(&extent_cache);
8379         cache_tree_init(&seen);
8380         cache_tree_init(&pending);
8381         cache_tree_init(&nodes);
8382         cache_tree_init(&reada);
8383         cache_tree_init(&corrupt_blocks);
8384         extent_io_tree_init(&excluded_extents);
8385         INIT_LIST_HEAD(&dropping_trees);
8386         INIT_LIST_HEAD(&normal_trees);
8387
8388         if (repair) {
8389                 root->fs_info->excluded_extents = &excluded_extents;
8390                 root->fs_info->fsck_extent_cache = &extent_cache;
8391                 root->fs_info->free_extent_hook = free_extent_hook;
8392                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8393         }
8394
8395         bits_nr = 1024;
8396         bits = malloc(bits_nr * sizeof(struct block_info));
8397         if (!bits) {
8398                 perror("malloc");
8399                 exit(1);
8400         }
8401
8402         if (ctx.progress_enabled) {
8403                 ctx.tp = TASK_EXTENTS;
8404                 task_start(ctx.info);
8405         }
8406
8407 again:
8408         root1 = root->fs_info->tree_root;
8409         level = btrfs_header_level(root1->node);
8410         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8411                                     root1->node->start, 0, level, 0,
8412                                     root1->nodesize, NULL);
8413         if (ret < 0)
8414                 goto out;
8415         root1 = root->fs_info->chunk_root;
8416         level = btrfs_header_level(root1->node);
8417         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8418                                     root1->node->start, 0, level, 0,
8419                                     root1->nodesize, NULL);
8420         if (ret < 0)
8421                 goto out;
8422         btrfs_init_path(&path);
8423         key.offset = 0;
8424         key.objectid = 0;
8425         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8427                                         &key, &path, 0, 0);
8428         if (ret < 0)
8429                 goto out;
8430         while(1) {
8431                 leaf = path.nodes[0];
8432                 slot = path.slots[0];
8433                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8434                         ret = btrfs_next_leaf(root, &path);
8435                         if (ret != 0)
8436                                 break;
8437                         leaf = path.nodes[0];
8438                         slot = path.slots[0];
8439                 }
8440                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8441                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8442                         unsigned long offset;
8443                         u64 last_snapshot;
8444
8445                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8446                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8447                         last_snapshot = btrfs_root_last_snapshot(&ri);
8448                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8449                                 level = btrfs_root_level(&ri);
8450                                 level_size = root->nodesize;
8451                                 ret = add_root_item_to_list(&normal_trees,
8452                                                 found_key.objectid,
8453                                                 btrfs_root_bytenr(&ri),
8454                                                 last_snapshot, level,
8455                                                 0, level_size, NULL);
8456                                 if (ret < 0)
8457                                         goto out;
8458                         } else {
8459                                 level = btrfs_root_level(&ri);
8460                                 level_size = root->nodesize;
8461                                 objectid = found_key.objectid;
8462                                 btrfs_disk_key_to_cpu(&found_key,
8463                                                       &ri.drop_progress);
8464                                 ret = add_root_item_to_list(&dropping_trees,
8465                                                 objectid,
8466                                                 btrfs_root_bytenr(&ri),
8467                                                 last_snapshot, level,
8468                                                 ri.drop_level,
8469                                                 level_size, &found_key);
8470                                 if (ret < 0)
8471                                         goto out;
8472                         }
8473                 }
8474                 path.slots[0]++;
8475         }
8476         btrfs_release_path(&path);
8477
8478         /*
8479          * check_block can return -EAGAIN if it fixes something, please keep
8480          * this in mind when dealing with return values from these functions, if
8481          * we get -EAGAIN we want to fall through and restart the loop.
8482          */
8483         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8484                                   &seen, &reada, &nodes, &extent_cache,
8485                                   &chunk_cache, &dev_cache, &block_group_cache,
8486                                   &dev_extent_cache);
8487         if (ret < 0) {
8488                 if (ret == -EAGAIN)
8489                         goto loop;
8490                 goto out;
8491         }
8492         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8493                                   &pending, &seen, &reada, &nodes,
8494                                   &extent_cache, &chunk_cache, &dev_cache,
8495                                   &block_group_cache, &dev_extent_cache);
8496         if (ret < 0) {
8497                 if (ret == -EAGAIN)
8498                         goto loop;
8499                 goto out;
8500         }
8501
8502         ret = check_chunks(&chunk_cache, &block_group_cache,
8503                            &dev_extent_cache, NULL, NULL, NULL, 0);
8504         if (ret) {
8505                 if (ret == -EAGAIN)
8506                         goto loop;
8507                 err = ret;
8508         }
8509
8510         ret = check_extent_refs(root, &extent_cache);
8511         if (ret < 0) {
8512                 if (ret == -EAGAIN)
8513                         goto loop;
8514                 goto out;
8515         }
8516
8517         ret = check_devices(&dev_cache, &dev_extent_cache);
8518         if (ret && err)
8519                 ret = err;
8520
8521 out:
8522         task_stop(ctx.info);
8523         if (repair) {
8524                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8525                 extent_io_tree_cleanup(&excluded_extents);
8526                 root->fs_info->fsck_extent_cache = NULL;
8527                 root->fs_info->free_extent_hook = NULL;
8528                 root->fs_info->corrupt_blocks = NULL;
8529                 root->fs_info->excluded_extents = NULL;
8530         }
8531         free(bits);
8532         free_chunk_cache_tree(&chunk_cache);
8533         free_device_cache_tree(&dev_cache);
8534         free_block_group_tree(&block_group_cache);
8535         free_device_extent_tree(&dev_extent_cache);
8536         free_extent_cache_tree(&seen);
8537         free_extent_cache_tree(&pending);
8538         free_extent_cache_tree(&reada);
8539         free_extent_cache_tree(&nodes);
8540         return ret;
8541 loop:
8542         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8543         free_extent_cache_tree(&seen);
8544         free_extent_cache_tree(&pending);
8545         free_extent_cache_tree(&reada);
8546         free_extent_cache_tree(&nodes);
8547         free_chunk_cache_tree(&chunk_cache);
8548         free_block_group_tree(&block_group_cache);
8549         free_device_cache_tree(&dev_cache);
8550         free_device_extent_tree(&dev_extent_cache);
8551         free_extent_record_cache(root->fs_info, &extent_cache);
8552         free_root_item_list(&normal_trees);
8553         free_root_item_list(&dropping_trees);
8554         extent_io_tree_cleanup(&excluded_extents);
8555         goto again;
8556 }
8557
8558 /*
8559  * Check backrefs of a tree block given by @bytenr or @eb.
8560  *
8561  * @root:       the root containing the @bytenr or @eb
8562  * @eb:         tree block extent buffer, can be NULL
8563  * @bytenr:     bytenr of the tree block to search
8564  * @level:      tree level of the tree block
8565  * @owner:      owner of the tree block
8566  *
8567  * Return >0 for any error found and output error message
8568  * Return 0 for no error found
8569  */
8570 static int check_tree_block_ref(struct btrfs_root *root,
8571                                 struct extent_buffer *eb, u64 bytenr,
8572                                 int level, u64 owner)
8573 {
8574         struct btrfs_key key;
8575         struct btrfs_root *extent_root = root->fs_info->extent_root;
8576         struct btrfs_path path;
8577         struct btrfs_extent_item *ei;
8578         struct btrfs_extent_inline_ref *iref;
8579         struct extent_buffer *leaf;
8580         unsigned long end;
8581         unsigned long ptr;
8582         int slot;
8583         int skinny_level;
8584         int type;
8585         u32 nodesize = root->nodesize;
8586         u32 item_size;
8587         u64 offset;
8588         int found_ref = 0;
8589         int err = 0;
8590         int ret;
8591
8592         btrfs_init_path(&path);
8593         key.objectid = bytenr;
8594         if (btrfs_fs_incompat(root->fs_info,
8595                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8596                 key.type = BTRFS_METADATA_ITEM_KEY;
8597         else
8598                 key.type = BTRFS_EXTENT_ITEM_KEY;
8599         key.offset = (u64)-1;
8600
8601         /* Search for the backref in extent tree */
8602         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8603         if (ret < 0) {
8604                 err |= BACKREF_MISSING;
8605                 goto out;
8606         }
8607         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8608         if (ret) {
8609                 err |= BACKREF_MISSING;
8610                 goto out;
8611         }
8612
8613         leaf = path.nodes[0];
8614         slot = path.slots[0];
8615         btrfs_item_key_to_cpu(leaf, &key, slot);
8616
8617         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8618
8619         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8620                 skinny_level = (int)key.offset;
8621                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8622         } else {
8623                 struct btrfs_tree_block_info *info;
8624
8625                 info = (struct btrfs_tree_block_info *)(ei + 1);
8626                 skinny_level = btrfs_tree_block_level(leaf, info);
8627                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8628         }
8629
8630         if (eb) {
8631                 u64 header_gen;
8632                 u64 extent_gen;
8633
8634                 if (!(btrfs_extent_flags(leaf, ei) &
8635                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8636                         error(
8637                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8638                                 key.objectid, nodesize,
8639                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8640                         err = BACKREF_MISMATCH;
8641                 }
8642                 header_gen = btrfs_header_generation(eb);
8643                 extent_gen = btrfs_extent_generation(leaf, ei);
8644                 if (header_gen != extent_gen) {
8645                         error(
8646         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8647                                 key.objectid, nodesize, header_gen,
8648                                 extent_gen);
8649                         err = BACKREF_MISMATCH;
8650                 }
8651                 if (level != skinny_level) {
8652                         error(
8653                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8654                                 key.objectid, nodesize, level, skinny_level);
8655                         err = BACKREF_MISMATCH;
8656                 }
8657                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8658                         error(
8659                         "extent[%llu %u] is referred by other roots than %llu",
8660                                 key.objectid, nodesize, root->objectid);
8661                         err = BACKREF_MISMATCH;
8662                 }
8663         }
8664
8665         /*
8666          * Iterate the extent/metadata item to find the exact backref
8667          */
8668         item_size = btrfs_item_size_nr(leaf, slot);
8669         ptr = (unsigned long)iref;
8670         end = (unsigned long)ei + item_size;
8671         while (ptr < end) {
8672                 iref = (struct btrfs_extent_inline_ref *)ptr;
8673                 type = btrfs_extent_inline_ref_type(leaf, iref);
8674                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8675
8676                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8677                         (offset == root->objectid || offset == owner)) {
8678                         found_ref = 1;
8679                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8680                         /* Check if the backref points to valid referencer */
8681                         found_ref = !check_tree_block_ref(root, NULL, offset,
8682                                                           level + 1, owner);
8683                 }
8684
8685                 if (found_ref)
8686                         break;
8687                 ptr += btrfs_extent_inline_ref_size(type);
8688         }
8689
8690         /*
8691          * Inlined extent item doesn't have what we need, check
8692          * TREE_BLOCK_REF_KEY
8693          */
8694         if (!found_ref) {
8695                 btrfs_release_path(&path);
8696                 key.objectid = bytenr;
8697                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8698                 key.offset = root->objectid;
8699
8700                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8701                 if (!ret)
8702                         found_ref = 1;
8703         }
8704         if (!found_ref)
8705                 err |= BACKREF_MISSING;
8706 out:
8707         btrfs_release_path(&path);
8708         if (eb && (err & BACKREF_MISSING))
8709                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8710                         bytenr, nodesize, owner, level);
8711         return err;
8712 }
8713
8714 /*
8715  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8716  *
8717  * Return >0 any error found and output error message
8718  * Return 0 for no error found
8719  */
8720 static int check_extent_data_item(struct btrfs_root *root,
8721                                   struct extent_buffer *eb, int slot)
8722 {
8723         struct btrfs_file_extent_item *fi;
8724         struct btrfs_path path;
8725         struct btrfs_root *extent_root = root->fs_info->extent_root;
8726         struct btrfs_key fi_key;
8727         struct btrfs_key dbref_key;
8728         struct extent_buffer *leaf;
8729         struct btrfs_extent_item *ei;
8730         struct btrfs_extent_inline_ref *iref;
8731         struct btrfs_extent_data_ref *dref;
8732         u64 owner;
8733         u64 file_extent_gen;
8734         u64 disk_bytenr;
8735         u64 disk_num_bytes;
8736         u64 extent_num_bytes;
8737         u64 extent_flags;
8738         u64 extent_gen;
8739         u32 item_size;
8740         unsigned long end;
8741         unsigned long ptr;
8742         int type;
8743         u64 ref_root;
8744         int found_dbackref = 0;
8745         int err = 0;
8746         int ret;
8747
8748         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8749         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8750         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8751
8752         /* Nothing to check for hole and inline data extents */
8753         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8754             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8755                 return 0;
8756
8757         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8758         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8759         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8760
8761         /* Check unaligned disk_num_bytes and num_bytes */
8762         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8763                 error(
8764 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8765                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8766                         root->sectorsize);
8767                 err |= BYTES_UNALIGNED;
8768         } else {
8769                 data_bytes_allocated += disk_num_bytes;
8770         }
8771         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8772                 error(
8773 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8774                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8775                         root->sectorsize);
8776                 err |= BYTES_UNALIGNED;
8777         } else {
8778                 data_bytes_referenced += extent_num_bytes;
8779         }
8780         owner = btrfs_header_owner(eb);
8781
8782         /* Check the extent item of the file extent in extent tree */
8783         btrfs_init_path(&path);
8784         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8785         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8786         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8787
8788         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8789         if (ret) {
8790                 err |= BACKREF_MISSING;
8791                 goto error;
8792         }
8793
8794         leaf = path.nodes[0];
8795         slot = path.slots[0];
8796         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8797
8798         extent_flags = btrfs_extent_flags(leaf, ei);
8799         extent_gen = btrfs_extent_generation(leaf, ei);
8800
8801         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8802                 error(
8803                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8804                     disk_bytenr, disk_num_bytes,
8805                     BTRFS_EXTENT_FLAG_DATA);
8806                 err |= BACKREF_MISMATCH;
8807         }
8808
8809         if (file_extent_gen < extent_gen) {
8810                 error(
8811 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8812                         disk_bytenr, disk_num_bytes, file_extent_gen,
8813                         extent_gen);
8814                 err |= BACKREF_MISMATCH;
8815         }
8816
8817         /* Check data backref inside that extent item */
8818         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8819         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8820         ptr = (unsigned long)iref;
8821         end = (unsigned long)ei + item_size;
8822         while (ptr < end) {
8823                 iref = (struct btrfs_extent_inline_ref *)ptr;
8824                 type = btrfs_extent_inline_ref_type(leaf, iref);
8825                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8826
8827                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8828                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8829                         if (ref_root == owner || ref_root == root->objectid)
8830                                 found_dbackref = 1;
8831                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8832                         found_dbackref = !check_tree_block_ref(root, NULL,
8833                                 btrfs_extent_inline_ref_offset(leaf, iref),
8834                                 0, owner);
8835                 }
8836
8837                 if (found_dbackref)
8838                         break;
8839                 ptr += btrfs_extent_inline_ref_size(type);
8840         }
8841
8842         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8843         if (!found_dbackref) {
8844                 btrfs_release_path(&path);
8845
8846                 btrfs_init_path(&path);
8847                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8848                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8849                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8850                                 fi_key.objectid, fi_key.offset);
8851
8852                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8853                                         &dbref_key, &path, 0, 0);
8854                 if (!ret)
8855                         found_dbackref = 1;
8856         }
8857
8858         if (!found_dbackref)
8859                 err |= BACKREF_MISSING;
8860 error:
8861         btrfs_release_path(&path);
8862         if (err & BACKREF_MISSING) {
8863                 error("data extent[%llu %llu] backref lost",
8864                       disk_bytenr, disk_num_bytes);
8865         }
8866         return err;
8867 }
8868
8869 /*
8870  * Get real tree block level for the case like shared block
8871  * Return >= 0 as tree level
8872  * Return <0 for error
8873  */
8874 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8875 {
8876         struct extent_buffer *eb;
8877         struct btrfs_path path;
8878         struct btrfs_key key;
8879         struct btrfs_extent_item *ei;
8880         u64 flags;
8881         u64 transid;
8882         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8883         u8 backref_level;
8884         u8 header_level;
8885         int ret;
8886
8887         /* Search extent tree for extent generation and level */
8888         key.objectid = bytenr;
8889         key.type = BTRFS_METADATA_ITEM_KEY;
8890         key.offset = (u64)-1;
8891
8892         btrfs_init_path(&path);
8893         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8894         if (ret < 0)
8895                 goto release_out;
8896         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8897         if (ret < 0)
8898                 goto release_out;
8899         if (ret > 0) {
8900                 ret = -ENOENT;
8901                 goto release_out;
8902         }
8903
8904         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8905         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8906                             struct btrfs_extent_item);
8907         flags = btrfs_extent_flags(path.nodes[0], ei);
8908         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8909                 ret = -ENOENT;
8910                 goto release_out;
8911         }
8912
8913         /* Get transid for later read_tree_block() check */
8914         transid = btrfs_extent_generation(path.nodes[0], ei);
8915
8916         /* Get backref level as one source */
8917         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8918                 backref_level = key.offset;
8919         } else {
8920                 struct btrfs_tree_block_info *info;
8921
8922                 info = (struct btrfs_tree_block_info *)(ei + 1);
8923                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8924         }
8925         btrfs_release_path(&path);
8926
8927         /* Get level from tree block as an alternative source */
8928         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8929         if (!extent_buffer_uptodate(eb)) {
8930                 free_extent_buffer(eb);
8931                 return -EIO;
8932         }
8933         header_level = btrfs_header_level(eb);
8934         free_extent_buffer(eb);
8935
8936         if (header_level != backref_level)
8937                 return -EIO;
8938         return header_level;
8939
8940 release_out:
8941         btrfs_release_path(&path);
8942         return ret;
8943 }
8944
8945 /*
8946  * Check if a tree block backref is valid (points to a valid tree block)
8947  * if level == -1, level will be resolved
8948  * Return >0 for any error found and print error message
8949  */
8950 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8951                                     u64 bytenr, int level)
8952 {
8953         struct btrfs_root *root;
8954         struct btrfs_key key;
8955         struct btrfs_path path;
8956         struct extent_buffer *eb;
8957         struct extent_buffer *node;
8958         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8959         int err = 0;
8960         int ret;
8961
8962         /* Query level for level == -1 special case */
8963         if (level == -1)
8964                 level = query_tree_block_level(fs_info, bytenr);
8965         if (level < 0) {
8966                 err |= REFERENCER_MISSING;
8967                 goto out;
8968         }
8969
8970         key.objectid = root_id;
8971         key.type = BTRFS_ROOT_ITEM_KEY;
8972         key.offset = (u64)-1;
8973
8974         root = btrfs_read_fs_root(fs_info, &key);
8975         if (IS_ERR(root)) {
8976                 err |= REFERENCER_MISSING;
8977                 goto out;
8978         }
8979
8980         /* Read out the tree block to get item/node key */
8981         eb = read_tree_block(root, bytenr, root->nodesize, 0);
8982         if (!extent_buffer_uptodate(eb)) {
8983                 err |= REFERENCER_MISSING;
8984                 free_extent_buffer(eb);
8985                 goto out;
8986         }
8987
8988         /* Empty tree, no need to check key */
8989         if (!btrfs_header_nritems(eb) && !level) {
8990                 free_extent_buffer(eb);
8991                 goto out;
8992         }
8993
8994         if (level)
8995                 btrfs_node_key_to_cpu(eb, &key, 0);
8996         else
8997                 btrfs_item_key_to_cpu(eb, &key, 0);
8998
8999         free_extent_buffer(eb);
9000
9001         btrfs_init_path(&path);
9002         /* Search with the first key, to ensure we can reach it */
9003         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9004         if (ret) {
9005                 err |= REFERENCER_MISSING;
9006                 goto release_out;
9007         }
9008
9009         node = path.nodes[level];
9010         if (btrfs_header_bytenr(node) != bytenr) {
9011                 error(
9012         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9013                         bytenr, nodesize, bytenr,
9014                         btrfs_header_bytenr(node));
9015                 err |= REFERENCER_MISMATCH;
9016         }
9017         if (btrfs_header_level(node) != level) {
9018                 error(
9019         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9020                         bytenr, nodesize, level,
9021                         btrfs_header_level(node));
9022                 err |= REFERENCER_MISMATCH;
9023         }
9024
9025 release_out:
9026         btrfs_release_path(&path);
9027 out:
9028         if (err & REFERENCER_MISSING) {
9029                 if (level < 0)
9030                         error("extent [%llu %d] lost referencer (owner: %llu)",
9031                                 bytenr, nodesize, root_id);
9032                 else
9033                         error(
9034                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9035                                 bytenr, nodesize, root_id, level);
9036         }
9037
9038         return err;
9039 }
9040
9041 /*
9042  * Check referencer for shared block backref
9043  * If level == -1, this function will resolve the level.
9044  */
9045 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9046                                      u64 parent, u64 bytenr, int level)
9047 {
9048         struct extent_buffer *eb;
9049         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9050         u32 nr;
9051         int found_parent = 0;
9052         int i;
9053
9054         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9055         if (!extent_buffer_uptodate(eb))
9056                 goto out;
9057
9058         if (level == -1)
9059                 level = query_tree_block_level(fs_info, bytenr);
9060         if (level < 0)
9061                 goto out;
9062
9063         if (level + 1 != btrfs_header_level(eb))
9064                 goto out;
9065
9066         nr = btrfs_header_nritems(eb);
9067         for (i = 0; i < nr; i++) {
9068                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9069                         found_parent = 1;
9070                         break;
9071                 }
9072         }
9073 out:
9074         free_extent_buffer(eb);
9075         if (!found_parent) {
9076                 error(
9077         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9078                         bytenr, nodesize, parent, level);
9079                 return REFERENCER_MISSING;
9080         }
9081         return 0;
9082 }
9083
9084 /*
9085  * Check referencer for normal (inlined) data ref
9086  * If len == 0, it will be resolved by searching in extent tree
9087  */
9088 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9089                                      u64 root_id, u64 objectid, u64 offset,
9090                                      u64 bytenr, u64 len, u32 count)
9091 {
9092         struct btrfs_root *root;
9093         struct btrfs_root *extent_root = fs_info->extent_root;
9094         struct btrfs_key key;
9095         struct btrfs_path path;
9096         struct extent_buffer *leaf;
9097         struct btrfs_file_extent_item *fi;
9098         u32 found_count = 0;
9099         int slot;
9100         int ret = 0;
9101
9102         if (!len) {
9103                 key.objectid = bytenr;
9104                 key.type = BTRFS_EXTENT_ITEM_KEY;
9105                 key.offset = (u64)-1;
9106
9107                 btrfs_init_path(&path);
9108                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9109                 if (ret < 0)
9110                         goto out;
9111                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9112                 if (ret)
9113                         goto out;
9114                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9115                 if (key.objectid != bytenr ||
9116                     key.type != BTRFS_EXTENT_ITEM_KEY)
9117                         goto out;
9118                 len = key.offset;
9119                 btrfs_release_path(&path);
9120         }
9121         key.objectid = root_id;
9122         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9123         key.offset = (u64)-1;
9124         btrfs_init_path(&path);
9125
9126         root = btrfs_read_fs_root(fs_info, &key);
9127         if (IS_ERR(root))
9128                 goto out;
9129
9130         key.objectid = objectid;
9131         key.type = BTRFS_EXTENT_DATA_KEY;
9132         /*
9133          * It can be nasty as data backref offset is
9134          * file offset - file extent offset, which is smaller or
9135          * equal to original backref offset.  The only special case is
9136          * overflow.  So we need to special check and do further search.
9137          */
9138         key.offset = offset & (1ULL << 63) ? 0 : offset;
9139
9140         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9141         if (ret < 0)
9142                 goto out;
9143
9144         /*
9145          * Search afterwards to get correct one
9146          * NOTE: As we must do a comprehensive check on the data backref to
9147          * make sure the dref count also matches, we must iterate all file
9148          * extents for that inode.
9149          */
9150         while (1) {
9151                 leaf = path.nodes[0];
9152                 slot = path.slots[0];
9153
9154                 btrfs_item_key_to_cpu(leaf, &key, slot);
9155                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9156                         break;
9157                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9158                 /*
9159                  * Except normal disk bytenr and disk num bytes, we still
9160                  * need to do extra check on dbackref offset as
9161                  * dbackref offset = file_offset - file_extent_offset
9162                  */
9163                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9164                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9165                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9166                     offset)
9167                         found_count++;
9168
9169                 ret = btrfs_next_item(root, &path);
9170                 if (ret)
9171                         break;
9172         }
9173 out:
9174         btrfs_release_path(&path);
9175         if (found_count != count) {
9176                 error(
9177 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9178                         bytenr, len, root_id, objectid, offset, count, found_count);
9179                 return REFERENCER_MISSING;
9180         }
9181         return 0;
9182 }
9183
9184 /*
9185  * Check if the referencer of a shared data backref exists
9186  */
9187 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9188                                      u64 parent, u64 bytenr)
9189 {
9190         struct extent_buffer *eb;
9191         struct btrfs_key key;
9192         struct btrfs_file_extent_item *fi;
9193         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9194         u32 nr;
9195         int found_parent = 0;
9196         int i;
9197
9198         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9199         if (!extent_buffer_uptodate(eb))
9200                 goto out;
9201
9202         nr = btrfs_header_nritems(eb);
9203         for (i = 0; i < nr; i++) {
9204                 btrfs_item_key_to_cpu(eb, &key, i);
9205                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9206                         continue;
9207
9208                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9209                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9210                         continue;
9211
9212                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9213                         found_parent = 1;
9214                         break;
9215                 }
9216         }
9217
9218 out:
9219         free_extent_buffer(eb);
9220         if (!found_parent) {
9221                 error("shared extent %llu referencer lost (parent: %llu)",
9222                         bytenr, parent);
9223                 return REFERENCER_MISSING;
9224         }
9225         return 0;
9226 }
9227
9228 /*
9229  * This function will check a given extent item, including its backref and
9230  * itself (like crossing stripe boundary and type)
9231  *
9232  * Since we don't use extent_record anymore, introduce new error bit
9233  */
9234 static int check_extent_item(struct btrfs_fs_info *fs_info,
9235                              struct extent_buffer *eb, int slot)
9236 {
9237         struct btrfs_extent_item *ei;
9238         struct btrfs_extent_inline_ref *iref;
9239         struct btrfs_extent_data_ref *dref;
9240         unsigned long end;
9241         unsigned long ptr;
9242         int type;
9243         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9244         u32 item_size = btrfs_item_size_nr(eb, slot);
9245         u64 flags;
9246         u64 offset;
9247         int metadata = 0;
9248         int level;
9249         struct btrfs_key key;
9250         int ret;
9251         int err = 0;
9252
9253         btrfs_item_key_to_cpu(eb, &key, slot);
9254         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9255                 bytes_used += key.offset;
9256         else
9257                 bytes_used += nodesize;
9258
9259         if (item_size < sizeof(*ei)) {
9260                 /*
9261                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9262                  * old thing when on disk format is still un-determined.
9263                  * No need to care about it anymore
9264                  */
9265                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9266                 return -ENOTTY;
9267         }
9268
9269         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9270         flags = btrfs_extent_flags(eb, ei);
9271
9272         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9273                 metadata = 1;
9274         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9275                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9276                       key.objectid, key.objectid + nodesize);
9277                 err |= CROSSING_STRIPE_BOUNDARY;
9278         }
9279
9280         ptr = (unsigned long)(ei + 1);
9281
9282         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9283                 /* Old EXTENT_ITEM metadata */
9284                 struct btrfs_tree_block_info *info;
9285
9286                 info = (struct btrfs_tree_block_info *)ptr;
9287                 level = btrfs_tree_block_level(eb, info);
9288                 ptr += sizeof(struct btrfs_tree_block_info);
9289         } else {
9290                 /* New METADATA_ITEM */
9291                 level = key.offset;
9292         }
9293         end = (unsigned long)ei + item_size;
9294
9295         if (ptr >= end) {
9296                 err |= ITEM_SIZE_MISMATCH;
9297                 goto out;
9298         }
9299
9300         /* Now check every backref in this extent item */
9301 next:
9302         iref = (struct btrfs_extent_inline_ref *)ptr;
9303         type = btrfs_extent_inline_ref_type(eb, iref);
9304         offset = btrfs_extent_inline_ref_offset(eb, iref);
9305         switch (type) {
9306         case BTRFS_TREE_BLOCK_REF_KEY:
9307                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9308                                                level);
9309                 err |= ret;
9310                 break;
9311         case BTRFS_SHARED_BLOCK_REF_KEY:
9312                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9313                                                  level);
9314                 err |= ret;
9315                 break;
9316         case BTRFS_EXTENT_DATA_REF_KEY:
9317                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9318                 ret = check_extent_data_backref(fs_info,
9319                                 btrfs_extent_data_ref_root(eb, dref),
9320                                 btrfs_extent_data_ref_objectid(eb, dref),
9321                                 btrfs_extent_data_ref_offset(eb, dref),
9322                                 key.objectid, key.offset,
9323                                 btrfs_extent_data_ref_count(eb, dref));
9324                 err |= ret;
9325                 break;
9326         case BTRFS_SHARED_DATA_REF_KEY:
9327                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9328                 err |= ret;
9329                 break;
9330         default:
9331                 error("extent[%llu %d %llu] has unknown ref type: %d",
9332                         key.objectid, key.type, key.offset, type);
9333                 err |= UNKNOWN_TYPE;
9334                 goto out;
9335         }
9336
9337         ptr += btrfs_extent_inline_ref_size(type);
9338         if (ptr < end)
9339                 goto next;
9340
9341 out:
9342         return err;
9343 }
9344
9345 /*
9346  * Check if a dev extent item is referred correctly by its chunk
9347  */
9348 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9349                                  struct extent_buffer *eb, int slot)
9350 {
9351         struct btrfs_root *chunk_root = fs_info->chunk_root;
9352         struct btrfs_dev_extent *ptr;
9353         struct btrfs_path path;
9354         struct btrfs_key chunk_key;
9355         struct btrfs_key devext_key;
9356         struct btrfs_chunk *chunk;
9357         struct extent_buffer *l;
9358         int num_stripes;
9359         u64 length;
9360         int i;
9361         int found_chunk = 0;
9362         int ret;
9363
9364         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9365         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9366         length = btrfs_dev_extent_length(eb, ptr);
9367
9368         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9369         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9370         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9371
9372         btrfs_init_path(&path);
9373         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9374         if (ret)
9375                 goto out;
9376
9377         l = path.nodes[0];
9378         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9379         if (btrfs_chunk_length(l, chunk) != length)
9380                 goto out;
9381
9382         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9383         for (i = 0; i < num_stripes; i++) {
9384                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9385                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9386
9387                 if (devid == devext_key.objectid &&
9388                     offset == devext_key.offset) {
9389                         found_chunk = 1;
9390                         break;
9391                 }
9392         }
9393 out:
9394         btrfs_release_path(&path);
9395         if (!found_chunk) {
9396                 error(
9397                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9398                         devext_key.objectid, devext_key.offset, length);
9399                 return REFERENCER_MISSING;
9400         }
9401         return 0;
9402 }
9403
9404 /*
9405  * Check if the used space is correct with the dev item
9406  */
9407 static int check_dev_item(struct btrfs_fs_info *fs_info,
9408                           struct extent_buffer *eb, int slot)
9409 {
9410         struct btrfs_root *dev_root = fs_info->dev_root;
9411         struct btrfs_dev_item *dev_item;
9412         struct btrfs_path path;
9413         struct btrfs_key key;
9414         struct btrfs_dev_extent *ptr;
9415         u64 dev_id;
9416         u64 used;
9417         u64 total = 0;
9418         int ret;
9419
9420         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9421         dev_id = btrfs_device_id(eb, dev_item);
9422         used = btrfs_device_bytes_used(eb, dev_item);
9423
9424         key.objectid = dev_id;
9425         key.type = BTRFS_DEV_EXTENT_KEY;
9426         key.offset = 0;
9427
9428         btrfs_init_path(&path);
9429         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9430         if (ret < 0) {
9431                 btrfs_item_key_to_cpu(eb, &key, slot);
9432                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9433                         key.objectid, key.type, key.offset);
9434                 btrfs_release_path(&path);
9435                 return REFERENCER_MISSING;
9436         }
9437
9438         /* Iterate dev_extents to calculate the used space of a device */
9439         while (1) {
9440                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9441
9442                 if (key.objectid > dev_id)
9443                         break;
9444                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9445                         goto next;
9446
9447                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9448                                      struct btrfs_dev_extent);
9449                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9450 next:
9451                 ret = btrfs_next_item(dev_root, &path);
9452                 if (ret)
9453                         break;
9454         }
9455         btrfs_release_path(&path);
9456
9457         if (used != total) {
9458                 btrfs_item_key_to_cpu(eb, &key, slot);
9459                 error(
9460 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9461                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9462                         BTRFS_DEV_EXTENT_KEY, dev_id);
9463                 return ACCOUNTING_MISMATCH;
9464         }
9465         return 0;
9466 }
9467
9468 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9469                            struct btrfs_root *root, int overwrite)
9470 {
9471         struct extent_buffer *c;
9472         struct extent_buffer *old = root->node;
9473         int level;
9474         int ret;
9475         struct btrfs_disk_key disk_key = {0,0,0};
9476
9477         level = 0;
9478
9479         if (overwrite) {
9480                 c = old;
9481                 extent_buffer_get(c);
9482                 goto init;
9483         }
9484         c = btrfs_alloc_free_block(trans, root,
9485                                    root->nodesize,
9486                                    root->root_key.objectid,
9487                                    &disk_key, level, 0, 0);
9488         if (IS_ERR(c)) {
9489                 c = old;
9490                 extent_buffer_get(c);
9491                 overwrite = 1;
9492         }
9493 init:
9494         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9495         btrfs_set_header_level(c, level);
9496         btrfs_set_header_bytenr(c, c->start);
9497         btrfs_set_header_generation(c, trans->transid);
9498         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9499         btrfs_set_header_owner(c, root->root_key.objectid);
9500
9501         write_extent_buffer(c, root->fs_info->fsid,
9502                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
9503
9504         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9505                             btrfs_header_chunk_tree_uuid(c),
9506                             BTRFS_UUID_SIZE);
9507
9508         btrfs_mark_buffer_dirty(c);
9509         /*
9510          * this case can happen in the following case:
9511          *
9512          * 1.overwrite previous root.
9513          *
9514          * 2.reinit reloc data root, this is because we skip pin
9515          * down reloc data tree before which means we can allocate
9516          * same block bytenr here.
9517          */
9518         if (old->start == c->start) {
9519                 btrfs_set_root_generation(&root->root_item,
9520                                           trans->transid);
9521                 root->root_item.level = btrfs_header_level(root->node);
9522                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9523                                         &root->root_key, &root->root_item);
9524                 if (ret) {
9525                         free_extent_buffer(c);
9526                         return ret;
9527                 }
9528         }
9529         free_extent_buffer(old);
9530         root->node = c;
9531         add_root_to_dirty_list(root);
9532         return 0;
9533 }
9534
9535 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9536                                 struct extent_buffer *eb, int tree_root)
9537 {
9538         struct extent_buffer *tmp;
9539         struct btrfs_root_item *ri;
9540         struct btrfs_key key;
9541         u64 bytenr;
9542         u32 nodesize;
9543         int level = btrfs_header_level(eb);
9544         int nritems;
9545         int ret;
9546         int i;
9547
9548         /*
9549          * If we have pinned this block before, don't pin it again.
9550          * This can not only avoid forever loop with broken filesystem
9551          * but also give us some speedups.
9552          */
9553         if (test_range_bit(&fs_info->pinned_extents, eb->start,
9554                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9555                 return 0;
9556
9557         btrfs_pin_extent(fs_info, eb->start, eb->len);
9558
9559         nodesize = btrfs_super_nodesize(fs_info->super_copy);
9560         nritems = btrfs_header_nritems(eb);
9561         for (i = 0; i < nritems; i++) {
9562                 if (level == 0) {
9563                         btrfs_item_key_to_cpu(eb, &key, i);
9564                         if (key.type != BTRFS_ROOT_ITEM_KEY)
9565                                 continue;
9566                         /* Skip the extent root and reloc roots */
9567                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9568                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9569                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9570                                 continue;
9571                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9572                         bytenr = btrfs_disk_root_bytenr(eb, ri);
9573
9574                         /*
9575                          * If at any point we start needing the real root we
9576                          * will have to build a stump root for the root we are
9577                          * in, but for now this doesn't actually use the root so
9578                          * just pass in extent_root.
9579                          */
9580                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9581                                               nodesize, 0);
9582                         if (!extent_buffer_uptodate(tmp)) {
9583                                 fprintf(stderr, "Error reading root block\n");
9584                                 return -EIO;
9585                         }
9586                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
9587                         free_extent_buffer(tmp);
9588                         if (ret)
9589                                 return ret;
9590                 } else {
9591                         bytenr = btrfs_node_blockptr(eb, i);
9592
9593                         /* If we aren't the tree root don't read the block */
9594                         if (level == 1 && !tree_root) {
9595                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
9596                                 continue;
9597                         }
9598
9599                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9600                                               nodesize, 0);
9601                         if (!extent_buffer_uptodate(tmp)) {
9602                                 fprintf(stderr, "Error reading tree block\n");
9603                                 return -EIO;
9604                         }
9605                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9606                         free_extent_buffer(tmp);
9607                         if (ret)
9608                                 return ret;
9609                 }
9610         }
9611
9612         return 0;
9613 }
9614
9615 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9616 {
9617         int ret;
9618
9619         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9620         if (ret)
9621                 return ret;
9622
9623         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
9624 }
9625
9626 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9627 {
9628         struct btrfs_block_group_cache *cache;
9629         struct btrfs_path *path;
9630         struct extent_buffer *leaf;
9631         struct btrfs_chunk *chunk;
9632         struct btrfs_key key;
9633         int ret;
9634         u64 start;
9635
9636         path = btrfs_alloc_path();
9637         if (!path)
9638                 return -ENOMEM;
9639
9640         key.objectid = 0;
9641         key.type = BTRFS_CHUNK_ITEM_KEY;
9642         key.offset = 0;
9643
9644         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9645         if (ret < 0) {
9646                 btrfs_free_path(path);
9647                 return ret;
9648         }
9649
9650         /*
9651          * We do this in case the block groups were screwed up and had alloc
9652          * bits that aren't actually set on the chunks.  This happens with
9653          * restored images every time and could happen in real life I guess.
9654          */
9655         fs_info->avail_data_alloc_bits = 0;
9656         fs_info->avail_metadata_alloc_bits = 0;
9657         fs_info->avail_system_alloc_bits = 0;
9658
9659         /* First we need to create the in-memory block groups */
9660         while (1) {
9661                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9662                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
9663                         if (ret < 0) {
9664                                 btrfs_free_path(path);
9665                                 return ret;
9666                         }
9667                         if (ret) {
9668                                 ret = 0;
9669                                 break;
9670                         }
9671                 }
9672                 leaf = path->nodes[0];
9673                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9674                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9675                         path->slots[0]++;
9676                         continue;
9677                 }
9678
9679                 chunk = btrfs_item_ptr(leaf, path->slots[0],
9680                                        struct btrfs_chunk);
9681                 btrfs_add_block_group(fs_info, 0,
9682                                       btrfs_chunk_type(leaf, chunk),
9683                                       key.objectid, key.offset,
9684                                       btrfs_chunk_length(leaf, chunk));
9685                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9686                                  key.offset + btrfs_chunk_length(leaf, chunk),
9687                                  GFP_NOFS);
9688                 path->slots[0]++;
9689         }
9690         start = 0;
9691         while (1) {
9692                 cache = btrfs_lookup_first_block_group(fs_info, start);
9693                 if (!cache)
9694                         break;
9695                 cache->cached = 1;
9696                 start = cache->key.objectid + cache->key.offset;
9697         }
9698
9699         btrfs_free_path(path);
9700         return 0;
9701 }
9702
9703 static int reset_balance(struct btrfs_trans_handle *trans,
9704                          struct btrfs_fs_info *fs_info)
9705 {
9706         struct btrfs_root *root = fs_info->tree_root;
9707         struct btrfs_path *path;
9708         struct extent_buffer *leaf;
9709         struct btrfs_key key;
9710         int del_slot, del_nr = 0;
9711         int ret;
9712         int found = 0;
9713
9714         path = btrfs_alloc_path();
9715         if (!path)
9716                 return -ENOMEM;
9717
9718         key.objectid = BTRFS_BALANCE_OBJECTID;
9719         key.type = BTRFS_BALANCE_ITEM_KEY;
9720         key.offset = 0;
9721
9722         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9723         if (ret) {
9724                 if (ret > 0)
9725                         ret = 0;
9726                 if (!ret)
9727                         goto reinit_data_reloc;
9728                 else
9729                         goto out;
9730         }
9731
9732         ret = btrfs_del_item(trans, root, path);
9733         if (ret)
9734                 goto out;
9735         btrfs_release_path(path);
9736
9737         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9738         key.type = BTRFS_ROOT_ITEM_KEY;
9739         key.offset = 0;
9740
9741         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9742         if (ret < 0)
9743                 goto out;
9744         while (1) {
9745                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9746                         if (!found)
9747                                 break;
9748
9749                         if (del_nr) {
9750                                 ret = btrfs_del_items(trans, root, path,
9751                                                       del_slot, del_nr);
9752                                 del_nr = 0;
9753                                 if (ret)
9754                                         goto out;
9755                         }
9756                         key.offset++;
9757                         btrfs_release_path(path);
9758
9759                         found = 0;
9760                         ret = btrfs_search_slot(trans, root, &key, path,
9761                                                 -1, 1);
9762                         if (ret < 0)
9763                                 goto out;
9764                         continue;
9765                 }
9766                 found = 1;
9767                 leaf = path->nodes[0];
9768                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9769                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9770                         break;
9771                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9772                         path->slots[0]++;
9773                         continue;
9774                 }
9775                 if (!del_nr) {
9776                         del_slot = path->slots[0];
9777                         del_nr = 1;
9778                 } else {
9779                         del_nr++;
9780                 }
9781                 path->slots[0]++;
9782         }
9783
9784         if (del_nr) {
9785                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9786                 if (ret)
9787                         goto out;
9788         }
9789         btrfs_release_path(path);
9790
9791 reinit_data_reloc:
9792         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9793         key.type = BTRFS_ROOT_ITEM_KEY;
9794         key.offset = (u64)-1;
9795         root = btrfs_read_fs_root(fs_info, &key);
9796         if (IS_ERR(root)) {
9797                 fprintf(stderr, "Error reading data reloc tree\n");
9798                 ret = PTR_ERR(root);
9799                 goto out;
9800         }
9801         record_root_in_trans(trans, root);
9802         ret = btrfs_fsck_reinit_root(trans, root, 0);
9803         if (ret)
9804                 goto out;
9805         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9806 out:
9807         btrfs_free_path(path);
9808         return ret;
9809 }
9810
9811 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9812                               struct btrfs_fs_info *fs_info)
9813 {
9814         u64 start = 0;
9815         int ret;
9816
9817         /*
9818          * The only reason we don't do this is because right now we're just
9819          * walking the trees we find and pinning down their bytes, we don't look
9820          * at any of the leaves.  In order to do mixed groups we'd have to check
9821          * the leaves of any fs roots and pin down the bytes for any file
9822          * extents we find.  Not hard but why do it if we don't have to?
9823          */
9824         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9825                 fprintf(stderr, "We don't support re-initing the extent tree "
9826                         "for mixed block groups yet, please notify a btrfs "
9827                         "developer you want to do this so they can add this "
9828                         "functionality.\n");
9829                 return -EINVAL;
9830         }
9831
9832         /*
9833          * first we need to walk all of the trees except the extent tree and pin
9834          * down the bytes that are in use so we don't overwrite any existing
9835          * metadata.
9836          */
9837         ret = pin_metadata_blocks(fs_info);
9838         if (ret) {
9839                 fprintf(stderr, "error pinning down used bytes\n");
9840                 return ret;
9841         }
9842
9843         /*
9844          * Need to drop all the block groups since we're going to recreate all
9845          * of them again.
9846          */
9847         btrfs_free_block_groups(fs_info);
9848         ret = reset_block_groups(fs_info);
9849         if (ret) {
9850                 fprintf(stderr, "error resetting the block groups\n");
9851                 return ret;
9852         }
9853
9854         /* Ok we can allocate now, reinit the extent root */
9855         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9856         if (ret) {
9857                 fprintf(stderr, "extent root initialization failed\n");
9858                 /*
9859                  * When the transaction code is updated we should end the
9860                  * transaction, but for now progs only knows about commit so
9861                  * just return an error.
9862                  */
9863                 return ret;
9864         }
9865
9866         /*
9867          * Now we have all the in-memory block groups setup so we can make
9868          * allocations properly, and the metadata we care about is safe since we
9869          * pinned all of it above.
9870          */
9871         while (1) {
9872                 struct btrfs_block_group_cache *cache;
9873
9874                 cache = btrfs_lookup_first_block_group(fs_info, start);
9875                 if (!cache)
9876                         break;
9877                 start = cache->key.objectid + cache->key.offset;
9878                 ret = btrfs_insert_item(trans, fs_info->extent_root,
9879                                         &cache->key, &cache->item,
9880                                         sizeof(cache->item));
9881                 if (ret) {
9882                         fprintf(stderr, "Error adding block group\n");
9883                         return ret;
9884                 }
9885                 btrfs_extent_post_op(trans, fs_info->extent_root);
9886         }
9887
9888         ret = reset_balance(trans, fs_info);
9889         if (ret)
9890                 fprintf(stderr, "error resetting the pending balance\n");
9891
9892         return ret;
9893 }
9894
9895 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9896 {
9897         struct btrfs_path *path;
9898         struct btrfs_trans_handle *trans;
9899         struct btrfs_key key;
9900         int ret;
9901
9902         printf("Recowing metadata block %llu\n", eb->start);
9903         key.objectid = btrfs_header_owner(eb);
9904         key.type = BTRFS_ROOT_ITEM_KEY;
9905         key.offset = (u64)-1;
9906
9907         root = btrfs_read_fs_root(root->fs_info, &key);
9908         if (IS_ERR(root)) {
9909                 fprintf(stderr, "Couldn't find owner root %llu\n",
9910                         key.objectid);
9911                 return PTR_ERR(root);
9912         }
9913
9914         path = btrfs_alloc_path();
9915         if (!path)
9916                 return -ENOMEM;
9917
9918         trans = btrfs_start_transaction(root, 1);
9919         if (IS_ERR(trans)) {
9920                 btrfs_free_path(path);
9921                 return PTR_ERR(trans);
9922         }
9923
9924         path->lowest_level = btrfs_header_level(eb);
9925         if (path->lowest_level)
9926                 btrfs_node_key_to_cpu(eb, &key, 0);
9927         else
9928                 btrfs_item_key_to_cpu(eb, &key, 0);
9929
9930         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9931         btrfs_commit_transaction(trans, root);
9932         btrfs_free_path(path);
9933         return ret;
9934 }
9935
9936 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9937 {
9938         struct btrfs_path *path;
9939         struct btrfs_trans_handle *trans;
9940         struct btrfs_key key;
9941         int ret;
9942
9943         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9944                bad->key.type, bad->key.offset);
9945         key.objectid = bad->root_id;
9946         key.type = BTRFS_ROOT_ITEM_KEY;
9947         key.offset = (u64)-1;
9948
9949         root = btrfs_read_fs_root(root->fs_info, &key);
9950         if (IS_ERR(root)) {
9951                 fprintf(stderr, "Couldn't find owner root %llu\n",
9952                         key.objectid);
9953                 return PTR_ERR(root);
9954         }
9955
9956         path = btrfs_alloc_path();
9957         if (!path)
9958                 return -ENOMEM;
9959
9960         trans = btrfs_start_transaction(root, 1);
9961         if (IS_ERR(trans)) {
9962                 btrfs_free_path(path);
9963                 return PTR_ERR(trans);
9964         }
9965
9966         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9967         if (ret) {
9968                 if (ret > 0)
9969                         ret = 0;
9970                 goto out;
9971         }
9972         ret = btrfs_del_item(trans, root, path);
9973 out:
9974         btrfs_commit_transaction(trans, root);
9975         btrfs_free_path(path);
9976         return ret;
9977 }
9978
9979 static int zero_log_tree(struct btrfs_root *root)
9980 {
9981         struct btrfs_trans_handle *trans;
9982         int ret;
9983
9984         trans = btrfs_start_transaction(root, 1);
9985         if (IS_ERR(trans)) {
9986                 ret = PTR_ERR(trans);
9987                 return ret;
9988         }
9989         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9990         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9991         ret = btrfs_commit_transaction(trans, root);
9992         return ret;
9993 }
9994
9995 static int populate_csum(struct btrfs_trans_handle *trans,
9996                          struct btrfs_root *csum_root, char *buf, u64 start,
9997                          u64 len)
9998 {
9999         u64 offset = 0;
10000         u64 sectorsize;
10001         int ret = 0;
10002
10003         while (offset < len) {
10004                 sectorsize = csum_root->sectorsize;
10005                 ret = read_extent_data(csum_root, buf, start + offset,
10006                                        &sectorsize, 0);
10007                 if (ret)
10008                         break;
10009                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10010                                             start + offset, buf, sectorsize);
10011                 if (ret)
10012                         break;
10013                 offset += sectorsize;
10014         }
10015         return ret;
10016 }
10017
10018 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10019                                       struct btrfs_root *csum_root,
10020                                       struct btrfs_root *cur_root)
10021 {
10022         struct btrfs_path *path;
10023         struct btrfs_key key;
10024         struct extent_buffer *node;
10025         struct btrfs_file_extent_item *fi;
10026         char *buf = NULL;
10027         u64 start = 0;
10028         u64 len = 0;
10029         int slot = 0;
10030         int ret = 0;
10031
10032         path = btrfs_alloc_path();
10033         if (!path)
10034                 return -ENOMEM;
10035         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10036         if (!buf) {
10037                 ret = -ENOMEM;
10038                 goto out;
10039         }
10040
10041         key.objectid = 0;
10042         key.offset = 0;
10043         key.type = 0;
10044
10045         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10046         if (ret < 0)
10047                 goto out;
10048         /* Iterate all regular file extents and fill its csum */
10049         while (1) {
10050                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10051
10052                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10053                         goto next;
10054                 node = path->nodes[0];
10055                 slot = path->slots[0];
10056                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10057                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10058                         goto next;
10059                 start = btrfs_file_extent_disk_bytenr(node, fi);
10060                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10061
10062                 ret = populate_csum(trans, csum_root, buf, start, len);
10063                 if (ret == -EEXIST)
10064                         ret = 0;
10065                 if (ret < 0)
10066                         goto out;
10067 next:
10068                 /*
10069                  * TODO: if next leaf is corrupted, jump to nearest next valid
10070                  * leaf.
10071                  */
10072                 ret = btrfs_next_item(cur_root, path);
10073                 if (ret < 0)
10074                         goto out;
10075                 if (ret > 0) {
10076                         ret = 0;
10077                         goto out;
10078                 }
10079         }
10080
10081 out:
10082         btrfs_free_path(path);
10083         free(buf);
10084         return ret;
10085 }
10086
10087 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10088                                   struct btrfs_root *csum_root)
10089 {
10090         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10091         struct btrfs_path *path;
10092         struct btrfs_root *tree_root = fs_info->tree_root;
10093         struct btrfs_root *cur_root;
10094         struct extent_buffer *node;
10095         struct btrfs_key key;
10096         int slot = 0;
10097         int ret = 0;
10098
10099         path = btrfs_alloc_path();
10100         if (!path)
10101                 return -ENOMEM;
10102
10103         key.objectid = BTRFS_FS_TREE_OBJECTID;
10104         key.offset = 0;
10105         key.type = BTRFS_ROOT_ITEM_KEY;
10106
10107         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10108         if (ret < 0)
10109                 goto out;
10110         if (ret > 0) {
10111                 ret = -ENOENT;
10112                 goto out;
10113         }
10114
10115         while (1) {
10116                 node = path->nodes[0];
10117                 slot = path->slots[0];
10118                 btrfs_item_key_to_cpu(node, &key, slot);
10119                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10120                         goto out;
10121                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10122                         goto next;
10123                 if (!is_fstree(key.objectid))
10124                         goto next;
10125                 key.offset = (u64)-1;
10126
10127                 cur_root = btrfs_read_fs_root(fs_info, &key);
10128                 if (IS_ERR(cur_root) || !cur_root) {
10129                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10130                                 key.objectid);
10131                         goto out;
10132                 }
10133                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10134                                 cur_root);
10135                 if (ret < 0)
10136                         goto out;
10137 next:
10138                 ret = btrfs_next_item(tree_root, path);
10139                 if (ret > 0) {
10140                         ret = 0;
10141                         goto out;
10142                 }
10143                 if (ret < 0)
10144                         goto out;
10145         }
10146
10147 out:
10148         btrfs_free_path(path);
10149         return ret;
10150 }
10151
10152 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10153                                       struct btrfs_root *csum_root)
10154 {
10155         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10156         struct btrfs_path *path;
10157         struct btrfs_extent_item *ei;
10158         struct extent_buffer *leaf;
10159         char *buf;
10160         struct btrfs_key key;
10161         int ret;
10162
10163         path = btrfs_alloc_path();
10164         if (!path)
10165                 return -ENOMEM;
10166
10167         key.objectid = 0;
10168         key.type = BTRFS_EXTENT_ITEM_KEY;
10169         key.offset = 0;
10170
10171         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10172         if (ret < 0) {
10173                 btrfs_free_path(path);
10174                 return ret;
10175         }
10176
10177         buf = malloc(csum_root->sectorsize);
10178         if (!buf) {
10179                 btrfs_free_path(path);
10180                 return -ENOMEM;
10181         }
10182
10183         while (1) {
10184                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10185                         ret = btrfs_next_leaf(extent_root, path);
10186                         if (ret < 0)
10187                                 break;
10188                         if (ret) {
10189                                 ret = 0;
10190                                 break;
10191                         }
10192                 }
10193                 leaf = path->nodes[0];
10194
10195                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10196                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10197                         path->slots[0]++;
10198                         continue;
10199                 }
10200
10201                 ei = btrfs_item_ptr(leaf, path->slots[0],
10202                                     struct btrfs_extent_item);
10203                 if (!(btrfs_extent_flags(leaf, ei) &
10204                       BTRFS_EXTENT_FLAG_DATA)) {
10205                         path->slots[0]++;
10206                         continue;
10207                 }
10208
10209                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10210                                     key.offset);
10211                 if (ret)
10212                         break;
10213                 path->slots[0]++;
10214         }
10215
10216         btrfs_free_path(path);
10217         free(buf);
10218         return ret;
10219 }
10220
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * When the extent tree has just been reinitialized its extent info is gone
 * and cannot be used as the source; a non-zero @search_fs_tree selects the
 * fs/subvolume trees instead.  Otherwise the extent tree is walked.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
10237
10238 static void free_roots_info_cache(void)
10239 {
10240         if (!roots_info_cache)
10241                 return;
10242
10243         while (!cache_tree_empty(roots_info_cache)) {
10244                 struct cache_extent *entry;
10245                 struct root_item_info *rii;
10246
10247                 entry = first_cache_extent(roots_info_cache);
10248                 if (!entry)
10249                         break;
10250                 remove_cache_extent(roots_info_cache, entry);
10251                 rii = container_of(entry, struct root_item_info, cache_extent);
10252                 free(rii);
10253         }
10254
10255         free(roots_info_cache);
10256         roots_info_cache = NULL;
10257 }
10258
10259 static int build_roots_info_cache(struct btrfs_fs_info *info)
10260 {
10261         int ret = 0;
10262         struct btrfs_key key;
10263         struct extent_buffer *leaf;
10264         struct btrfs_path *path;
10265
10266         if (!roots_info_cache) {
10267                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10268                 if (!roots_info_cache)
10269                         return -ENOMEM;
10270                 cache_tree_init(roots_info_cache);
10271         }
10272
10273         path = btrfs_alloc_path();
10274         if (!path)
10275                 return -ENOMEM;
10276
10277         key.objectid = 0;
10278         key.type = BTRFS_EXTENT_ITEM_KEY;
10279         key.offset = 0;
10280
10281         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10282         if (ret < 0)
10283                 goto out;
10284         leaf = path->nodes[0];
10285
10286         while (1) {
10287                 struct btrfs_key found_key;
10288                 struct btrfs_extent_item *ei;
10289                 struct btrfs_extent_inline_ref *iref;
10290                 int slot = path->slots[0];
10291                 int type;
10292                 u64 flags;
10293                 u64 root_id;
10294                 u8 level;
10295                 struct cache_extent *entry;
10296                 struct root_item_info *rii;
10297
10298                 if (slot >= btrfs_header_nritems(leaf)) {
10299                         ret = btrfs_next_leaf(info->extent_root, path);
10300                         if (ret < 0) {
10301                                 break;
10302                         } else if (ret) {
10303                                 ret = 0;
10304                                 break;
10305                         }
10306                         leaf = path->nodes[0];
10307                         slot = path->slots[0];
10308                 }
10309
10310                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10311
10312                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10313                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10314                         goto next;
10315
10316                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10317                 flags = btrfs_extent_flags(leaf, ei);
10318
10319                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10320                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10321                         goto next;
10322
10323                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10324                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10325                         level = found_key.offset;
10326                 } else {
10327                         struct btrfs_tree_block_info *binfo;
10328
10329                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10330                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10331                         level = btrfs_tree_block_level(leaf, binfo);
10332                 }
10333
10334                 /*
10335                  * For a root extent, it must be of the following type and the
10336                  * first (and only one) iref in the item.
10337                  */
10338                 type = btrfs_extent_inline_ref_type(leaf, iref);
10339                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10340                         goto next;
10341
10342                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10343                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10344                 if (!entry) {
10345                         rii = malloc(sizeof(struct root_item_info));
10346                         if (!rii) {
10347                                 ret = -ENOMEM;
10348                                 goto out;
10349                         }
10350                         rii->cache_extent.start = root_id;
10351                         rii->cache_extent.size = 1;
10352                         rii->level = (u8)-1;
10353                         entry = &rii->cache_extent;
10354                         ret = insert_cache_extent(roots_info_cache, entry);
10355                         ASSERT(ret == 0);
10356                 } else {
10357                         rii = container_of(entry, struct root_item_info,
10358                                            cache_extent);
10359                 }
10360
10361                 ASSERT(rii->cache_extent.start == root_id);
10362                 ASSERT(rii->cache_extent.size == 1);
10363
10364                 if (level > rii->level || rii->level == (u8)-1) {
10365                         rii->level = level;
10366                         rii->bytenr = found_key.objectid;
10367                         rii->gen = btrfs_extent_generation(leaf, ei);
10368                         rii->node_count = 1;
10369                 } else if (level == rii->level) {
10370                         rii->node_count++;
10371                 }
10372 next:
10373                 path->slots[0]++;
10374         }
10375
10376 out:
10377         btrfs_free_path(path);
10378
10379         return ret;
10380 }
10381
10382 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10383                                   struct btrfs_path *path,
10384                                   const struct btrfs_key *root_key,
10385                                   const int read_only_mode)
10386 {
10387         const u64 root_id = root_key->objectid;
10388         struct cache_extent *entry;
10389         struct root_item_info *rii;
10390         struct btrfs_root_item ri;
10391         unsigned long offset;
10392
10393         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10394         if (!entry) {
10395                 fprintf(stderr,
10396                         "Error: could not find extent items for root %llu\n",
10397                         root_key->objectid);
10398                 return -ENOENT;
10399         }
10400
10401         rii = container_of(entry, struct root_item_info, cache_extent);
10402         ASSERT(rii->cache_extent.start == root_id);
10403         ASSERT(rii->cache_extent.size == 1);
10404
10405         if (rii->node_count != 1) {
10406                 fprintf(stderr,
10407                         "Error: could not find btree root extent for root %llu\n",
10408                         root_id);
10409                 return -ENOENT;
10410         }
10411
10412         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10413         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10414
10415         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10416             btrfs_root_level(&ri) != rii->level ||
10417             btrfs_root_generation(&ri) != rii->gen) {
10418
10419                 /*
10420                  * If we're in repair mode but our caller told us to not update
10421                  * the root item, i.e. just check if it needs to be updated, don't
10422                  * print this message, since the caller will call us again shortly
10423                  * for the same root item without read only mode (the caller will
10424                  * open a transaction first).
10425                  */
10426                 if (!(read_only_mode && repair))
10427                         fprintf(stderr,
10428                                 "%sroot item for root %llu,"
10429                                 " current bytenr %llu, current gen %llu, current level %u,"
10430                                 " new bytenr %llu, new gen %llu, new level %u\n",
10431                                 (read_only_mode ? "" : "fixing "),
10432                                 root_id,
10433                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10434                                 btrfs_root_level(&ri),
10435                                 rii->bytenr, rii->gen, rii->level);
10436
10437                 if (btrfs_root_generation(&ri) > rii->gen) {
10438                         fprintf(stderr,
10439                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10440                                 root_id, btrfs_root_generation(&ri), rii->gen);
10441                         return -EINVAL;
10442                 }
10443
10444                 if (!read_only_mode) {
10445                         btrfs_set_root_bytenr(&ri, rii->bytenr);
10446                         btrfs_set_root_level(&ri, rii->level);
10447                         btrfs_set_root_generation(&ri, rii->gen);
10448                         write_extent_buffer(path->nodes[0], &ri,
10449                                             offset, sizeof(ri));
10450                 }
10451
10452                 return 1;
10453         }
10454
10455         return 0;
10456 }
10457
10458 /*
10459  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10460  * caused read-only snapshots to be corrupted if they were created at a moment
10461  * when the source subvolume/snapshot had orphan items. The issue was that the
10462  * on-disk root items became incorrect, referring to the pre orphan cleanup root
10463  * node instead of the post orphan cleanup root node.
10464  * So this function, and its callees, just detects and fixes those cases. Even
10465  * though the regression was for read-only snapshots, this function applies to
10466  * any snapshot/subvolume root.
10467  * This must be run before any other repair code - not doing it so, makes other
10468  * repair code delete or modify backrefs in the extent tree for example, which
10469  * will result in an inconsistent fs after repairing the root items.
10470  */
10471 static int repair_root_items(struct btrfs_fs_info *info)
10472 {
10473         struct btrfs_path *path = NULL;
10474         struct btrfs_key key;
10475         struct extent_buffer *leaf;
10476         struct btrfs_trans_handle *trans = NULL;
10477         int ret = 0;
10478         int bad_roots = 0;
10479         int need_trans = 0;
10480
10481         ret = build_roots_info_cache(info);
10482         if (ret)
10483                 goto out;
10484
10485         path = btrfs_alloc_path();
10486         if (!path) {
10487                 ret = -ENOMEM;
10488                 goto out;
10489         }
10490
10491         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10492         key.type = BTRFS_ROOT_ITEM_KEY;
10493         key.offset = 0;
10494
10495 again:
10496         /*
10497          * Avoid opening and committing transactions if a leaf doesn't have
10498          * any root items that need to be fixed, so that we avoid rotating
10499          * backup roots unnecessarily.
10500          */
10501         if (need_trans) {
10502                 trans = btrfs_start_transaction(info->tree_root, 1);
10503                 if (IS_ERR(trans)) {
10504                         ret = PTR_ERR(trans);
10505                         goto out;
10506                 }
10507         }
10508
10509         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10510                                 0, trans ? 1 : 0);
10511         if (ret < 0)
10512                 goto out;
10513         leaf = path->nodes[0];
10514
10515         while (1) {
10516                 struct btrfs_key found_key;
10517
10518                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10519                         int no_more_keys = find_next_key(path, &key);
10520
10521                         btrfs_release_path(path);
10522                         if (trans) {
10523                                 ret = btrfs_commit_transaction(trans,
10524                                                                info->tree_root);
10525                                 trans = NULL;
10526                                 if (ret < 0)
10527                                         goto out;
10528                         }
10529                         need_trans = 0;
10530                         if (no_more_keys)
10531                                 break;
10532                         goto again;
10533                 }
10534
10535                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10536
10537                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10538                         goto next;
10539                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10540                         goto next;
10541
10542                 ret = maybe_repair_root_item(info, path, &found_key,
10543                                              trans ? 0 : 1);
10544                 if (ret < 0)
10545                         goto out;
10546                 if (ret) {
10547                         if (!trans && repair) {
10548                                 need_trans = 1;
10549                                 key = found_key;
10550                                 btrfs_release_path(path);
10551                                 goto again;
10552                         }
10553                         bad_roots++;
10554                 }
10555 next:
10556                 path->slots[0]++;
10557         }
10558         ret = 0;
10559 out:
10560         free_roots_info_cache();
10561         btrfs_free_path(path);
10562         if (trans)
10563                 btrfs_commit_transaction(trans, info->tree_root);
10564         if (ret < 0)
10565                 return ret;
10566
10567         return bad_roots;
10568 }
10569
/*
 * Help text for "btrfs check".  cmd_check() hands this table to usage()
 * when the command line cannot be parsed.  The table is NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	/* synopsis */
	"btrfs check [options] <device>",
	/* one-line summary */
	"Check structural integrity of a filesystem (unmounted).",
	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	/* blank separator between the description and the option list */
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	/* end-of-table sentinel */
	NULL
};
10593
10594 int cmd_check(int argc, char **argv)
10595 {
10596         struct cache_tree root_cache;
10597         struct btrfs_root *root;
10598         struct btrfs_fs_info *info;
10599         u64 bytenr = 0;
10600         u64 subvolid = 0;
10601         u64 tree_root_bytenr = 0;
10602         u64 chunk_root_bytenr = 0;
10603         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10604         int ret;
10605         u64 num;
10606         int init_csum_tree = 0;
10607         int readonly = 0;
10608         int qgroup_report = 0;
10609         int qgroups_repaired = 0;
10610         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10611
10612         while(1) {
10613                 int c;
10614                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10615                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10616                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10617                 static const struct option long_options[] = {
10618                         { "super", required_argument, NULL, 's' },
10619                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10620                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10621                         { "init-csum-tree", no_argument, NULL,
10622                                 GETOPT_VAL_INIT_CSUM },
10623                         { "init-extent-tree", no_argument, NULL,
10624                                 GETOPT_VAL_INIT_EXTENT },
10625                         { "check-data-csum", no_argument, NULL,
10626                                 GETOPT_VAL_CHECK_CSUM },
10627                         { "backup", no_argument, NULL, 'b' },
10628                         { "subvol-extents", required_argument, NULL, 'E' },
10629                         { "qgroup-report", no_argument, NULL, 'Q' },
10630                         { "tree-root", required_argument, NULL, 'r' },
10631                         { "chunk-root", required_argument, NULL,
10632                                 GETOPT_VAL_CHUNK_TREE },
10633                         { "progress", no_argument, NULL, 'p' },
10634                         { NULL, 0, NULL, 0}
10635                 };
10636
10637                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10638                 if (c < 0)
10639                         break;
10640                 switch(c) {
10641                         case 'a': /* ignored */ break;
10642                         case 'b':
10643                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10644                                 break;
10645                         case 's':
10646                                 num = arg_strtou64(optarg);
10647                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10648                                         fprintf(stderr,
10649                                                 "ERROR: super mirror should be less than: %d\n",
10650                                                 BTRFS_SUPER_MIRROR_MAX);
10651                                         exit(1);
10652                                 }
10653                                 bytenr = btrfs_sb_offset(((int)num));
10654                                 printf("using SB copy %llu, bytenr %llu\n", num,
10655                                        (unsigned long long)bytenr);
10656                                 break;
10657                         case 'Q':
10658                                 qgroup_report = 1;
10659                                 break;
10660                         case 'E':
10661                                 subvolid = arg_strtou64(optarg);
10662                                 break;
10663                         case 'r':
10664                                 tree_root_bytenr = arg_strtou64(optarg);
10665                                 break;
10666                         case GETOPT_VAL_CHUNK_TREE:
10667                                 chunk_root_bytenr = arg_strtou64(optarg);
10668                                 break;
10669                         case 'p':
10670                                 ctx.progress_enabled = true;
10671                                 break;
10672                         case '?':
10673                         case 'h':
10674                                 usage(cmd_check_usage);
10675                         case GETOPT_VAL_REPAIR:
10676                                 printf("enabling repair mode\n");
10677                                 repair = 1;
10678                                 ctree_flags |= OPEN_CTREE_WRITES;
10679                                 break;
10680                         case GETOPT_VAL_READONLY:
10681                                 readonly = 1;
10682                                 break;
10683                         case GETOPT_VAL_INIT_CSUM:
10684                                 printf("Creating a new CRC tree\n");
10685                                 init_csum_tree = 1;
10686                                 repair = 1;
10687                                 ctree_flags |= OPEN_CTREE_WRITES;
10688                                 break;
10689                         case GETOPT_VAL_INIT_EXTENT:
10690                                 init_extent_tree = 1;
10691                                 ctree_flags |= (OPEN_CTREE_WRITES |
10692                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
10693                                 repair = 1;
10694                                 break;
10695                         case GETOPT_VAL_CHECK_CSUM:
10696                                 check_data_csum = 1;
10697                                 break;
10698                 }
10699         }
10700
10701         if (check_argc_exact(argc - optind, 1))
10702                 usage(cmd_check_usage);
10703
10704         if (ctx.progress_enabled) {
10705                 ctx.tp = TASK_NOTHING;
10706                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10707         }
10708
10709         /* This check is the only reason for --readonly to exist */
10710         if (readonly && repair) {
10711                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10712                 exit(1);
10713         }
10714
10715         radix_tree_init();
10716         cache_tree_init(&root_cache);
10717
10718         if((ret = check_mounted(argv[optind])) < 0) {
10719                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10720                 goto err_out;
10721         } else if(ret) {
10722                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10723                 ret = -EBUSY;
10724                 goto err_out;
10725         }
10726
10727         /* only allow partial opening under repair mode */
10728         if (repair)
10729                 ctree_flags |= OPEN_CTREE_PARTIAL;
10730
10731         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10732                                   chunk_root_bytenr, ctree_flags);
10733         if (!info) {
10734                 fprintf(stderr, "Couldn't open file system\n");
10735                 ret = -EIO;
10736                 goto err_out;
10737         }
10738
10739         global_info = info;
10740         root = info->fs_root;
10741
10742         /*
10743          * repair mode will force us to commit transaction which
10744          * will make us fail to load log tree when mounting.
10745          */
10746         if (repair && btrfs_super_log_root(info->super_copy)) {
10747                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10748                 if (!ret) {
10749                         ret = 1;
10750                         goto close_out;
10751                 }
10752                 ret = zero_log_tree(root);
10753                 if (ret) {
10754                         fprintf(stderr, "fail to zero log tree\n");
10755                         goto close_out;
10756                 }
10757         }
10758
10759         uuid_unparse(info->super_copy->fsid, uuidbuf);
10760         if (qgroup_report) {
10761                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10762                        uuidbuf);
10763                 ret = qgroup_verify_all(info);
10764                 if (ret == 0)
10765                         report_qgroups(1);
10766                 goto close_out;
10767         }
10768         if (subvolid) {
10769                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10770                        subvolid, argv[optind], uuidbuf);
10771                 ret = print_extent_state(info, subvolid);
10772                 goto close_out;
10773         }
10774         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10775
10776         if (!extent_buffer_uptodate(info->tree_root->node) ||
10777             !extent_buffer_uptodate(info->dev_root->node) ||
10778             !extent_buffer_uptodate(info->chunk_root->node)) {
10779                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10780                 ret = -EIO;
10781                 goto close_out;
10782         }
10783
10784         if (init_extent_tree || init_csum_tree) {
10785                 struct btrfs_trans_handle *trans;
10786
10787                 trans = btrfs_start_transaction(info->extent_root, 0);
10788                 if (IS_ERR(trans)) {
10789                         fprintf(stderr, "Error starting transaction\n");
10790                         ret = PTR_ERR(trans);
10791                         goto close_out;
10792                 }
10793
10794                 if (init_extent_tree) {
10795                         printf("Creating a new extent tree\n");
10796                         ret = reinit_extent_tree(trans, info);
10797                         if (ret)
10798                                 goto close_out;
10799                 }
10800
10801                 if (init_csum_tree) {
10802                         fprintf(stderr, "Reinit crc root\n");
10803                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10804                         if (ret) {
10805                                 fprintf(stderr, "crc root initialization failed\n");
10806                                 ret = -EIO;
10807                                 goto close_out;
10808                         }
10809
10810                         ret = fill_csum_tree(trans, info->csum_root,
10811                                              init_extent_tree);
10812                         if (ret) {
10813                                 fprintf(stderr, "crc refilling failed\n");
10814                                 return -EIO;
10815                         }
10816                 }
10817                 /*
10818                  * Ok now we commit and run the normal fsck, which will add
10819                  * extent entries for all of the items it finds.
10820                  */
10821                 ret = btrfs_commit_transaction(trans, info->extent_root);
10822                 if (ret)
10823                         goto close_out;
10824         }
10825         if (!extent_buffer_uptodate(info->extent_root->node)) {
10826                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10827                 ret = -EIO;
10828                 goto close_out;
10829         }
10830         if (!extent_buffer_uptodate(info->csum_root->node)) {
10831                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10832                 ret = -EIO;
10833                 goto close_out;
10834         }
10835
10836         if (!ctx.progress_enabled)
10837                 fprintf(stderr, "checking extents\n");
10838         ret = check_chunks_and_extents(root);
10839         if (ret)
10840                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10841
10842         ret = repair_root_items(info);
10843         if (ret < 0)
10844                 goto close_out;
10845         if (repair) {
10846                 fprintf(stderr, "Fixed %d roots.\n", ret);
10847                 ret = 0;
10848         } else if (ret > 0) {
10849                 fprintf(stderr,
10850                        "Found %d roots with an outdated root item.\n",
10851                        ret);
10852                 fprintf(stderr,
10853                         "Please run a filesystem check with the option --repair to fix them.\n");
10854                 ret = 1;
10855                 goto close_out;
10856         }
10857
10858         if (!ctx.progress_enabled) {
10859                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10860                         fprintf(stderr, "checking free space tree\n");
10861                 else
10862                         fprintf(stderr, "checking free space cache\n");
10863         }
10864         ret = check_space_cache(root);
10865         if (ret)
10866                 goto out;
10867
10868         /*
10869          * We used to have to have these hole extents in between our real
10870          * extents so if we don't have this flag set we need to make sure there
10871          * are no gaps in the file extents for inodes, otherwise we can just
10872          * ignore it when this happens.
10873          */
10874         no_holes = btrfs_fs_incompat(root->fs_info,
10875                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10876         if (!ctx.progress_enabled)
10877                 fprintf(stderr, "checking fs roots\n");
10878         ret = check_fs_roots(root, &root_cache);
10879         if (ret)
10880                 goto out;
10881
10882         fprintf(stderr, "checking csums\n");
10883         ret = check_csums(root);
10884         if (ret)
10885                 goto out;
10886
10887         fprintf(stderr, "checking root refs\n");
10888         ret = check_root_refs(root, &root_cache);
10889         if (ret)
10890                 goto out;
10891
10892         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10893                 struct extent_buffer *eb;
10894
10895                 eb = list_first_entry(&root->fs_info->recow_ebs,
10896                                       struct extent_buffer, recow);
10897                 list_del_init(&eb->recow);
10898                 ret = recow_extent_buffer(root, eb);
10899                 if (ret)
10900                         break;
10901         }
10902
10903         while (!list_empty(&delete_items)) {
10904                 struct bad_item *bad;
10905
10906                 bad = list_first_entry(&delete_items, struct bad_item, list);
10907                 list_del_init(&bad->list);
10908                 if (repair)
10909                         ret = delete_bad_item(root, bad);
10910                 free(bad);
10911         }
10912
10913         if (info->quota_enabled) {
10914                 int err;
10915                 fprintf(stderr, "checking quota groups\n");
10916                 err = qgroup_verify_all(info);
10917                 if (err)
10918                         goto out;
10919                 report_qgroups(0);
10920                 err = repair_qgroups(info, &qgroups_repaired);
10921                 if (err)
10922                         goto out;
10923         }
10924
10925         if (!list_empty(&root->fs_info->recow_ebs)) {
10926                 fprintf(stderr, "Transid errors in file system\n");
10927                 ret = 1;
10928         }
10929 out:
10930         /* Don't override original ret */
10931         if (!ret && qgroups_repaired)
10932                 ret = qgroups_repaired;
10933
10934         if (found_old_backref) { /*
10935                  * there was a disk format change when mixed
10936                  * backref was in testing tree. The old format
10937                  * existed about one week.
10938                  */
10939                 printf("\n * Found old mixed backref format. "
10940                        "The old format is not supported! *"
10941                        "\n * Please mount the FS in readonly mode, "
10942                        "backup data and re-format the FS. *\n\n");
10943                 ret = 1;
10944         }
10945         printf("found %llu bytes used err is %d\n",
10946                (unsigned long long)bytes_used, ret);
10947         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10948         printf("total tree bytes: %llu\n",
10949                (unsigned long long)total_btree_bytes);
10950         printf("total fs tree bytes: %llu\n",
10951                (unsigned long long)total_fs_tree_bytes);
10952         printf("total extent tree bytes: %llu\n",
10953                (unsigned long long)total_extent_tree_bytes);
10954         printf("btree space waste bytes: %llu\n",
10955                (unsigned long long)btree_space_waste);
10956         printf("file data blocks allocated: %llu\n referenced %llu\n",
10957                 (unsigned long long)data_bytes_allocated,
10958                 (unsigned long long)data_bytes_referenced);
10959
10960         free_qgroup_counts();
10961         free_root_recs_tree(&root_cache);
10962 close_out:
10963         close_ctree(root);
10964 err_out:
10965         if (ctx.progress_enabled)
10966                 task_deinit(ctx.info);
10967
10968         return ret;
10969 }