btrfs-progs: check: remove unused argument from process_dir_item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check that the progress indicator can report.  The value is
 * used as an index into task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
53
/* State handed to the progress reporting thread, see print_status_check(). */
struct task_ctx {
	int progress_enabled;
	/* Phase currently being reported; selects the status text */
	enum task_position tp;

	/* Passed to task_period_start() and task_period_wait() */
	struct task_info *info;
};
60
/*
 * File-scope state shared by the check code.
 * NOTE(review): the counters are presumably accumulated while walking the
 * trees and printed in the final summary - confirm against the users that
 * are outside this part of the file.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Context of the progress indicator, starts out zeroed */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Available check implementations.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig"/"original" to CHECK_MODE_ORIGINAL and anything
 * else to CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; initialized to the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a back reference.  It is embedded as the 'node' member
 * of struct data_backref and struct tree_backref and is linked into a list
 * through 'list' (see to_extent_backref()).
 */
struct extent_backref {
	struct list_head list;
	/* Single-bit state flags */
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Back reference of a data extent.  The common header is embedded as 'node'
 * so that to_data_backref() can get back to this structure from it.
 */
struct data_backref {
	struct extent_backref node;
	/*
	 * parent and root share the same storage.
	 * NOTE(review): presumably node.full_backref tells which one is
	 * valid - confirm against the users.
	 */
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	u32 found_ref;
};
116
/*
 * Error bits reported by the fs tree item checks.  Every value is a distinct
 * bit so that several errors can be OR-ed into one int.  Bit 0 is not used.
 */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but without the undetermined members and linked
 * through a list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	/* Printed as the inode number by print_orphan_data_extents() */
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
154
/*
 * Back reference of a tree block.  The common header is embedded as 'node'
 * so that to_tree_backref() can get back to this structure from it.
 */
struct tree_backref {
	struct extent_backref node;
	/* parent and root share the same storage */
	union {
		u64 parent;
		u64 root;
	};
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That field is a 2-bit bitfield, so the value 2 fits next to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
170
/*
 * Record of one extent.  It can be linked into a list through 'list'
 * (see to_extent_record()) and carries a cache_extent in 'cache'.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* Two bits wide so that it can also hold FLAG_UNSET */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One name referring to an inode, linked into inode_record::backrefs
 * through 'list'.  The name is stored right behind the structure;
 * clone_inode_rec() copies sizeof(struct) + namelen + 1 bytes per entry.
 */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	u8 filetype;
	u8 ref_type;
	int errors;
	u64 dir;
	u64 index;
	u16 namelen;
	/* Trailing storage for the name, namelen bytes plus one extra byte */
	char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * List-linked record describing a root item.
 * NOTE(review): the fields presumably mirror the corresponding members of
 * the on-disk root item - confirm against the code that fills them in.
 */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	int level_size;
	struct btrfs_key drop_key;
};
232
/*
 * Error bits of a back reference.  print_ref_error() decodes a bitmask made
 * of these flags into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
/*
 * A hole in the file extents of an inode, covering [start, start + len).
 * Holes are kept in an rb-tree (inode_record::holes) through 'node'.
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
252
/*
 * Everything collected about one inode while scanning a fs tree.
 * Records are reference counted through 'refs' and duplicated by
 * clone_inode_rec() before a shared record gets modified.
 */
struct inode_record {
	/* List of struct inode_backref */
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	/* Bitmask of I_ERR_* flags, decoded by print_inode_error() */
	int errors;

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	/* Set to (u64)-1 for a freshly created record, see get_inode_rec() */
	u64 extent_start;
	u64 extent_end;
	/* rb-tree of struct file_extent_hole */
	struct rb_root holes;
	/* List of struct orphan_data_extent */
	struct list_head orphan_extents;

	/* Reference count, free_inode_rec() releases the record at zero */
	u32 refs;
};
280
/*
 * Error bits stored in inode_record::errors.  print_inode_error() decodes
 * them into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
/*
 * One reference to a root, linked into root_record::backrefs through
 * 'list'.  Like struct inode_backref, the name is a trailing array.
 */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
	int errors;
	u64 ref_root;
	u64 dir;
	u64 index;
	u16 namelen;
	/* Trailing storage for the name */
	char name[0];
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/* Per-root record kept in a cache tree through 'cache'. */
struct root_record {
	/* List of struct root_backref */
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
324
/*
 * Cache tree entry that carries an opaque pointer.  get_inode_rec() uses it
 * to store a struct inode_record in 'data', keyed by the inode number.
 */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};
329
/*
 * Cache tree entry holding its own root and inode caches.
 * NOTE(review): presumably describes a tree block shared by several roots,
 * with 'refs' counting the references still to be visited - confirm.
 */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};
337
/* Start and size of a block. */
struct block_info {
	u64 start;
	u32 size;
};
342
/*
 * State of a tree walk: a cache tree of shared nodes plus one shared_node
 * slot for each of the BTRFS_MAX_LEVEL tree levels.
 */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	/* NOTE(review): presumably the index into nodes[] in use - confirm */
	int active_node;
	int root_level;
};
349
/* List-linked record of an item, identified by its key and root id. */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
355
/* List-linked entry describing a byte range with a counter and a flag. */
struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
363
/* Information about a tree root, kept in a cache tree via 'cache_extent'. */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
373
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag must own a distinct bit, otherwise two different errors cannot
 * be told apart once they are OR-ed into the same value.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/* Used to share bit 4 with REFERENCER_MISMATCH, moved to a free bit */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the status line written by print_status_check() by emitting a
 * newline on stdout.  The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
/* Forward declaration so the function can be used ahead of its definition */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct btrfs_root *root,
1987                             struct extent_buffer *parent, int slot,
1988                             struct extent_buffer *child)
1989 {
1990         struct btrfs_key parent_key;
1991         struct btrfs_key child_key;
1992         int ret = 0;
1993
1994         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1995         if (btrfs_header_level(child) == 0)
1996                 btrfs_item_key_to_cpu(child, &child_key, 0);
1997         else
1998                 btrfs_node_key_to_cpu(child, &child_key, 0);
1999
2000         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2001                 ret = -EINVAL;
2002                 fprintf(stderr,
2003                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2004                         parent_key.objectid, parent_key.type, parent_key.offset,
2005                         child_key.objectid, child_key.type, child_key.offset);
2006         }
2007         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008                 ret = -EINVAL;
2009                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2010                         btrfs_node_blockptr(parent, slot),
2011                         btrfs_header_bytenr(child));
2012         }
2013         if (btrfs_node_ptr_generation(parent, slot) !=
2014             btrfs_header_generation(child)) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2017                         btrfs_header_generation(child),
2018                         btrfs_node_ptr_generation(parent, slot));
2019         }
2020         return ret;
2021 }
2022
2023 /*
2024  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2025  * in every fs or file tree check. Here we find its all root ids, and only check
2026  * it in the fs or file tree which has the smallest root id.
2027  */
2028 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 {
2030         struct rb_node *node;
2031         struct ulist_node *u;
2032
2033         if (roots->nnodes == 1)
2034                 return 1;
2035
2036         node = rb_first(&roots->root);
2037         u = rb_entry(node, struct ulist_node, rb_node);
2038         /*
2039          * current root id is not smallest, we skip it and let it be checked
2040          * in the fs or file tree who hash the smallest root id.
2041          */
2042         if (root->objectid != u->val)
2043                 return 0;
2044
2045         return 1;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, we record its reference count, so later if we still
2050  * process this node or leaf, don't need to compute its reference count again.
2051  */
2052 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2053                              struct node_refs *nrefs, u64 level)
2054 {
2055         int check, ret;
2056         u64 refs;
2057         struct ulist *roots;
2058
2059         if (nrefs->bytenr[level] != bytenr) {
2060                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2061                                        level, 1, &refs, NULL);
2062                 if (ret < 0)
2063                         return ret;
2064
2065                 nrefs->bytenr[level] = bytenr;
2066                 nrefs->refs[level] = refs;
2067                 if (refs > 1) {
2068                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2069                                                    0, &roots);
2070                         if (ret)
2071                                 return -EIO;
2072
2073                         check = need_check(root, roots);
2074                         ulist_free(roots);
2075                         nrefs->need_check[level] = check;
2076                 } else {
2077                         nrefs->need_check[level] = 1;
2078                 }
2079         }
2080
2081         return 0;
2082 }
2083
2084 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2085                           struct walk_control *wc, int *level,
2086                           struct node_refs *nrefs)
2087 {
2088         enum btrfs_tree_block_status status;
2089         u64 bytenr;
2090         u64 ptr_gen;
2091         struct extent_buffer *next;
2092         struct extent_buffer *cur;
2093         u32 blocksize;
2094         int ret, err = 0;
2095         u64 refs;
2096
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2101                 refs = nrefs->refs[*level];
2102                 ret = 0;
2103         } else {
2104                 ret = btrfs_lookup_extent_info(NULL, root,
2105                                        path->nodes[*level]->start,
2106                                        *level, 1, &refs, NULL);
2107                 if (ret < 0) {
2108                         err = ret;
2109                         goto out;
2110                 }
2111                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2112                 nrefs->refs[*level] = refs;
2113         }
2114
2115         if (refs > 1) {
2116                 ret = enter_shared_node(root, path->nodes[*level]->start,
2117                                         refs, wc, *level);
2118                 if (ret > 0) {
2119                         err = ret;
2120                         goto out;
2121                 }
2122         }
2123
2124         while (*level >= 0) {
2125                 WARN_ON(*level < 0);
2126                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2127                 cur = path->nodes[*level];
2128
2129                 if (btrfs_header_level(cur) != *level)
2130                         WARN_ON(1);
2131
2132                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2133                         break;
2134                 if (*level == 0) {
2135                         ret = process_one_leaf(root, cur, wc);
2136                         if (ret < 0)
2137                                 err = ret;
2138                         break;
2139                 }
2140                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2141                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2142                 blocksize = root->nodesize;
2143
2144                 if (bytenr == nrefs->bytenr[*level - 1]) {
2145                         refs = nrefs->refs[*level - 1];
2146                 } else {
2147                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2148                                         *level - 1, 1, &refs, NULL);
2149                         if (ret < 0) {
2150                                 refs = 0;
2151                         } else {
2152                                 nrefs->bytenr[*level - 1] = bytenr;
2153                                 nrefs->refs[*level - 1] = refs;
2154                         }
2155                 }
2156
2157                 if (refs > 1) {
2158                         ret = enter_shared_node(root, bytenr, refs,
2159                                                 wc, *level - 1);
2160                         if (ret > 0) {
2161                                 path->slots[*level]++;
2162                                 continue;
2163                         }
2164                 }
2165
2166                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2167                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2168                         free_extent_buffer(next);
2169                         reada_walk_down(root, cur, path->slots[*level]);
2170                         next = read_tree_block(root, bytenr, blocksize,
2171                                                ptr_gen);
2172                         if (!extent_buffer_uptodate(next)) {
2173                                 struct btrfs_key node_key;
2174
2175                                 btrfs_node_key_to_cpu(path->nodes[*level],
2176                                                       &node_key,
2177                                                       path->slots[*level]);
2178                                 btrfs_add_corrupt_extent_record(root->fs_info,
2179                                                 &node_key,
2180                                                 path->nodes[*level]->start,
2181                                                 root->nodesize, *level);
2182                                 err = -EIO;
2183                                 goto out;
2184                         }
2185                 }
2186
2187                 ret = check_child_node(root, cur, path->slots[*level], next);
2188                 if (ret) {
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2217                              int *level, struct node_refs *nrefs, int ext_ref)
2218 {
2219         enum btrfs_tree_block_status status;
2220         u64 bytenr;
2221         u64 ptr_gen;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         u32 blocksize;
2225         int ret;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, path->nodes[*level]->start,
2231                                 nrefs, *level);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 /* Don't forgot to check leaf/node validation */
2246                 if (*level == 0) {
2247                         ret = btrfs_check_leaf(root, NULL, cur);
2248                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2249                                 ret = -EIO;
2250                                 break;
2251                         }
2252                         ret = process_one_leaf_v2(root, path, nrefs,
2253                                                   level, ext_ref);
2254                         break;
2255                 } else {
2256                         ret = btrfs_check_node(root, NULL, cur);
2257                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2258                                 ret = -EIO;
2259                                 break;
2260                         }
2261                 }
2262                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2263                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2264                 blocksize = root->nodesize;
2265
2266                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2267                 if (ret)
2268                         break;
2269                 if (!nrefs->need_check[*level - 1]) {
2270                         path->slots[*level]++;
2271                         continue;
2272                 }
2273
2274                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2275                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2276                         free_extent_buffer(next);
2277                         reada_walk_down(root, cur, path->slots[*level]);
2278                         next = read_tree_block(root, bytenr, blocksize,
2279                                                ptr_gen);
2280                         if (!extent_buffer_uptodate(next)) {
2281                                 struct btrfs_key node_key;
2282
2283                                 btrfs_node_key_to_cpu(path->nodes[*level],
2284                                                       &node_key,
2285                                                       path->slots[*level]);
2286                                 btrfs_add_corrupt_extent_record(root->fs_info,
2287                                                 &node_key,
2288                                                 path->nodes[*level]->start,
2289                                                 root->nodesize, *level);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293                 }
2294
2295                 ret = check_child_node(root, cur, path->slots[*level], next);
2296                 if (ret < 0) 
2297                         break;
2298
2299                 if (btrfs_is_leaf(next))
2300                         status = btrfs_check_leaf(root, NULL, next);
2301                 else
2302                         status = btrfs_check_node(root, NULL, next);
2303                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2304                         free_extent_buffer(next);
2305                         ret = -EIO;
2306                         break;
2307                 }
2308
2309                 *level = *level - 1;
2310                 free_extent_buffer(path->nodes[*level]);
2311                 path->nodes[*level] = next;
2312                 path->slots[*level] = 0;
2313         }
2314         return ret;
2315 }
2316
2317 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2318                         struct walk_control *wc, int *level)
2319 {
2320         int i;
2321         struct extent_buffer *leaf;
2322
2323         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2324                 leaf = path->nodes[i];
2325                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2326                         path->slots[i]++;
2327                         *level = i;
2328                         return 0;
2329                 } else {
2330                         free_extent_buffer(path->nodes[*level]);
2331                         path->nodes[*level] = NULL;
2332                         BUG_ON(*level > wc->active_node);
2333                         if (*level == wc->active_node)
2334                                 leave_shared_node(root, wc, *level);
2335                         *level = i + 1;
2336                 }
2337         }
2338         return 1;
2339 }
2340
2341 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2342                            int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         *level = i + 1;
2357                 }
2358         }
2359         return 1;
2360 }
2361
2362 static int check_root_dir(struct inode_record *rec)
2363 {
2364         struct inode_backref *backref;
2365         int ret = -1;
2366
2367         if (!rec->found_inode_item || rec->errors)
2368                 goto out;
2369         if (rec->nlink != 1 || rec->found_link != 0)
2370                 goto out;
2371         if (list_empty(&rec->backrefs))
2372                 goto out;
2373         backref = to_inode_backref(rec->backrefs.next);
2374         if (!backref->found_inode_ref)
2375                 goto out;
2376         if (backref->index != 0 || backref->namelen != 2 ||
2377             memcmp(backref->name, "..", 2))
2378                 goto out;
2379         if (backref->found_dir_index || backref->found_dir_item)
2380                 goto out;
2381         ret = 0;
2382 out:
2383         return ret;
2384 }
2385
2386 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2387                               struct btrfs_root *root, struct btrfs_path *path,
2388                               struct inode_record *rec)
2389 {
2390         struct btrfs_inode_item *ei;
2391         struct btrfs_key key;
2392         int ret;
2393
2394         key.objectid = rec->ino;
2395         key.type = BTRFS_INODE_ITEM_KEY;
2396         key.offset = (u64)-1;
2397
2398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2399         if (ret < 0)
2400                 goto out;
2401         if (ret) {
2402                 if (!path->slots[0]) {
2403                         ret = -ENOENT;
2404                         goto out;
2405                 }
2406                 path->slots[0]--;
2407                 ret = 0;
2408         }
2409         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2410         if (key.objectid != rec->ino) {
2411                 ret = -ENOENT;
2412                 goto out;
2413         }
2414
2415         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2416                             struct btrfs_inode_item);
2417         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2418         btrfs_mark_buffer_dirty(path->nodes[0]);
2419         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2420         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2421                root->root_key.objectid);
2422 out:
2423         btrfs_release_path(path);
2424         return ret;
2425 }
2426
2427 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2428                                     struct btrfs_root *root,
2429                                     struct btrfs_path *path,
2430                                     struct inode_record *rec)
2431 {
2432         int ret;
2433
2434         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2435         btrfs_release_path(path);
2436         if (!ret)
2437                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2438         return ret;
2439 }
2440
2441 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2442                                struct btrfs_root *root,
2443                                struct btrfs_path *path,
2444                                struct inode_record *rec)
2445 {
2446         struct btrfs_inode_item *ei;
2447         struct btrfs_key key;
2448         int ret = 0;
2449
2450         key.objectid = rec->ino;
2451         key.type = BTRFS_INODE_ITEM_KEY;
2452         key.offset = 0;
2453
2454         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2455         if (ret) {
2456                 if (ret > 0)
2457                         ret = -ENOENT;
2458                 goto out;
2459         }
2460
2461         /* Since ret == 0, no need to check anything */
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2467         printf("reset nbytes for ino %llu root %llu\n",
2468                rec->ino, root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int add_missing_dir_index(struct btrfs_root *root,
2475                                  struct cache_tree *inode_cache,
2476                                  struct inode_record *rec,
2477                                  struct inode_backref *backref)
2478 {
2479         struct btrfs_path path;
2480         struct btrfs_trans_handle *trans;
2481         struct btrfs_dir_item *dir_item;
2482         struct extent_buffer *leaf;
2483         struct btrfs_key key;
2484         struct btrfs_disk_key disk_key;
2485         struct inode_record *dir_rec;
2486         unsigned long name_ptr;
2487         u32 data_size = sizeof(*dir_item) + backref->namelen;
2488         int ret;
2489
2490         trans = btrfs_start_transaction(root, 1);
2491         if (IS_ERR(trans))
2492                 return PTR_ERR(trans);
2493
2494         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2495                 (unsigned long long)rec->ino);
2496
2497         btrfs_init_path(&path);
2498         key.objectid = backref->dir;
2499         key.type = BTRFS_DIR_INDEX_KEY;
2500         key.offset = backref->index;
2501         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2502         BUG_ON(ret);
2503
2504         leaf = path.nodes[0];
2505         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506
2507         disk_key.objectid = cpu_to_le64(rec->ino);
2508         disk_key.type = BTRFS_INODE_ITEM_KEY;
2509         disk_key.offset = 0;
2510
2511         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2512         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2513         btrfs_set_dir_data_len(leaf, dir_item, 0);
2514         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2515         name_ptr = (unsigned long)(dir_item + 1);
2516         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2517         btrfs_mark_buffer_dirty(leaf);
2518         btrfs_release_path(&path);
2519         btrfs_commit_transaction(trans, root);
2520
2521         backref->found_dir_index = 1;
2522         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2523         BUG_ON(IS_ERR(dir_rec));
2524         if (!dir_rec)
2525                 return 0;
2526         dir_rec->found_size += backref->namelen;
2527         if (dir_rec->found_size == dir_rec->isize &&
2528             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2529                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2530         if (dir_rec->found_size != dir_rec->isize)
2531                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532
2533         return 0;
2534 }
2535
2536 static int delete_dir_index(struct btrfs_root *root,
2537                             struct cache_tree *inode_cache,
2538                             struct inode_record *rec,
2539                             struct inode_backref *backref)
2540 {
2541         struct btrfs_trans_handle *trans;
2542         struct btrfs_dir_item *di;
2543         struct btrfs_path path;
2544         int ret = 0;
2545
2546         trans = btrfs_start_transaction(root, 1);
2547         if (IS_ERR(trans))
2548                 return PTR_ERR(trans);
2549
2550         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2551                 (unsigned long long)backref->dir,
2552                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2553                 (unsigned long long)root->objectid);
2554
2555         btrfs_init_path(&path);
2556         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2557                                     backref->name, backref->namelen,
2558                                     backref->index, -1);
2559         if (IS_ERR(di)) {
2560                 ret = PTR_ERR(di);
2561                 btrfs_release_path(&path);
2562                 btrfs_commit_transaction(trans, root);
2563                 if (ret == -ENOENT)
2564                         return 0;
2565                 return ret;
2566         }
2567
2568         if (!di)
2569                 ret = btrfs_del_item(trans, root, &path);
2570         else
2571                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2572         BUG_ON(ret);
2573         btrfs_release_path(&path);
2574         btrfs_commit_transaction(trans, root);
2575         return ret;
2576 }
2577
2578 static int create_inode_item(struct btrfs_root *root,
2579                              struct inode_record *rec,
2580                              struct inode_backref *backref, int root_dir)
2581 {
2582         struct btrfs_trans_handle *trans;
2583         struct btrfs_inode_item inode_item;
2584         time_t now = time(NULL);
2585         int ret;
2586
2587         trans = btrfs_start_transaction(root, 1);
2588         if (IS_ERR(trans)) {
2589                 ret = PTR_ERR(trans);
2590                 return ret;
2591         }
2592
2593         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2594                 "be incomplete, please check permissions and content after "
2595                 "the fsck completes.\n", (unsigned long long)root->objectid,
2596                 (unsigned long long)rec->ino);
2597
2598         memset(&inode_item, 0, sizeof(inode_item));
2599         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2600         if (root_dir)
2601                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2602         else
2603                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2604         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2605         if (rec->found_dir_item) {
2606                 if (rec->found_file_extent)
2607                         fprintf(stderr, "root %llu inode %llu has both a dir "
2608                                 "item and extents, unsure if it is a dir or a "
2609                                 "regular file so setting it as a directory\n",
2610                                 (unsigned long long)root->objectid,
2611                                 (unsigned long long)rec->ino);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2613                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2614         } else if (!rec->found_dir_item) {
2615                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2616                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2617         }
2618         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2625         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2626
2627         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2628         BUG_ON(ret);
2629         btrfs_commit_transaction(trans, root);
2630         return 0;
2631 }
2632
2633 static int repair_inode_backrefs(struct btrfs_root *root,
2634                                  struct inode_record *rec,
2635                                  struct cache_tree *inode_cache,
2636                                  int delete)
2637 {
2638         struct inode_backref *tmp, *backref;
2639         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2640         int ret = 0;
2641         int repaired = 0;
2642
2643         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2644                 if (!delete && rec->ino == root_dirid) {
2645                         if (!rec->found_inode_item) {
2646                                 ret = create_inode_item(root, rec, backref, 1);
2647                                 if (ret)
2648                                         break;
2649                                 repaired++;
2650                         }
2651                 }
2652
2653                 /* Index 0 for root dir's are special, don't mess with it */
2654                 if (rec->ino == root_dirid && backref->index == 0)
2655                         continue;
2656
2657                 if (delete &&
2658                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2659                      (backref->found_dir_index && backref->found_inode_ref &&
2660                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2661                         ret = delete_dir_index(root, inode_cache, rec, backref);
2662                         if (ret)
2663                                 break;
2664                         repaired++;
2665                         list_del(&backref->list);
2666                         free(backref);
2667                 }
2668
2669                 if (!delete && !backref->found_dir_index &&
2670                     backref->found_dir_item && backref->found_inode_ref) {
2671                         ret = add_missing_dir_index(root, inode_cache, rec,
2672                                                     backref);
2673                         if (ret)
2674                                 break;
2675                         repaired++;
2676                         if (backref->found_dir_item &&
2677                             backref->found_dir_index &&
2678                             backref->found_dir_index) {
2679                                 if (!backref->errors &&
2680                                     backref->found_inode_ref) {
2681                                         list_del(&backref->list);
2682                                         free(backref);
2683                                 }
2684                         }
2685                 }
2686
2687                 if (!delete && (!backref->found_dir_index &&
2688                                 !backref->found_dir_item &&
2689                                 backref->found_inode_ref)) {
2690                         struct btrfs_trans_handle *trans;
2691                         struct btrfs_key location;
2692
2693                         ret = check_dir_conflict(root, backref->name,
2694                                                  backref->namelen,
2695                                                  backref->dir,
2696                                                  backref->index);
2697                         if (ret) {
2698                                 /*
2699                                  * let nlink fixing routine to handle it,
2700                                  * which can do it better.
2701                                  */
2702                                 ret = 0;
2703                                 break;
2704                         }
2705                         location.objectid = rec->ino;
2706                         location.type = BTRFS_INODE_ITEM_KEY;
2707                         location.offset = 0;
2708
2709                         trans = btrfs_start_transaction(root, 1);
2710                         if (IS_ERR(trans)) {
2711                                 ret = PTR_ERR(trans);
2712                                 break;
2713                         }
2714                         fprintf(stderr, "adding missing dir index/item pair "
2715                                 "for inode %llu\n",
2716                                 (unsigned long long)rec->ino);
2717                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2718                                                     backref->namelen,
2719                                                     backref->dir, &location,
2720                                                     imode_to_type(rec->imode),
2721                                                     backref->index);
2722                         BUG_ON(ret);
2723                         btrfs_commit_transaction(trans, root);
2724                         repaired++;
2725                 }
2726
2727                 if (!delete && (backref->found_inode_ref &&
2728                                 backref->found_dir_index &&
2729                                 backref->found_dir_item &&
2730                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2731                                 !rec->found_inode_item)) {
2732                         ret = create_inode_item(root, rec, backref, 0);
2733                         if (ret)
2734                                 break;
2735                         repaired++;
2736                 }
2737
2738         }
2739         return ret ? ret : repaired;
2740 }
2741
2742 /*
2743  * To determine the file type for nlink/inode_item repair
2744  *
2745  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2746  * Return -ENOENT if file type is not found.
2747  */
2748 static int find_file_type(struct inode_record *rec, u8 *type)
2749 {
2750         struct inode_backref *backref;
2751
2752         /* For inode item recovered case */
2753         if (rec->found_inode_item) {
2754                 *type = imode_to_type(rec->imode);
2755                 return 0;
2756         }
2757
2758         list_for_each_entry(backref, &rec->backrefs, list) {
2759                 if (backref->found_dir_index || backref->found_dir_item) {
2760                         *type = backref->filetype;
2761                         return 0;
2762                 }
2763         }
2764         return -ENOENT;
2765 }
2766
2767 /*
2768  * To determine the file name for nlink repair
2769  *
2770  * Return 0 if file name is found, set name and namelen.
2771  * Return -ENOENT if file name is not found.
2772  */
2773 static int find_file_name(struct inode_record *rec,
2774                           char *name, int *namelen)
2775 {
2776         struct inode_backref *backref;
2777
2778         list_for_each_entry(backref, &rec->backrefs, list) {
2779                 if (backref->found_dir_index || backref->found_dir_item ||
2780                     backref->found_inode_ref) {
2781                         memcpy(name, backref->name, backref->namelen);
2782                         *namelen = backref->namelen;
2783                         return 0;
2784                 }
2785         }
2786         return -ENOENT;
2787 }
2788
2789 /* Reset the nlink of the inode to the correct one */
2790 static int reset_nlink(struct btrfs_trans_handle *trans,
2791                        struct btrfs_root *root,
2792                        struct btrfs_path *path,
2793                        struct inode_record *rec)
2794 {
2795         struct inode_backref *backref;
2796         struct inode_backref *tmp;
2797         struct btrfs_key key;
2798         struct btrfs_inode_item *inode_item;
2799         int ret = 0;
2800
2801         /* We don't believe this either, reset it and iterate backref */
2802         rec->found_link = 0;
2803
2804         /* Remove all backref including the valid ones */
2805         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2806                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2807                                    backref->index, backref->name,
2808                                    backref->namelen, 0);
2809                 if (ret < 0)
2810                         goto out;
2811
2812                 /* remove invalid backref, so it won't be added back */
2813                 if (!(backref->found_dir_index &&
2814                       backref->found_dir_item &&
2815                       backref->found_inode_ref)) {
2816                         list_del(&backref->list);
2817                         free(backref);
2818                 } else {
2819                         rec->found_link++;
2820                 }
2821         }
2822
2823         /* Set nlink to 0 */
2824         key.objectid = rec->ino;
2825         key.type = BTRFS_INODE_ITEM_KEY;
2826         key.offset = 0;
2827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828         if (ret < 0)
2829                 goto out;
2830         if (ret > 0) {
2831                 ret = -ENOENT;
2832                 goto out;
2833         }
2834         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2835                                     struct btrfs_inode_item);
2836         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2837         btrfs_mark_buffer_dirty(path->nodes[0]);
2838         btrfs_release_path(path);
2839
2840         /*
2841          * Add back valid inode_ref/dir_item/dir_index,
2842          * add_link() will handle the nlink inc, so new nlink must be correct
2843          */
2844         list_for_each_entry(backref, &rec->backrefs, list) {
2845                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2846                                      backref->name, backref->namelen,
2847                                      backref->filetype, &backref->index, 1);
2848                 if (ret < 0)
2849                         goto out;
2850         }
2851 out:
2852         btrfs_release_path(path);
2853         return ret;
2854 }
2855
2856 static int get_highest_inode(struct btrfs_trans_handle *trans,
2857                                 struct btrfs_root *root,
2858                                 struct btrfs_path *path,
2859                                 u64 *highest_ino)
2860 {
2861         struct btrfs_key key, found_key;
2862         int ret;
2863
2864         btrfs_init_path(path);
2865         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2866         key.offset = -1;
2867         key.type = BTRFS_INODE_ITEM_KEY;
2868         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2869         if (ret == 1) {
2870                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2871                                 path->slots[0] - 1);
2872                 *highest_ino = found_key.objectid;
2873                 ret = 0;
2874         }
2875         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2876                 ret = -EOVERFLOW;
2877         btrfs_release_path(path);
2878         return ret;
2879 }
2880
2881 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2882                                struct btrfs_root *root,
2883                                struct btrfs_path *path,
2884                                struct inode_record *rec)
2885 {
2886         char *dir_name = "lost+found";
2887         char namebuf[BTRFS_NAME_LEN] = {0};
2888         u64 lost_found_ino;
2889         u32 mode = 0700;
2890         u8 type = 0;
2891         int namelen = 0;
2892         int name_recovered = 0;
2893         int type_recovered = 0;
2894         int ret = 0;
2895
2896         /*
2897          * Get file name and type first before these invalid inode ref
2898          * are deleted by remove_all_invalid_backref()
2899          */
2900         name_recovered = !find_file_name(rec, namebuf, &namelen);
2901         type_recovered = !find_file_type(rec, &type);
2902
2903         if (!name_recovered) {
2904                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2905                        rec->ino, rec->ino);
2906                 namelen = count_digits(rec->ino);
2907                 sprintf(namebuf, "%llu", rec->ino);
2908                 name_recovered = 1;
2909         }
2910         if (!type_recovered) {
2911                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2912                        rec->ino);
2913                 type = BTRFS_FT_REG_FILE;
2914                 type_recovered = 1;
2915         }
2916
2917         ret = reset_nlink(trans, root, path, rec);
2918         if (ret < 0) {
2919                 fprintf(stderr,
2920                         "Failed to reset nlink for inode %llu: %s\n",
2921                         rec->ino, strerror(-ret));
2922                 goto out;
2923         }
2924
2925         if (rec->found_link == 0) {
2926                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2927                 if (ret < 0)
2928                         goto out;
2929                 lost_found_ino++;
2930                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2931                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2932                                   mode);
2933                 if (ret < 0) {
2934                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2935                                 dir_name, strerror(-ret));
2936                         goto out;
2937                 }
2938                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2939                                      namebuf, namelen, type, NULL, 1);
2940                 /*
2941                  * Add ".INO" suffix several times to handle case where
2942                  * "FILENAME.INO" is already taken by another file.
2943                  */
2944                 while (ret == -EEXIST) {
2945                         /*
2946                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2947                          */
2948                         if (namelen + count_digits(rec->ino) + 1 >
2949                             BTRFS_NAME_LEN) {
2950                                 ret = -EFBIG;
2951                                 goto out;
2952                         }
2953                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2954                                  ".%llu", rec->ino);
2955                         namelen += count_digits(rec->ino) + 1;
2956                         ret = btrfs_add_link(trans, root, rec->ino,
2957                                              lost_found_ino, namebuf,
2958                                              namelen, type, NULL, 1);
2959                 }
2960                 if (ret < 0) {
2961                         fprintf(stderr,
2962                                 "Failed to link the inode %llu to %s dir: %s\n",
2963                                 rec->ino, dir_name, strerror(-ret));
2964                         goto out;
2965                 }
2966                 /*
2967                  * Just increase the found_link, don't actually add the
2968                  * backref. This will make things easier and this inode
2969                  * record will be freed after the repair is done.
2970                  * So fsck will not report problem about this inode.
2971                  */
2972                 rec->found_link++;
2973                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2974                        namelen, namebuf, dir_name);
2975         }
2976         printf("Fixed the nlink of inode %llu\n", rec->ino);
2977 out:
2978         /*
2979          * Clear the flag anyway, or we will loop forever for the same inode
2980          * as it will not be removed from the bad inode list and the dead loop
2981          * happens.
2982          */
2983         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2984         btrfs_release_path(path);
2985         return ret;
2986 }
2987
2988 /*
2989  * Check if there is any normal(reg or prealloc) file extent for given
2990  * ino.
2991  * This is used to determine the file type when neither its dir_index/item or
2992  * inode_item exists.
2993  *
2994  * This will *NOT* report error, if any error happens, just consider it does
2995  * not have any normal file extent.
2996  */
2997 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2998 {
2999         struct btrfs_path path;
3000         struct btrfs_key key;
3001         struct btrfs_key found_key;
3002         struct btrfs_file_extent_item *fi;
3003         u8 type;
3004         int ret = 0;
3005
3006         btrfs_init_path(&path);
3007         key.objectid = ino;
3008         key.type = BTRFS_EXTENT_DATA_KEY;
3009         key.offset = 0;
3010
3011         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3012         if (ret < 0) {
3013                 ret = 0;
3014                 goto out;
3015         }
3016         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3017                 ret = btrfs_next_leaf(root, &path);
3018                 if (ret) {
3019                         ret = 0;
3020                         goto out;
3021                 }
3022         }
3023         while (1) {
3024                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3025                                       path.slots[0]);
3026                 if (found_key.objectid != ino ||
3027                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3028                         break;
3029                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3030                                     struct btrfs_file_extent_item);
3031                 type = btrfs_file_extent_type(path.nodes[0], fi);
3032                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3033                         ret = 1;
3034                         goto out;
3035                 }
3036         }
3037 out:
3038         btrfs_release_path(&path);
3039         return ret;
3040 }
3041
3042 static u32 btrfs_type_to_imode(u8 type)
3043 {
3044         static u32 imode_by_btrfs_type[] = {
3045                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3046                 [BTRFS_FT_DIR]          = S_IFDIR,
3047                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3048                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3049                 [BTRFS_FT_FIFO]         = S_IFIFO,
3050                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3051                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3052         };
3053
3054         return imode_by_btrfs_type[(type)];
3055 }
3056
3057 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3058                                 struct btrfs_root *root,
3059                                 struct btrfs_path *path,
3060                                 struct inode_record *rec)
3061 {
3062         u8 filetype;
3063         u32 mode = 0700;
3064         int type_recovered = 0;
3065         int ret = 0;
3066
3067         printf("Trying to rebuild inode:%llu\n", rec->ino);
3068
3069         type_recovered = !find_file_type(rec, &filetype);
3070
3071         /*
3072          * Try to determine inode type if type not found.
3073          *
3074          * For found regular file extent, it must be FILE.
3075          * For found dir_item/index, it must be DIR.
3076          *
3077          * For undetermined one, use FILE as fallback.
3078          *
3079          * TODO:
3080          * 1. If found backref(inode_index/item is already handled) to it,
3081          *    it must be DIR.
3082          *    Need new inode-inode ref structure to allow search for that.
3083          */
3084         if (!type_recovered) {
3085                 if (rec->found_file_extent &&
3086                     find_normal_file_extent(root, rec->ino)) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_REG_FILE;
3089                 } else if (rec->found_dir_item) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_DIR;
3092                 } else if (!list_empty(&rec->orphan_extents)) {
3093                         type_recovered = 1;
3094                         filetype = BTRFS_FT_REG_FILE;
3095                 } else{
3096                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3097                                rec->ino);
3098                         type_recovered = 1;
3099                         filetype = BTRFS_FT_REG_FILE;
3100                 }
3101         }
3102
3103         ret = btrfs_new_inode(trans, root, rec->ino,
3104                               mode | btrfs_type_to_imode(filetype));
3105         if (ret < 0)
3106                 goto out;
3107
3108         /*
3109          * Here inode rebuild is done, we only rebuild the inode item,
3110          * don't repair the nlink(like move to lost+found).
3111          * That is the job of nlink repair.
3112          *
3113          * We just fill the record and return
3114          */
3115         rec->found_dir_item = 1;
3116         rec->imode = mode | btrfs_type_to_imode(filetype);
3117         rec->nlink = 0;
3118         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3119         /* Ensure the inode_nlinks repair function will be called */
3120         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3121 out:
3122         return ret;
3123 }
3124
3125 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3126                                       struct btrfs_root *root,
3127                                       struct btrfs_path *path,
3128                                       struct inode_record *rec)
3129 {
3130         struct orphan_data_extent *orphan;
3131         struct orphan_data_extent *tmp;
3132         int ret = 0;
3133
3134         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3135                 /*
3136                  * Check for conflicting file extents
3137                  *
3138                  * Here we don't know whether the extents is compressed or not,
3139                  * so we can only assume it not compressed nor data offset,
3140                  * and use its disk_len as extent length.
3141                  */
3142                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3143                                        orphan->offset, orphan->disk_len, 0);
3144                 btrfs_release_path(path);
3145                 if (ret < 0)
3146                         goto out;
3147                 if (!ret) {
3148                         fprintf(stderr,
3149                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3150                                 orphan->disk_bytenr, orphan->disk_len);
3151                         ret = btrfs_free_extent(trans,
3152                                         root->fs_info->extent_root,
3153                                         orphan->disk_bytenr, orphan->disk_len,
3154                                         0, root->objectid, orphan->objectid,
3155                                         orphan->offset);
3156                         if (ret < 0)
3157                                 goto out;
3158                 }
3159                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3160                                 orphan->offset, orphan->disk_bytenr,
3161                                 orphan->disk_len, orphan->disk_len);
3162                 if (ret < 0)
3163                         goto out;
3164
3165                 /* Update file size info */
3166                 rec->found_size += orphan->disk_len;
3167                 if (rec->found_size == rec->nbytes)
3168                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3169
3170                 /* Update the file extent hole info too */
3171                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3172                                            orphan->disk_len);
3173                 if (ret < 0)
3174                         goto out;
3175                 if (RB_EMPTY_ROOT(&rec->holes))
3176                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3177
3178                 list_del(&orphan->list);
3179                 free(orphan);
3180         }
3181         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3182 out:
3183         return ret;
3184 }
3185
3186 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3187                                         struct btrfs_root *root,
3188                                         struct btrfs_path *path,
3189                                         struct inode_record *rec)
3190 {
3191         struct rb_node *node;
3192         struct file_extent_hole *hole;
3193         int found = 0;
3194         int ret = 0;
3195
3196         node = rb_first(&rec->holes);
3197
3198         while (node) {
3199                 found = 1;
3200                 hole = rb_entry(node, struct file_extent_hole, node);
3201                 ret = btrfs_punch_hole(trans, root, rec->ino,
3202                                        hole->start, hole->len);
3203                 if (ret < 0)
3204                         goto out;
3205                 ret = del_file_extent_hole(&rec->holes, hole->start,
3206                                            hole->len);
3207                 if (ret < 0)
3208                         goto out;
3209                 if (RB_EMPTY_ROOT(&rec->holes))
3210                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3211                 node = rb_first(&rec->holes);
3212         }
3213         /* special case for a file losing all its file extent */
3214         if (!found) {
3215                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3216                                        round_up(rec->isize, root->sectorsize));
3217                 if (ret < 0)
3218                         goto out;
3219         }
3220         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3221                rec->ino, root->objectid);
3222 out:
3223         return ret;
3224 }
3225
3226 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3227 {
3228         struct btrfs_trans_handle *trans;
3229         struct btrfs_path path;
3230         int ret = 0;
3231
3232         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3233                              I_ERR_NO_ORPHAN_ITEM |
3234                              I_ERR_LINK_COUNT_WRONG |
3235                              I_ERR_NO_INODE_ITEM |
3236                              I_ERR_FILE_EXTENT_ORPHAN |
3237                              I_ERR_FILE_EXTENT_DISCOUNT|
3238                              I_ERR_FILE_NBYTES_WRONG)))
3239                 return rec->errors;
3240
3241         /*
3242          * For nlink repair, it may create a dir and add link, so
3243          * 2 for parent(256)'s dir_index and dir_item
3244          * 2 for lost+found dir's inode_item and inode_ref
3245          * 1 for the new inode_ref of the file
3246          * 2 for lost+found dir's dir_index and dir_item for the file
3247          */
3248         trans = btrfs_start_transaction(root, 7);
3249         if (IS_ERR(trans))
3250                 return PTR_ERR(trans);
3251
3252         btrfs_init_path(&path);
3253         if (rec->errors & I_ERR_NO_INODE_ITEM)
3254                 ret = repair_inode_no_item(trans, root, &path, rec);
3255         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3256                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3257         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3258                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3259         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3260                 ret = repair_inode_isize(trans, root, &path, rec);
3261         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3262                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3263         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3264                 ret = repair_inode_nlinks(trans, root, &path, rec);
3265         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3266                 ret = repair_inode_nbytes(trans, root, &path, rec);
3267         btrfs_commit_transaction(trans, root);
3268         btrfs_release_path(&path);
3269         return ret;
3270 }
3271
3272 static int check_inode_recs(struct btrfs_root *root,
3273                             struct cache_tree *inode_cache)
3274 {
3275         struct cache_extent *cache;
3276         struct ptr_node *node;
3277         struct inode_record *rec;
3278         struct inode_backref *backref;
3279         int stage = 0;
3280         int ret = 0;
3281         int err = 0;
3282         u64 error = 0;
3283         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3284
3285         if (btrfs_root_refs(&root->root_item) == 0) {
3286                 if (!cache_tree_empty(inode_cache))
3287                         fprintf(stderr, "warning line %d\n", __LINE__);
3288                 return 0;
3289         }
3290
3291         /*
3292          * We need to repair backrefs first because we could change some of the
3293          * errors in the inode recs.
3294          *
3295          * We also need to go through and delete invalid backrefs first and then
3296          * add the correct ones second.  We do this because we may get EEXIST
3297          * when adding back the correct index because we hadn't yet deleted the
3298          * invalid index.
3299          *
3300          * For example, if we were missing a dir index then the directories
3301          * isize would be wrong, so if we fixed the isize to what we thought it
3302          * would be and then fixed the backref we'd still have a invalid fs, so
3303          * we need to add back the dir index and then check to see if the isize
3304          * is still wrong.
3305          */
3306         while (stage < 3) {
3307                 stage++;
3308                 if (stage == 3 && !err)
3309                         break;
3310
3311                 cache = search_cache_extent(inode_cache, 0);
3312                 while (repair && cache) {
3313                         node = container_of(cache, struct ptr_node, cache);
3314                         rec = node->data;
3315                         cache = next_cache_extent(cache);
3316
3317                         /* Need to free everything up and rescan */
3318                         if (stage == 3) {
3319                                 remove_cache_extent(inode_cache, &node->cache);
3320                                 free(node);
3321                                 free_inode_rec(rec);
3322                                 continue;
3323                         }
3324
3325                         if (list_empty(&rec->backrefs))
3326                                 continue;
3327
3328                         ret = repair_inode_backrefs(root, rec, inode_cache,
3329                                                     stage == 1);
3330                         if (ret < 0) {
3331                                 err = ret;
3332                                 stage = 2;
3333                                 break;
3334                         } if (ret > 0) {
3335                                 err = -EAGAIN;
3336                         }
3337                 }
3338         }
3339         if (err)
3340                 return err;
3341
3342         rec = get_inode_rec(inode_cache, root_dirid, 0);
3343         BUG_ON(IS_ERR(rec));
3344         if (rec) {
3345                 ret = check_root_dir(rec);
3346                 if (ret) {
3347                         fprintf(stderr, "root %llu root dir %llu error\n",
3348                                 (unsigned long long)root->root_key.objectid,
3349                                 (unsigned long long)root_dirid);
3350                         print_inode_error(root, rec);
3351                         error++;
3352                 }
3353         } else {
3354                 if (repair) {
3355                         struct btrfs_trans_handle *trans;
3356
3357                         trans = btrfs_start_transaction(root, 1);
3358                         if (IS_ERR(trans)) {
3359                                 err = PTR_ERR(trans);
3360                                 return err;
3361                         }
3362
3363                         fprintf(stderr,
3364                                 "root %llu missing its root dir, recreating\n",
3365                                 (unsigned long long)root->objectid);
3366
3367                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3368                         BUG_ON(ret);
3369
3370                         btrfs_commit_transaction(trans, root);
3371                         return -EAGAIN;
3372                 }
3373
3374                 fprintf(stderr, "root %llu root dir %llu not found\n",
3375                         (unsigned long long)root->root_key.objectid,
3376                         (unsigned long long)root_dirid);
3377         }
3378
3379         while (1) {
3380                 cache = search_cache_extent(inode_cache, 0);
3381                 if (!cache)
3382                         break;
3383                 node = container_of(cache, struct ptr_node, cache);
3384                 rec = node->data;
3385                 remove_cache_extent(inode_cache, &node->cache);
3386                 free(node);
3387                 if (rec->ino == root_dirid ||
3388                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3389                         free_inode_rec(rec);
3390                         continue;
3391                 }
3392
3393                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3394                         ret = check_orphan_item(root, rec->ino);
3395                         if (ret == 0)
3396                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3397                         if (can_free_inode_rec(rec)) {
3398                                 free_inode_rec(rec);
3399                                 continue;
3400                         }
3401                 }
3402
3403                 if (!rec->found_inode_item)
3404                         rec->errors |= I_ERR_NO_INODE_ITEM;
3405                 if (rec->found_link != rec->nlink)
3406                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3407                 if (repair) {
3408                         ret = try_repair_inode(root, rec);
3409                         if (ret == 0 && can_free_inode_rec(rec)) {
3410                                 free_inode_rec(rec);
3411                                 continue;
3412                         }
3413                         ret = 0;
3414                 }
3415
3416                 if (!(repair && ret == 0))
3417                         error++;
3418                 print_inode_error(root, rec);
3419                 list_for_each_entry(backref, &rec->backrefs, list) {
3420                         if (!backref->found_dir_item)
3421                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3422                         if (!backref->found_dir_index)
3423                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3424                         if (!backref->found_inode_ref)
3425                                 backref->errors |= REF_ERR_NO_INODE_REF;
3426                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3427                                 " namelen %u name %s filetype %d errors %x",
3428                                 (unsigned long long)backref->dir,
3429                                 (unsigned long long)backref->index,
3430                                 backref->namelen, backref->name,
3431                                 backref->filetype, backref->errors);
3432                         print_ref_error(backref->errors);
3433                 }
3434                 free_inode_rec(rec);
3435         }
3436         return (error > 0) ? -1 : 0;
3437 }
3438
3439 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3440                                         u64 objectid)
3441 {
3442         struct cache_extent *cache;
3443         struct root_record *rec = NULL;
3444         int ret;
3445
3446         cache = lookup_cache_extent(root_cache, objectid, 1);
3447         if (cache) {
3448                 rec = container_of(cache, struct root_record, cache);
3449         } else {
3450                 rec = calloc(1, sizeof(*rec));
3451                 if (!rec)
3452                         return ERR_PTR(-ENOMEM);
3453                 rec->objectid = objectid;
3454                 INIT_LIST_HEAD(&rec->backrefs);
3455                 rec->cache.start = objectid;
3456                 rec->cache.size = 1;
3457
3458                 ret = insert_cache_extent(root_cache, &rec->cache);
3459                 if (ret)
3460                         return ERR_PTR(-EEXIST);
3461         }
3462         return rec;
3463 }
3464
3465 static struct root_backref *get_root_backref(struct root_record *rec,
3466                                              u64 ref_root, u64 dir, u64 index,
3467                                              const char *name, int namelen)
3468 {
3469         struct root_backref *backref;
3470
3471         list_for_each_entry(backref, &rec->backrefs, list) {
3472                 if (backref->ref_root != ref_root || backref->dir != dir ||
3473                     backref->namelen != namelen)
3474                         continue;
3475                 if (memcmp(name, backref->name, namelen))
3476                         continue;
3477                 return backref;
3478         }
3479
3480         backref = calloc(1, sizeof(*backref) + namelen + 1);
3481         if (!backref)
3482                 return NULL;
3483         backref->ref_root = ref_root;
3484         backref->dir = dir;
3485         backref->index = index;
3486         backref->namelen = namelen;
3487         memcpy(backref->name, name, namelen);
3488         backref->name[namelen] = '\0';
3489         list_add_tail(&backref->list, &rec->backrefs);
3490         return backref;
3491 }
3492
3493 static void free_root_record(struct cache_extent *cache)
3494 {
3495         struct root_record *rec;
3496         struct root_backref *backref;
3497
3498         rec = container_of(cache, struct root_record, cache);
3499         while (!list_empty(&rec->backrefs)) {
3500                 backref = to_root_backref(rec->backrefs.next);
3501                 list_del(&backref->list);
3502                 free(backref);
3503         }
3504
3505         free(rec);
3506 }
3507
3508 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3509
3510 static int add_root_backref(struct cache_tree *root_cache,
3511                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3512                             const char *name, int namelen,
3513                             int item_type, int errors)
3514 {
3515         struct root_record *rec;
3516         struct root_backref *backref;
3517
3518         rec = get_root_rec(root_cache, root_id);
3519         BUG_ON(IS_ERR(rec));
3520         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3521         BUG_ON(!backref);
3522
3523         backref->errors |= errors;
3524
3525         if (item_type != BTRFS_DIR_ITEM_KEY) {
3526                 if (backref->found_dir_index || backref->found_back_ref ||
3527                     backref->found_forward_ref) {
3528                         if (backref->index != index)
3529                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3530                 } else {
3531                         backref->index = index;
3532                 }
3533         }
3534
3535         if (item_type == BTRFS_DIR_ITEM_KEY) {
3536                 if (backref->found_forward_ref)
3537                         rec->found_ref++;
3538                 backref->found_dir_item = 1;
3539         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3540                 backref->found_dir_index = 1;
3541         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3542                 if (backref->found_forward_ref)
3543                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3544                 else if (backref->found_dir_item)
3545                         rec->found_ref++;
3546                 backref->found_forward_ref = 1;
3547         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3548                 if (backref->found_back_ref)
3549                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3550                 backref->found_back_ref = 1;
3551         } else {
3552                 BUG_ON(1);
3553         }
3554
3555         if (backref->found_forward_ref && backref->found_dir_item)
3556                 backref->reachable = 1;
3557         return 0;
3558 }
3559
3560 static int merge_root_recs(struct btrfs_root *root,
3561                            struct cache_tree *src_cache,
3562                            struct cache_tree *dst_cache)
3563 {
3564         struct cache_extent *cache;
3565         struct ptr_node *node;
3566         struct inode_record *rec;
3567         struct inode_backref *backref;
3568         int ret = 0;
3569
3570         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3571                 free_inode_recs_tree(src_cache);
3572                 return 0;
3573         }
3574
3575         while (1) {
3576                 cache = search_cache_extent(src_cache, 0);
3577                 if (!cache)
3578                         break;
3579                 node = container_of(cache, struct ptr_node, cache);
3580                 rec = node->data;
3581                 remove_cache_extent(src_cache, &node->cache);
3582                 free(node);
3583
3584                 ret = is_child_root(root, root->objectid, rec->ino);
3585                 if (ret < 0)
3586                         break;
3587                 else if (ret == 0)
3588                         goto skip;
3589
3590                 list_for_each_entry(backref, &rec->backrefs, list) {
3591                         BUG_ON(backref->found_inode_ref);
3592                         if (backref->found_dir_item)
3593                                 add_root_backref(dst_cache, rec->ino,
3594                                         root->root_key.objectid, backref->dir,
3595                                         backref->index, backref->name,
3596                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3597                                         backref->errors);
3598                         if (backref->found_dir_index)
3599                                 add_root_backref(dst_cache, rec->ino,
3600                                         root->root_key.objectid, backref->dir,
3601                                         backref->index, backref->name,
3602                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3603                                         backref->errors);
3604                 }
3605 skip:
3606                 free_inode_rec(rec);
3607         }
3608         if (ret < 0)
3609                 return ret;
3610         return 0;
3611 }
3612
3613 static int check_root_refs(struct btrfs_root *root,
3614                            struct cache_tree *root_cache)
3615 {
3616         struct root_record *rec;
3617         struct root_record *ref_root;
3618         struct root_backref *backref;
3619         struct cache_extent *cache;
3620         int loop = 1;
3621         int ret;
3622         int error;
3623         int errors = 0;
3624
3625         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3626         BUG_ON(IS_ERR(rec));
3627         rec->found_ref = 1;
3628
3629         /* fixme: this can not detect circular references */
3630         while (loop) {
3631                 loop = 0;
3632                 cache = search_cache_extent(root_cache, 0);
3633                 while (1) {
3634                         if (!cache)
3635                                 break;
3636                         rec = container_of(cache, struct root_record, cache);
3637                         cache = next_cache_extent(cache);
3638
3639                         if (rec->found_ref == 0)
3640                                 continue;
3641
3642                         list_for_each_entry(backref, &rec->backrefs, list) {
3643                                 if (!backref->reachable)
3644                                         continue;
3645
3646                                 ref_root = get_root_rec(root_cache,
3647                                                         backref->ref_root);
3648                                 BUG_ON(IS_ERR(ref_root));
3649                                 if (ref_root->found_ref > 0)
3650                                         continue;
3651
3652                                 backref->reachable = 0;
3653                                 rec->found_ref--;
3654                                 if (rec->found_ref == 0)
3655                                         loop = 1;
3656                         }
3657                 }
3658         }
3659
3660         cache = search_cache_extent(root_cache, 0);
3661         while (1) {
3662                 if (!cache)
3663                         break;
3664                 rec = container_of(cache, struct root_record, cache);
3665                 cache = next_cache_extent(cache);
3666
3667                 if (rec->found_ref == 0 &&
3668                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3669                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3670                         ret = check_orphan_item(root->fs_info->tree_root,
3671                                                 rec->objectid);
3672                         if (ret == 0)
3673                                 continue;
3674
3675                         /*
3676                          * If we don't have a root item then we likely just have
3677                          * a dir item in a snapshot for this root but no actual
3678                          * ref key or anything so it's meaningless.
3679                          */
3680                         if (!rec->found_root_item)
3681                                 continue;
3682                         errors++;
3683                         fprintf(stderr, "fs tree %llu not referenced\n",
3684                                 (unsigned long long)rec->objectid);
3685                 }
3686
3687                 error = 0;
3688                 if (rec->found_ref > 0 && !rec->found_root_item)
3689                         error = 1;
3690                 list_for_each_entry(backref, &rec->backrefs, list) {
3691                         if (!backref->found_dir_item)
3692                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3693                         if (!backref->found_dir_index)
3694                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3695                         if (!backref->found_back_ref)
3696                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3697                         if (!backref->found_forward_ref)
3698                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3699                         if (backref->reachable && backref->errors)
3700                                 error = 1;
3701                 }
3702                 if (!error)
3703                         continue;
3704
3705                 errors++;
3706                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3707                         (unsigned long long)rec->objectid, rec->found_ref,
3708                          rec->found_root_item ? "" : "not found");
3709
3710                 list_for_each_entry(backref, &rec->backrefs, list) {
3711                         if (!backref->reachable)
3712                                 continue;
3713                         if (!backref->errors && rec->found_root_item)
3714                                 continue;
3715                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3716                                 " index %llu namelen %u name %s errors %x\n",
3717                                 (unsigned long long)backref->ref_root,
3718                                 (unsigned long long)backref->dir,
3719                                 (unsigned long long)backref->index,
3720                                 backref->namelen, backref->name,
3721                                 backref->errors);
3722                         print_ref_error(backref->errors);
3723                 }
3724         }
3725         return errors > 0 ? 1 : 0;
3726 }
3727
3728 static int process_root_ref(struct extent_buffer *eb, int slot,
3729                             struct btrfs_key *key,
3730                             struct cache_tree *root_cache)
3731 {
3732         u64 dirid;
3733         u64 index;
3734         u32 len;
3735         u32 name_len;
3736         struct btrfs_root_ref *ref;
3737         char namebuf[BTRFS_NAME_LEN];
3738         int error;
3739
3740         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3741
3742         dirid = btrfs_root_ref_dirid(eb, ref);
3743         index = btrfs_root_ref_sequence(eb, ref);
3744         name_len = btrfs_root_ref_name_len(eb, ref);
3745
3746         if (name_len <= BTRFS_NAME_LEN) {
3747                 len = name_len;
3748                 error = 0;
3749         } else {
3750                 len = BTRFS_NAME_LEN;
3751                 error = REF_ERR_NAME_TOO_LONG;
3752         }
3753         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3754
3755         if (key->type == BTRFS_ROOT_REF_KEY) {
3756                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         } else {
3759                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3760                                  index, namebuf, len, key->type, error);
3761         }
3762         return 0;
3763 }
3764
3765 static void free_corrupt_block(struct cache_extent *cache)
3766 {
3767         struct btrfs_corrupt_block *corrupt;
3768
3769         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3770         free(corrupt);
3771 }
3772
3773 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3774
3775 /*
3776  * Repair the btree of the given root.
3777  *
3778  * The fix is to remove the node key in corrupt_blocks cache_tree.
3779  * and rebalance the tree.
3780  * After the fix, the btree should be writeable.
3781  */
3782 static int repair_btree(struct btrfs_root *root,
3783                         struct cache_tree *corrupt_blocks)
3784 {
3785         struct btrfs_trans_handle *trans;
3786         struct btrfs_path path;
3787         struct btrfs_corrupt_block *corrupt;
3788         struct cache_extent *cache;
3789         struct btrfs_key key;
3790         u64 offset;
3791         int level;
3792         int ret = 0;
3793
3794         if (cache_tree_empty(corrupt_blocks))
3795                 return 0;
3796
3797         trans = btrfs_start_transaction(root, 1);
3798         if (IS_ERR(trans)) {
3799                 ret = PTR_ERR(trans);
3800                 fprintf(stderr, "Error starting transaction: %s\n",
3801                         strerror(-ret));
3802                 return ret;
3803         }
3804         btrfs_init_path(&path);
3805         cache = first_cache_extent(corrupt_blocks);
3806         while (cache) {
3807                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3808                                        cache);
3809                 level = corrupt->level;
3810                 path.lowest_level = level;
3811                 key.objectid = corrupt->key.objectid;
3812                 key.type = corrupt->key.type;
3813                 key.offset = corrupt->key.offset;
3814
3815                 /*
3816                  * Here we don't want to do any tree balance, since it may
3817                  * cause a balance with corrupted brother leaf/node,
3818                  * so ins_len set to 0 here.
3819                  * Balance will be done after all corrupt node/leaf is deleted.
3820                  */
3821                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3822                 if (ret < 0)
3823                         goto out;
3824                 offset = btrfs_node_blockptr(path.nodes[level],
3825                                              path.slots[level]);
3826
3827                 /* Remove the ptr */
3828                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3829                 if (ret < 0)
3830                         goto out;
3831                 /*
3832                  * Remove the corresponding extent
3833                  * return value is not concerned.
3834                  */
3835                 btrfs_release_path(&path);
3836                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3837                                         0, root->root_key.objectid,
3838                                         level - 1, 0);
3839                 cache = next_cache_extent(cache);
3840         }
3841
3842         /* Balance the btree using btrfs_search_slot() */
3843         cache = first_cache_extent(corrupt_blocks);
3844         while (cache) {
3845                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3846                                        cache);
3847                 memcpy(&key, &corrupt->key, sizeof(key));
3848                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3849                 if (ret < 0)
3850                         goto out;
3851                 /* return will always >0 since it won't find the item */
3852                 ret = 0;
3853                 btrfs_release_path(&path);
3854                 cache = next_cache_extent(cache);
3855         }
3856 out:
3857         btrfs_commit_transaction(trans, root);
3858         btrfs_release_path(&path);
3859         return ret;
3860 }
3861
3862 static int check_fs_root(struct btrfs_root *root,
3863                          struct cache_tree *root_cache,
3864                          struct walk_control *wc)
3865 {
3866         int ret = 0;
3867         int err = 0;
3868         int wret;
3869         int level;
3870         struct btrfs_path path;
3871         struct shared_node root_node;
3872         struct root_record *rec;
3873         struct btrfs_root_item *root_item = &root->root_item;
3874         struct cache_tree corrupt_blocks;
3875         struct orphan_data_extent *orphan;
3876         struct orphan_data_extent *tmp;
3877         enum btrfs_tree_block_status status;
3878         struct node_refs nrefs;
3879
3880         /*
3881          * Reuse the corrupt_block cache tree to record corrupted tree block
3882          *
3883          * Unlike the usage in extent tree check, here we do it in a per
3884          * fs/subvol tree base.
3885          */
3886         cache_tree_init(&corrupt_blocks);
3887         root->fs_info->corrupt_blocks = &corrupt_blocks;
3888
3889         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3890                 rec = get_root_rec(root_cache, root->root_key.objectid);
3891                 BUG_ON(IS_ERR(rec));
3892                 if (btrfs_root_refs(root_item) > 0)
3893                         rec->found_root_item = 1;
3894         }
3895
3896         btrfs_init_path(&path);
3897         memset(&root_node, 0, sizeof(root_node));
3898         cache_tree_init(&root_node.root_cache);
3899         cache_tree_init(&root_node.inode_cache);
3900         memset(&nrefs, 0, sizeof(nrefs));
3901
3902         /* Move the orphan extent record to corresponding inode_record */
3903         list_for_each_entry_safe(orphan, tmp,
3904                                  &root->orphan_data_extents, list) {
3905                 struct inode_record *inode;
3906
3907                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3908                                       1);
3909                 BUG_ON(IS_ERR(inode));
3910                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3911                 list_move(&orphan->list, &inode->orphan_extents);
3912         }
3913
3914         level = btrfs_header_level(root->node);
3915         memset(wc->nodes, 0, sizeof(wc->nodes));
3916         wc->nodes[level] = &root_node;
3917         wc->active_node = level;
3918         wc->root_level = level;
3919
3920         /* We may not have checked the root block, lets do that now */
3921         if (btrfs_is_leaf(root->node))
3922                 status = btrfs_check_leaf(root, NULL, root->node);
3923         else
3924                 status = btrfs_check_node(root, NULL, root->node);
3925         if (status != BTRFS_TREE_BLOCK_CLEAN)
3926                 return -EIO;
3927
3928         if (btrfs_root_refs(root_item) > 0 ||
3929             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3930                 path.nodes[level] = root->node;
3931                 extent_buffer_get(root->node);
3932                 path.slots[level] = 0;
3933         } else {
3934                 struct btrfs_key key;
3935                 struct btrfs_disk_key found_key;
3936
3937                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3938                 level = root_item->drop_level;
3939                 path.lowest_level = level;
3940                 if (level > btrfs_header_level(root->node) ||
3941                     level >= BTRFS_MAX_LEVEL) {
3942                         error("ignoring invalid drop level: %u", level);
3943                         goto skip_walking;
3944                 }
3945                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3946                 if (wret < 0)
3947                         goto skip_walking;
3948                 btrfs_node_key(path.nodes[level], &found_key,
3949                                 path.slots[level]);
3950                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3951                                         sizeof(found_key)));
3952         }
3953
3954         while (1) {
3955                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3956                 if (wret < 0)
3957                         ret = wret;
3958                 if (wret != 0)
3959                         break;
3960
3961                 wret = walk_up_tree(root, &path, wc, &level);
3962                 if (wret < 0)
3963                         ret = wret;
3964                 if (wret != 0)
3965                         break;
3966         }
3967 skip_walking:
3968         btrfs_release_path(&path);
3969
3970         if (!cache_tree_empty(&corrupt_blocks)) {
3971                 struct cache_extent *cache;
3972                 struct btrfs_corrupt_block *corrupt;
3973
3974                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3975                        root->root_key.objectid);
3976                 cache = first_cache_extent(&corrupt_blocks);
3977                 while (cache) {
3978                         corrupt = container_of(cache,
3979                                                struct btrfs_corrupt_block,
3980                                                cache);
3981                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3982                                cache->start, corrupt->level,
3983                                corrupt->key.objectid, corrupt->key.type,
3984                                corrupt->key.offset);
3985                         cache = next_cache_extent(cache);
3986                 }
3987                 if (repair) {
3988                         printf("Try to repair the btree for root %llu\n",
3989                                root->root_key.objectid);
3990                         ret = repair_btree(root, &corrupt_blocks);
3991                         if (ret < 0)
3992                                 fprintf(stderr, "Failed to repair btree: %s\n",
3993                                         strerror(-ret));
3994                         if (!ret)
3995                                 printf("Btree for root %llu is fixed\n",
3996                                        root->root_key.objectid);
3997                 }
3998         }
3999
4000         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4001         if (err < 0)
4002                 ret = err;
4003
4004         if (root_node.current) {
4005                 root_node.current->checked = 1;
4006                 maybe_free_inode_rec(&root_node.inode_cache,
4007                                 root_node.current);
4008         }
4009
4010         err = check_inode_recs(root, &root_node.inode_cache);
4011         if (!ret)
4012                 ret = err;
4013
4014         free_corrupt_blocks_tree(&corrupt_blocks);
4015         root->fs_info->corrupt_blocks = NULL;
4016         free_orphan_data_extents(&root->orphan_data_extents);
4017         return ret;
4018 }
4019
4020 static int fs_root_objectid(u64 objectid)
4021 {
4022         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4023             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4024                 return 1;
4025         return is_fstree(objectid);
4026 }
4027
4028 static int check_fs_roots(struct btrfs_root *root,
4029                           struct cache_tree *root_cache)
4030 {
4031         struct btrfs_path path;
4032         struct btrfs_key key;
4033         struct walk_control wc;
4034         struct extent_buffer *leaf, *tree_node;
4035         struct btrfs_root *tmp_root;
4036         struct btrfs_root *tree_root = root->fs_info->tree_root;
4037         int ret;
4038         int err = 0;
4039
4040         if (ctx.progress_enabled) {
4041                 ctx.tp = TASK_FS_ROOTS;
4042                 task_start(ctx.info);
4043         }
4044
4045         /*
4046          * Just in case we made any changes to the extent tree that weren't
4047          * reflected into the free space cache yet.
4048          */
4049         if (repair)
4050                 reset_cached_block_groups(root->fs_info);
4051         memset(&wc, 0, sizeof(wc));
4052         cache_tree_init(&wc.shared);
4053         btrfs_init_path(&path);
4054
4055 again:
4056         key.offset = 0;
4057         key.objectid = 0;
4058         key.type = BTRFS_ROOT_ITEM_KEY;
4059         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4060         if (ret < 0) {
4061                 err = 1;
4062                 goto out;
4063         }
4064         tree_node = tree_root->node;
4065         while (1) {
4066                 if (tree_node != tree_root->node) {
4067                         free_root_recs_tree(root_cache);
4068                         btrfs_release_path(&path);
4069                         goto again;
4070                 }
4071                 leaf = path.nodes[0];
4072                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4073                         ret = btrfs_next_leaf(tree_root, &path);
4074                         if (ret) {
4075                                 if (ret < 0)
4076                                         err = 1;
4077                                 break;
4078                         }
4079                         leaf = path.nodes[0];
4080                 }
4081                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4082                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4083                     fs_root_objectid(key.objectid)) {
4084                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4085                                 tmp_root = btrfs_read_fs_root_no_cache(
4086                                                 root->fs_info, &key);
4087                         } else {
4088                                 key.offset = (u64)-1;
4089                                 tmp_root = btrfs_read_fs_root(
4090                                                 root->fs_info, &key);
4091                         }
4092                         if (IS_ERR(tmp_root)) {
4093                                 err = 1;
4094                                 goto next;
4095                         }
4096                         ret = check_fs_root(tmp_root, root_cache, &wc);
4097                         if (ret == -EAGAIN) {
4098                                 free_root_recs_tree(root_cache);
4099                                 btrfs_release_path(&path);
4100                                 goto again;
4101                         }
4102                         if (ret)
4103                                 err = 1;
4104                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4105                                 btrfs_free_fs_root(tmp_root);
4106                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4107                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4108                         process_root_ref(leaf, path.slots[0], &key,
4109                                          root_cache);
4110                 }
4111 next:
4112                 path.slots[0]++;
4113         }
4114 out:
4115         btrfs_release_path(&path);
4116         if (err)
4117                 free_extent_cache_tree(&wc.shared);
4118         if (!cache_tree_empty(&wc.shared))
4119                 fprintf(stderr, "warning line %d\n", __LINE__);
4120
4121         task_stop(ctx.info);
4122
4123         return err;
4124 }
4125
4126 /*
4127  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4128  * INODE_REF/INODE_EXTREF match.
4129  *
4130  * @root:       the root of the fs/file tree
4131  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4132  * @key:        the key of the DIR_ITEM/DIR_INDEX
4133  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4134  *              distinguish root_dir between normal dir/file
4135  * @name:       the name in the INODE_REF/INODE_EXTREF
4136  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4137  * @mode:       the st_mode of INODE_ITEM
4138  *
4139  * Return 0 if no error occurred.
4140  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4141  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4142  * dir/file.
4143  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4144  * not match for normal dir/file.
4145  */
4146 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4147                          struct btrfs_key *key, u64 index, char *name,
4148                          u32 namelen, u32 mode)
4149 {
4150         struct btrfs_path path;
4151         struct extent_buffer *node;
4152         struct btrfs_dir_item *di;
4153         struct btrfs_key location;
4154         char namebuf[BTRFS_NAME_LEN] = {0};
4155         u32 total;
4156         u32 cur = 0;
4157         u32 len;
4158         u32 name_len;
4159         u32 data_len;
4160         u8 filetype;
4161         int slot;
4162         int ret;
4163
4164         btrfs_init_path(&path);
4165         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4166         if (ret < 0) {
4167                 ret = DIR_ITEM_MISSING;
4168                 goto out;
4169         }
4170
4171         /* Process root dir and goto out*/
4172         if (index == 0) {
4173                 if (ret == 0) {
4174                         ret = ROOT_DIR_ERROR;
4175                         error(
4176                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4177                                 root->objectid,
4178                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4179                                         "REF" : "EXTREF",
4180                                 ref_key->objectid, ref_key->offset,
4181                                 key->type == BTRFS_DIR_ITEM_KEY ?
4182                                         "DIR_ITEM" : "DIR_INDEX");
4183                 } else {
4184                         ret = 0;
4185                 }
4186
4187                 goto out;
4188         }
4189
4190         /* Process normal file/dir */
4191         if (ret > 0) {
4192                 ret = DIR_ITEM_MISSING;
4193                 error(
4194                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4195                         root->objectid,
4196                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4197                         ref_key->objectid, ref_key->offset,
4198                         key->type == BTRFS_DIR_ITEM_KEY ?
4199                                 "DIR_ITEM" : "DIR_INDEX",
4200                         key->objectid, key->offset, namelen, name,
4201                         imode_to_type(mode));
4202                 goto out;
4203         }
4204
4205         /* Check whether inode_id/filetype/name match */
4206         node = path.nodes[0];
4207         slot = path.slots[0];
4208         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4209         total = btrfs_item_size_nr(node, slot);
4210         while (cur < total) {
4211                 ret = DIR_ITEM_MISMATCH;
4212                 name_len = btrfs_dir_name_len(node, di);
4213                 data_len = btrfs_dir_data_len(node, di);
4214
4215                 btrfs_dir_item_key_to_cpu(node, di, &location);
4216                 if (location.objectid != ref_key->objectid ||
4217                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4218                     location.offset != 0)
4219                         goto next;
4220
4221                 filetype = btrfs_dir_type(node, di);
4222                 if (imode_to_type(mode) != filetype)
4223                         goto next;
4224
4225                 if (name_len <= BTRFS_NAME_LEN) {
4226                         len = name_len;
4227                 } else {
4228                         len = BTRFS_NAME_LEN;
4229                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4230                         root->objectid,
4231                         key->type == BTRFS_DIR_ITEM_KEY ?
4232                         "DIR_ITEM" : "DIR_INDEX",
4233                         key->objectid, key->offset, name_len);
4234                 }
4235                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4236                 if (len != namelen || strncmp(namebuf, name, len))
4237                         goto next;
4238
4239                 ret = 0;
4240                 goto out;
4241 next:
4242                 len = sizeof(*di) + name_len + data_len;
4243                 di = (struct btrfs_dir_item *)((char *)di + len);
4244                 cur += len;
4245         }
4246         if (ret == DIR_ITEM_MISMATCH)
4247                 error(
4248                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4249                         root->objectid,
4250                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4251                         ref_key->objectid, ref_key->offset,
4252                         key->type == BTRFS_DIR_ITEM_KEY ?
4253                                 "DIR_ITEM" : "DIR_INDEX",
4254                         key->objectid, key->offset, namelen, name,
4255                         imode_to_type(mode));
4256 out:
4257         btrfs_release_path(&path);
4258         return ret;
4259 }
4260
4261 /*
4262  * Traverse the given INODE_REF and call find_dir_item() to find related
4263  * DIR_ITEM/DIR_INDEX.
4264  *
4265  * @root:       the root of the fs/file tree
4266  * @ref_key:    the key of the INODE_REF
4267  * @refs:       the count of INODE_REF
4268  * @mode:       the st_mode of INODE_ITEM
4269  *
4270  * Return 0 if no error occurred.
4271  */
4272 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4273                            struct extent_buffer *node, int slot, u64 *refs,
4274                            int mode)
4275 {
4276         struct btrfs_key key;
4277         struct btrfs_inode_ref *ref;
4278         char namebuf[BTRFS_NAME_LEN] = {0};
4279         u32 total;
4280         u32 cur = 0;
4281         u32 len;
4282         u32 name_len;
4283         u64 index;
4284         int ret, err = 0;
4285
4286         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4287         total = btrfs_item_size_nr(node, slot);
4288
4289 next:
4290         /* Update inode ref count */
4291         (*refs)++;
4292
4293         index = btrfs_inode_ref_index(node, ref);
4294         name_len = btrfs_inode_ref_name_len(node, ref);
4295         if (name_len <= BTRFS_NAME_LEN) {
4296                 len = name_len;
4297         } else {
4298                 len = BTRFS_NAME_LEN;
4299                 warning("root %llu INODE_REF[%llu %llu] name too long",
4300                         root->objectid, ref_key->objectid, ref_key->offset);
4301         }
4302
4303         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4304
4305         /* Check root dir ref name */
4306         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4307                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4308                       root->objectid, ref_key->objectid, ref_key->offset,
4309                       namebuf);
4310                 err |= ROOT_DIR_ERROR;
4311         }
4312
4313         /* Find related DIR_INDEX */
4314         key.objectid = ref_key->offset;
4315         key.type = BTRFS_DIR_INDEX_KEY;
4316         key.offset = index;
4317         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4318         err |= ret;
4319
4320         /* Find related dir_item */
4321         key.objectid = ref_key->offset;
4322         key.type = BTRFS_DIR_ITEM_KEY;
4323         key.offset = btrfs_name_hash(namebuf, len);
4324         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4325         err |= ret;
4326
4327         len = sizeof(*ref) + name_len;
4328         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4329         cur += len;
4330         if (cur < total)
4331                 goto next;
4332
4333         return err;
4334 }
4335
4336 /*
4337  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4338  * DIR_ITEM/DIR_INDEX.
4339  *
4340  * @root:       the root of the fs/file tree
4341  * @ref_key:    the key of the INODE_EXTREF
4342  * @refs:       the count of INODE_EXTREF
4343  * @mode:       the st_mode of INODE_ITEM
4344  *
4345  * Return 0 if no error occurred.
4346  */
4347 static int check_inode_extref(struct btrfs_root *root,
4348                               struct btrfs_key *ref_key,
4349                               struct extent_buffer *node, int slot, u64 *refs,
4350                               int mode)
4351 {
4352         struct btrfs_key key;
4353         struct btrfs_inode_extref *extref;
4354         char namebuf[BTRFS_NAME_LEN] = {0};
4355         u32 total;
4356         u32 cur = 0;
4357         u32 len;
4358         u32 name_len;
4359         u64 index;
4360         u64 parent;
4361         int ret;
4362         int err = 0;
4363
4364         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4365         total = btrfs_item_size_nr(node, slot);
4366
4367 next:
4368         /* update inode ref count */
4369         (*refs)++;
4370         name_len = btrfs_inode_extref_name_len(node, extref);
4371         index = btrfs_inode_extref_index(node, extref);
4372         parent = btrfs_inode_extref_parent(node, extref);
4373         if (name_len <= BTRFS_NAME_LEN) {
4374                 len = name_len;
4375         } else {
4376                 len = BTRFS_NAME_LEN;
4377                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4378                         root->objectid, ref_key->objectid, ref_key->offset);
4379         }
4380         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4381
4382         /* Check root dir ref name */
4383         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4384                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4385                       root->objectid, ref_key->objectid, ref_key->offset,
4386                       namebuf);
4387                 err |= ROOT_DIR_ERROR;
4388         }
4389
4390         /* find related dir_index */
4391         key.objectid = parent;
4392         key.type = BTRFS_DIR_INDEX_KEY;
4393         key.offset = index;
4394         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4395         err |= ret;
4396
4397         /* find related dir_item */
4398         key.objectid = parent;
4399         key.type = BTRFS_DIR_ITEM_KEY;
4400         key.offset = btrfs_name_hash(namebuf, len);
4401         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4402         err |= ret;
4403
4404         len = sizeof(*extref) + name_len;
4405         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4406         cur += len;
4407
4408         if (cur < total)
4409                 goto next;
4410
4411         return err;
4412 }
4413
4414 /*
4415  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4416  * DIR_ITEM/DIR_INDEX match.
4417  *
4418  * @root:       the root of the fs/file tree
4419  * @key:        the key of the INODE_REF/INODE_EXTREF
4420  * @name:       the name in the INODE_REF/INODE_EXTREF
4421  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4422  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4423  * to (u64)-1
4424  * @ext_ref:    the EXTENDED_IREF feature
4425  *
4426  * Return 0 if no error occurred.
4427  * Return >0 for error bitmap
4428  */
4429 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4430                           char *name, int namelen, u64 index,
4431                           unsigned int ext_ref)
4432 {
4433         struct btrfs_path path;
4434         struct btrfs_inode_ref *ref;
4435         struct btrfs_inode_extref *extref;
4436         struct extent_buffer *node;
4437         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4438         u32 total;
4439         u32 cur = 0;
4440         u32 len;
4441         u32 ref_namelen;
4442         u64 ref_index;
4443         u64 parent;
4444         u64 dir_id;
4445         int slot;
4446         int ret;
4447
4448         btrfs_init_path(&path);
4449         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4450         if (ret) {
4451                 ret = INODE_REF_MISSING;
4452                 goto extref;
4453         }
4454
4455         node = path.nodes[0];
4456         slot = path.slots[0];
4457
4458         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4459         total = btrfs_item_size_nr(node, slot);
4460
4461         /* Iterate all entry of INODE_REF */
4462         while (cur < total) {
4463                 ret = INODE_REF_MISSING;
4464
4465                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4466                 ref_index = btrfs_inode_ref_index(node, ref);
4467                 if (index != (u64)-1 && index != ref_index)
4468                         goto next_ref;
4469
4470                 if (ref_namelen <= BTRFS_NAME_LEN) {
4471                         len = ref_namelen;
4472                 } else {
4473                         len = BTRFS_NAME_LEN;
4474                         warning("root %llu INODE %s[%llu %llu] name too long",
4475                                 root->objectid,
4476                                 key->type == BTRFS_INODE_REF_KEY ?
4477                                         "REF" : "EXTREF",
4478                                 key->objectid, key->offset);
4479                 }
4480                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4481                                    len);
4482
4483                 if (len != namelen || strncmp(ref_namebuf, name, len))
4484                         goto next_ref;
4485
4486                 ret = 0;
4487                 goto out;
4488 next_ref:
4489                 len = sizeof(*ref) + ref_namelen;
4490                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4491                 cur += len;
4492         }
4493
4494 extref:
4495         /* Skip if not support EXTENDED_IREF feature */
4496         if (!ext_ref)
4497                 goto out;
4498
4499         btrfs_release_path(&path);
4500         btrfs_init_path(&path);
4501
4502         dir_id = key->offset;
4503         key->type = BTRFS_INODE_EXTREF_KEY;
4504         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4505
4506         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4507         if (ret) {
4508                 ret = INODE_REF_MISSING;
4509                 goto out;
4510         }
4511
4512         node = path.nodes[0];
4513         slot = path.slots[0];
4514
4515         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4516         cur = 0;
4517         total = btrfs_item_size_nr(node, slot);
4518
4519         /* Iterate all entry of INODE_EXTREF */
4520         while (cur < total) {
4521                 ret = INODE_REF_MISSING;
4522
4523                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4524                 ref_index = btrfs_inode_extref_index(node, extref);
4525                 parent = btrfs_inode_extref_parent(node, extref);
4526                 if (index != (u64)-1 && index != ref_index)
4527                         goto next_extref;
4528
4529                 if (parent != dir_id)
4530                         goto next_extref;
4531
4532                 if (ref_namelen <= BTRFS_NAME_LEN) {
4533                         len = ref_namelen;
4534                 } else {
4535                         len = BTRFS_NAME_LEN;
4536                         warning("root %llu INODE %s[%llu %llu] name too long",
4537                                 root->objectid,
4538                                 key->type == BTRFS_INODE_REF_KEY ?
4539                                         "REF" : "EXTREF",
4540                                 key->objectid, key->offset);
4541                 }
4542                 read_extent_buffer(node, ref_namebuf,
4543                                    (unsigned long)(extref + 1), len);
4544
4545                 if (len != namelen || strncmp(ref_namebuf, name, len))
4546                         goto next_extref;
4547
4548                 ret = 0;
4549                 goto out;
4550
4551 next_extref:
4552                 len = sizeof(*extref) + ref_namelen;
4553                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4554                 cur += len;
4555
4556         }
4557 out:
4558         btrfs_release_path(&path);
4559         return ret;
4560 }
4561
4562 /*
4563  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4564  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4565  *
4566  * @root:       the root of the fs/file tree
4567  * @key:        the key of the INODE_REF/INODE_EXTREF
4568  * @size:       the st_size of the INODE_ITEM
4569  * @ext_ref:    the EXTENDED_IREF feature
4570  *
4571  * Return 0 if no error occurred.
4572  */
4573 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4574                           struct extent_buffer *node, int slot, u64 *size,
4575                           unsigned int ext_ref)
4576 {
4577         struct btrfs_dir_item *di;
4578         struct btrfs_inode_item *ii;
4579         struct btrfs_path path;
4580         struct btrfs_key location;
4581         char namebuf[BTRFS_NAME_LEN] = {0};
4582         u32 total;
4583         u32 cur = 0;
4584         u32 len;
4585         u32 name_len;
4586         u32 data_len;
4587         u8 filetype;
4588         u32 mode;
4589         u64 index;
4590         int ret;
4591         int err = 0;
4592
4593         /*
4594          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4595          * ignore index check.
4596          */
4597         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4598
4599         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4600         total = btrfs_item_size_nr(node, slot);
4601
4602         while (cur < total) {
4603                 data_len = btrfs_dir_data_len(node, di);
4604                 if (data_len)
4605                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4606                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4607                               "DIR_ITEM" : "DIR_INDEX",
4608                               key->objectid, key->offset, data_len);
4609
4610                 name_len = btrfs_dir_name_len(node, di);
4611                 if (name_len <= BTRFS_NAME_LEN) {
4612                         len = name_len;
4613                 } else {
4614                         len = BTRFS_NAME_LEN;
4615                         warning("root %llu %s[%llu %llu] name too long",
4616                                 root->objectid,
4617                                 key->type == BTRFS_DIR_ITEM_KEY ?
4618                                 "DIR_ITEM" : "DIR_INDEX",
4619                                 key->objectid, key->offset);
4620                 }
4621                 (*size) += name_len;
4622
4623                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4624                 filetype = btrfs_dir_type(node, di);
4625
4626                 btrfs_init_path(&path);
4627                 btrfs_dir_item_key_to_cpu(node, di, &location);
4628
4629                 /* Ignore related ROOT_ITEM check */
4630                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4631                         goto next;
4632
4633                 /* Check relative INODE_ITEM(existence/filetype) */
4634                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4635                 if (ret) {
4636                         err |= INODE_ITEM_MISSING;
4637                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4638                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4639                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4640                               key->offset, location.objectid, name_len,
4641                               namebuf, filetype);
4642                         goto next;
4643                 }
4644
4645                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4646                                     struct btrfs_inode_item);
4647                 mode = btrfs_inode_mode(path.nodes[0], ii);
4648
4649                 if (imode_to_type(mode) != filetype) {
4650                         err |= INODE_ITEM_MISMATCH;
4651                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4652                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4653                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4654                               key->offset, name_len, namebuf, filetype);
4655                 }
4656
4657                 /* Check relative INODE_REF/INODE_EXTREF */
4658                 location.type = BTRFS_INODE_REF_KEY;
4659                 location.offset = key->objectid;
4660                 ret = find_inode_ref(root, &location, namebuf, len,
4661                                        index, ext_ref);
4662                 err |= ret;
4663                 if (ret & INODE_REF_MISSING)
4664                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4665                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4666                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4667                               key->offset, name_len, namebuf, filetype);
4668
4669 next:
4670                 btrfs_release_path(&path);
4671                 len = sizeof(*di) + name_len + data_len;
4672                 di = (struct btrfs_dir_item *)((char *)di + len);
4673                 cur += len;
4674
4675                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4676                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4677                               root->objectid, key->objectid, key->offset);
4678                         break;
4679                 }
4680         }
4681
4682         return err;
4683 }
4684
4685 /*
4686  * Check file extent datasum/hole, update the size of the file extents,
4687  * check and update the last offset of the file extent.
4688  *
4689  * @root:       the root of fs/file tree.
4690  * @fkey:       the key of the file extent.
4691  * @nodatasum:  INODE_NODATASUM feature.
4692  * @size:       the sum of all EXTENT_DATA items size for this inode.
4693  * @end:        the offset of the last extent.
4694  *
4695  * Return 0 if no error occurred.
4696  */
4697 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4698                              struct extent_buffer *node, int slot,
4699                              unsigned int nodatasum, u64 *size, u64 *end)
4700 {
4701         struct btrfs_file_extent_item *fi;
4702         u64 disk_bytenr;
4703         u64 disk_num_bytes;
4704         u64 extent_num_bytes;
4705         u64 found;
4706         unsigned int extent_type;
4707         unsigned int is_hole;
4708         int ret;
4709         int err = 0;
4710
4711         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4712
4713         extent_type = btrfs_file_extent_type(node, fi);
4714         /* Skip if file extent is inline */
4715         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4716                 struct btrfs_item *e = btrfs_item_nr(slot);
4717                 u32 item_inline_len;
4718
4719                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4720                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4721                 if (extent_num_bytes == 0 ||
4722                     extent_num_bytes != item_inline_len)
4723                         err |= FILE_EXTENT_ERROR;
4724                 *size += extent_num_bytes;
4725                 return err;
4726         }
4727
4728         /* Check extent type */
4729         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4730                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4731                 err |= FILE_EXTENT_ERROR;
4732                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4733                       root->objectid, fkey->objectid, fkey->offset);
4734                 return err;
4735         }
4736
4737         /* Check REG_EXTENT/PREALLOC_EXTENT */
4738         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4739         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4740         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4741         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4742
4743         /* Check EXTENT_DATA datasum */
4744         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4745         if (found > 0 && nodatasum) {
4746                 err |= ODD_CSUM_ITEM;
4747                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4748                       root->objectid, fkey->objectid, fkey->offset);
4749         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4750                    !is_hole &&
4751                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4752                 err |= CSUM_ITEM_MISSING;
4753                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4754                       root->objectid, fkey->objectid, fkey->offset);
4755         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4756                 err |= ODD_CSUM_ITEM;
4757                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4758                       root->objectid, fkey->objectid, fkey->offset);
4759         }
4760
4761         /* Check EXTENT_DATA hole */
4762         if (no_holes && is_hole) {
4763                 err |= FILE_EXTENT_ERROR;
4764                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4765                       root->objectid, fkey->objectid, fkey->offset);
4766         } else if (!no_holes && *end != fkey->offset) {
4767                 err |= FILE_EXTENT_ERROR;
4768                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4769                       root->objectid, fkey->objectid, fkey->offset);
4770         }
4771
4772         *end += extent_num_bytes;
4773         if (!is_hole)
4774                 *size += extent_num_bytes;
4775
4776         return err;
4777 }
4778
4779 /*
4780  * Check INODE_ITEM and related ITEMs (the same inode number)
4781  * 1. check link count
4782  * 2. check inode ref/extref
4783  * 3. check dir item/index
4784  *
4785  * @ext_ref:    the EXTENDED_IREF feature
4786  *
4787  * Return 0 if no error occurred.
4788  * Return >0 for error or hit the traversal is done(by error bitmap)
4789  */
4790 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4791                             unsigned int ext_ref)
4792 {
4793         struct extent_buffer *node;
4794         struct btrfs_inode_item *ii;
4795         struct btrfs_key key;
4796         u64 inode_id;
4797         u32 mode;
4798         u64 nlink;
4799         u64 nbytes;
4800         u64 isize;
4801         u64 size = 0;
4802         u64 refs = 0;
4803         u64 extent_end = 0;
4804         u64 extent_size = 0;
4805         unsigned int dir;
4806         unsigned int nodatasum;
4807         int slot;
4808         int ret;
4809         int err = 0;
4810
4811         node = path->nodes[0];
4812         slot = path->slots[0];
4813
4814         btrfs_item_key_to_cpu(node, &key, slot);
4815         inode_id = key.objectid;
4816
4817         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4818                 ret = btrfs_next_item(root, path);
4819                 if (ret > 0)
4820                         err |= LAST_ITEM;
4821                 return err;
4822         }
4823
4824         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4825         isize = btrfs_inode_size(node, ii);
4826         nbytes = btrfs_inode_nbytes(node, ii);
4827         mode = btrfs_inode_mode(node, ii);
4828         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4829         nlink = btrfs_inode_nlink(node, ii);
4830         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4831
4832         while (1) {
4833                 ret = btrfs_next_item(root, path);
4834                 if (ret < 0) {
4835                         /* out will fill 'err' rusing current statistics */
4836                         goto out;
4837                 } else if (ret > 0) {
4838                         err |= LAST_ITEM;
4839                         goto out;
4840                 }
4841
4842                 node = path->nodes[0];
4843                 slot = path->slots[0];
4844                 btrfs_item_key_to_cpu(node, &key, slot);
4845                 if (key.objectid != inode_id)
4846                         goto out;
4847
4848                 switch (key.type) {
4849                 case BTRFS_INODE_REF_KEY:
4850                         ret = check_inode_ref(root, &key, node, slot, &refs,
4851                                               mode);
4852                         err |= ret;
4853                         break;
4854                 case BTRFS_INODE_EXTREF_KEY:
4855                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4856                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4857                                         root->objectid, key.objectid,
4858                                         key.offset);
4859                         ret = check_inode_extref(root, &key, node, slot, &refs,
4860                                                  mode);
4861                         err |= ret;
4862                         break;
4863                 case BTRFS_DIR_ITEM_KEY:
4864                 case BTRFS_DIR_INDEX_KEY:
4865                         if (!dir) {
4866                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4867                                         root->objectid, inode_id,
4868                                         imode_to_type(mode), key.objectid,
4869                                         key.offset);
4870                         }
4871                         ret = check_dir_item(root, &key, node, slot, &size,
4872                                              ext_ref);
4873                         err |= ret;
4874                         break;
4875                 case BTRFS_EXTENT_DATA_KEY:
4876                         if (dir) {
4877                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4878                                         root->objectid, inode_id, key.objectid,
4879                                         key.offset);
4880                         }
4881                         ret = check_file_extent(root, &key, node, slot,
4882                                                 nodatasum, &extent_size,
4883                                                 &extent_end);
4884                         err |= ret;
4885                         break;
4886                 case BTRFS_XATTR_ITEM_KEY:
4887                         break;
4888                 default:
4889                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4890                               key.objectid, key.type, key.offset);
4891                 }
4892         }
4893
4894 out:
4895         /* verify INODE_ITEM nlink/isize/nbytes */
4896         if (dir) {
4897                 if (nlink != 1) {
4898                         err |= LINK_COUNT_ERROR;
4899                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4900                               root->objectid, inode_id, nlink);
4901                 }
4902
4903                 /*
4904                  * Just a warning, as dir inode nbytes is just an
4905                  * instructive value.
4906                  */
4907                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4908                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4909                                 root->objectid, inode_id, root->nodesize);
4910                 }
4911
4912                 if (isize != size) {
4913                         err |= ISIZE_ERROR;
4914                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4915                               root->objectid, inode_id, isize, size);
4916                 }
4917         } else {
4918                 if (nlink != refs) {
4919                         err |= LINK_COUNT_ERROR;
4920                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4921                               root->objectid, inode_id, nlink, refs);
4922                 } else if (!nlink) {
4923                         err |= ORPHAN_ITEM;
4924                 }
4925
4926                 if (!nbytes && !no_holes && extent_end < isize) {
4927                         err |= NBYTES_ERROR;
4928                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4929                               root->objectid, inode_id, isize);
4930                 }
4931
4932                 if (nbytes != extent_size) {
4933                         err |= NBYTES_ERROR;
4934                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4935                               root->objectid, inode_id, nbytes, extent_size);
4936                 }
4937         }
4938
4939         return err;
4940 }
4941
4942 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4943 {
4944         struct btrfs_path path;
4945         struct btrfs_key key;
4946         int err = 0;
4947         int ret;
4948
4949         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4950         key.type = BTRFS_INODE_ITEM_KEY;
4951         key.offset = 0;
4952
4953         /* For root being dropped, we don't need to check first inode */
4954         if (btrfs_root_refs(&root->root_item) == 0 &&
4955             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4956             key.objectid)
4957                 return 0;
4958
4959         btrfs_init_path(&path);
4960
4961         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4962         if (ret < 0)
4963                 goto out;
4964         if (ret > 0) {
4965                 ret = 0;
4966                 err |= INODE_ITEM_MISSING;
4967         }
4968
4969         err |= check_inode_item(root, &path, ext_ref);
4970         err &= ~LAST_ITEM;
4971         if (err && !ret)
4972                 ret = -EIO;
4973 out:
4974         btrfs_release_path(&path);
4975         return ret;
4976 }
4977
4978 /*
4979  * Iterate all item on the tree and call check_inode_item() to check.
4980  *
4981  * @root:       the root of the tree to be checked.
4982  * @ext_ref:    the EXTENDED_IREF feature
4983  *
4984  * Return 0 if no error found.
4985  * Return <0 for error.
4986  */
4987 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4988 {
4989         struct btrfs_path path;
4990         struct node_refs nrefs;
4991         struct btrfs_root_item *root_item = &root->root_item;
4992         int ret, wret;
4993         int level;
4994
4995         /*
4996          * We need to manually check the first inode item(256)
4997          * As the following traversal function will only start from
4998          * the first inode item in the leaf, if inode item(256) is missing
4999          * we will just skip it forever.
5000          */
5001         ret = check_fs_first_inode(root, ext_ref);
5002         if (ret < 0)
5003                 return ret;
5004
5005         memset(&nrefs, 0, sizeof(nrefs));
5006         level = btrfs_header_level(root->node);
5007         btrfs_init_path(&path);
5008
5009         if (btrfs_root_refs(root_item) > 0 ||
5010             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5011                 path.nodes[level] = root->node;
5012                 path.slots[level] = 0;
5013                 extent_buffer_get(root->node);
5014         } else {
5015                 struct btrfs_key key;
5016
5017                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5018                 level = root_item->drop_level;
5019                 path.lowest_level = level;
5020                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5021                 if (ret < 0)
5022                         goto out;
5023                 ret = 0;
5024         }
5025
5026         while (1) {
5027                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5028                 if (wret < 0)
5029                         ret = wret;
5030                 if (wret != 0)
5031                         break;
5032
5033                 wret = walk_up_tree_v2(root, &path, &level);
5034                 if (wret < 0)
5035                         ret = wret;
5036                 if (wret != 0)
5037                         break;
5038         }
5039
5040 out:
5041         btrfs_release_path(&path);
5042         return ret;
5043 }
5044
5045 /*
5046  * Find the relative ref for root_ref and root_backref.
5047  *
5048  * @root:       the root of the root tree.
5049  * @ref_key:    the key of the root ref.
5050  *
5051  * Return 0 if no error occurred.
5052  */
5053 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5054                           struct extent_buffer *node, int slot)
5055 {
5056         struct btrfs_path path;
5057         struct btrfs_key key;
5058         struct btrfs_root_ref *ref;
5059         struct btrfs_root_ref *backref;
5060         char ref_name[BTRFS_NAME_LEN] = {0};
5061         char backref_name[BTRFS_NAME_LEN] = {0};
5062         u64 ref_dirid;
5063         u64 ref_seq;
5064         u32 ref_namelen;
5065         u64 backref_dirid;
5066         u64 backref_seq;
5067         u32 backref_namelen;
5068         u32 len;
5069         int ret;
5070         int err = 0;
5071
5072         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5073         ref_dirid = btrfs_root_ref_dirid(node, ref);
5074         ref_seq = btrfs_root_ref_sequence(node, ref);
5075         ref_namelen = btrfs_root_ref_name_len(node, ref);
5076
5077         if (ref_namelen <= BTRFS_NAME_LEN) {
5078                 len = ref_namelen;
5079         } else {
5080                 len = BTRFS_NAME_LEN;
5081                 warning("%s[%llu %llu] ref_name too long",
5082                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5083                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5084                         ref_key->offset);
5085         }
5086         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5087
5088         /* Find relative root_ref */
5089         key.objectid = ref_key->offset;
5090         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5091         key.offset = ref_key->objectid;
5092
5093         btrfs_init_path(&path);
5094         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5095         if (ret) {
5096                 err |= ROOT_REF_MISSING;
5097                 error("%s[%llu %llu] couldn't find relative ref",
5098                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5099                       "ROOT_REF" : "ROOT_BACKREF",
5100                       ref_key->objectid, ref_key->offset);
5101                 goto out;
5102         }
5103
5104         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5105                                  struct btrfs_root_ref);
5106         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5107         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5108         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5109
5110         if (backref_namelen <= BTRFS_NAME_LEN) {
5111                 len = backref_namelen;
5112         } else {
5113                 len = BTRFS_NAME_LEN;
5114                 warning("%s[%llu %llu] ref_name too long",
5115                         key.type == BTRFS_ROOT_REF_KEY ?
5116                         "ROOT_REF" : "ROOT_BACKREF",
5117                         key.objectid, key.offset);
5118         }
5119         read_extent_buffer(path.nodes[0], backref_name,
5120                            (unsigned long)(backref + 1), len);
5121
5122         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5123             ref_namelen != backref_namelen ||
5124             strncmp(ref_name, backref_name, len)) {
5125                 err |= ROOT_REF_MISMATCH;
5126                 error("%s[%llu %llu] mismatch relative ref",
5127                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5128                       "ROOT_REF" : "ROOT_BACKREF",
5129                       ref_key->objectid, ref_key->offset);
5130         }
5131 out:
5132         btrfs_release_path(&path);
5133         return err;
5134 }
5135
5136 /*
5137  * Check all fs/file tree in low_memory mode.
5138  *
5139  * 1. for fs tree root item, call check_fs_root_v2()
5140  * 2. for fs tree root ref/backref, call check_root_ref()
5141  *
5142  * Return 0 if no error occurred.
5143  */
5144 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5145 {
5146         struct btrfs_root *tree_root = fs_info->tree_root;
5147         struct btrfs_root *cur_root = NULL;
5148         struct btrfs_path path;
5149         struct btrfs_key key;
5150         struct extent_buffer *node;
5151         unsigned int ext_ref;
5152         int slot;
5153         int ret;
5154         int err = 0;
5155
5156         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5157
5158         btrfs_init_path(&path);
5159         key.objectid = BTRFS_FS_TREE_OBJECTID;
5160         key.offset = 0;
5161         key.type = BTRFS_ROOT_ITEM_KEY;
5162
5163         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5164         if (ret < 0) {
5165                 err = ret;
5166                 goto out;
5167         } else if (ret > 0) {
5168                 err = -ENOENT;
5169                 goto out;
5170         }
5171
5172         while (1) {
5173                 node = path.nodes[0];
5174                 slot = path.slots[0];
5175                 btrfs_item_key_to_cpu(node, &key, slot);
5176                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5177                         goto out;
5178                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5179                     fs_root_objectid(key.objectid)) {
5180                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5181                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5182                                                                        &key);
5183                         } else {
5184                                 key.offset = (u64)-1;
5185                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5186                         }
5187
5188                         if (IS_ERR(cur_root)) {
5189                                 error("Fail to read fs/subvol tree: %lld",
5190                                       key.objectid);
5191                                 err = -EIO;
5192                                 goto next;
5193                         }
5194
5195                         ret = check_fs_root_v2(cur_root, ext_ref);
5196                         err |= ret;
5197
5198                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5199                                 btrfs_free_fs_root(cur_root);
5200                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5201                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5202                         ret = check_root_ref(tree_root, &key, node, slot);
5203                         err |= ret;
5204                 }
5205 next:
5206                 ret = btrfs_next_item(tree_root, &path);
5207                 if (ret > 0)
5208                         goto out;
5209                 if (ret < 0) {
5210                         err = ret;
5211                         goto out;
5212                 }
5213         }
5214
5215 out:
5216         btrfs_release_path(&path);
5217         return err;
5218 }
5219
5220 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5221 {
5222         struct list_head *cur = rec->backrefs.next;
5223         struct extent_backref *back;
5224         struct tree_backref *tback;
5225         struct data_backref *dback;
5226         u64 found = 0;
5227         int err = 0;
5228
5229         while(cur != &rec->backrefs) {
5230                 back = to_extent_backref(cur);
5231                 cur = cur->next;
5232                 if (!back->found_extent_tree) {
5233                         err = 1;
5234                         if (!print_errs)
5235                                 goto out;
5236                         if (back->is_data) {
5237                                 dback = to_data_backref(back);
5238                                 fprintf(stderr, "Backref %llu %s %llu"
5239                                         " owner %llu offset %llu num_refs %lu"
5240                                         " not found in extent tree\n",
5241                                         (unsigned long long)rec->start,
5242                                         back->full_backref ?
5243                                         "parent" : "root",
5244                                         back->full_backref ?
5245                                         (unsigned long long)dback->parent:
5246                                         (unsigned long long)dback->root,
5247                                         (unsigned long long)dback->owner,
5248                                         (unsigned long long)dback->offset,
5249                                         (unsigned long)dback->num_refs);
5250                         } else {
5251                                 tback = to_tree_backref(back);
5252                                 fprintf(stderr, "Backref %llu parent %llu"
5253                                         " root %llu not found in extent tree\n",
5254                                         (unsigned long long)rec->start,
5255                                         (unsigned long long)tback->parent,
5256                                         (unsigned long long)tback->root);
5257                         }
5258                 }
5259                 if (!back->is_data && !back->found_ref) {
5260                         err = 1;
5261                         if (!print_errs)
5262                                 goto out;
5263                         tback = to_tree_backref(back);
5264                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5265                                 (unsigned long long)rec->start,
5266                                 back->full_backref ? "parent" : "root",
5267                                 back->full_backref ?
5268                                 (unsigned long long)tback->parent :
5269                                 (unsigned long long)tback->root, back);
5270                 }
5271                 if (back->is_data) {
5272                         dback = to_data_backref(back);
5273                         if (dback->found_ref != dback->num_refs) {
5274                                 err = 1;
5275                                 if (!print_errs)
5276                                         goto out;
5277                                 fprintf(stderr, "Incorrect local backref count"
5278                                         " on %llu %s %llu owner %llu"
5279                                         " offset %llu found %u wanted %u back %p\n",
5280                                         (unsigned long long)rec->start,
5281                                         back->full_backref ?
5282                                         "parent" : "root",
5283                                         back->full_backref ?
5284                                         (unsigned long long)dback->parent:
5285                                         (unsigned long long)dback->root,
5286                                         (unsigned long long)dback->owner,
5287                                         (unsigned long long)dback->offset,
5288                                         dback->found_ref, dback->num_refs, back);
5289                         }
5290                         if (dback->disk_bytenr != rec->start) {
5291                                 err = 1;
5292                                 if (!print_errs)
5293                                         goto out;
5294                                 fprintf(stderr, "Backref disk bytenr does not"
5295                                         " match extent record, bytenr=%llu, "
5296                                         "ref bytenr=%llu\n",
5297                                         (unsigned long long)rec->start,
5298                                         (unsigned long long)dback->disk_bytenr);
5299                         }
5300
5301                         if (dback->bytes != rec->nr) {
5302                                 err = 1;
5303                                 if (!print_errs)
5304                                         goto out;
5305                                 fprintf(stderr, "Backref bytes do not match "
5306                                         "extent backref, bytenr=%llu, ref "
5307                                         "bytes=%llu, backref bytes=%llu\n",
5308                                         (unsigned long long)rec->start,
5309                                         (unsigned long long)rec->nr,
5310                                         (unsigned long long)dback->bytes);
5311                         }
5312                 }
5313                 if (!back->is_data) {
5314                         found += 1;
5315                 } else {
5316                         dback = to_data_backref(back);
5317                         found += dback->found_ref;
5318                 }
5319         }
5320         if (found != rec->refs) {
5321                 err = 1;
5322                 if (!print_errs)
5323                         goto out;
5324                 fprintf(stderr, "Incorrect global backref count "
5325                         "on %llu found %llu wanted %llu\n",
5326                         (unsigned long long)rec->start,
5327                         (unsigned long long)found,
5328                         (unsigned long long)rec->refs);
5329         }
5330 out:
5331         return err;
5332 }
5333
5334 static int free_all_extent_backrefs(struct extent_record *rec)
5335 {
5336         struct extent_backref *back;
5337         struct list_head *cur;
5338         while (!list_empty(&rec->backrefs)) {
5339                 cur = rec->backrefs.next;
5340                 back = to_extent_backref(cur);
5341                 list_del(cur);
5342                 free(back);
5343         }
5344         return 0;
5345 }
5346
5347 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5348                                      struct cache_tree *extent_cache)
5349 {
5350         struct cache_extent *cache;
5351         struct extent_record *rec;
5352
5353         while (1) {
5354                 cache = first_cache_extent(extent_cache);
5355                 if (!cache)
5356                         break;
5357                 rec = container_of(cache, struct extent_record, cache);
5358                 remove_cache_extent(extent_cache, cache);
5359                 free_all_extent_backrefs(rec);
5360                 free(rec);
5361         }
5362 }
5363
5364 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5365                                  struct extent_record *rec)
5366 {
5367         if (rec->content_checked && rec->owner_ref_checked &&
5368             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5369             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5370             !rec->bad_full_backref && !rec->crossing_stripes &&
5371             !rec->wrong_chunk_type) {
5372                 remove_cache_extent(extent_cache, &rec->cache);
5373                 free_all_extent_backrefs(rec);
5374                 list_del_init(&rec->list);
5375                 free(rec);
5376         }
5377         return 0;
5378 }
5379
5380 static int check_owner_ref(struct btrfs_root *root,
5381                             struct extent_record *rec,
5382                             struct extent_buffer *buf)
5383 {
5384         struct extent_backref *node;
5385         struct tree_backref *back;
5386         struct btrfs_root *ref_root;
5387         struct btrfs_key key;
5388         struct btrfs_path path;
5389         struct extent_buffer *parent;
5390         int level;
5391         int found = 0;
5392         int ret;
5393
5394         list_for_each_entry(node, &rec->backrefs, list) {
5395                 if (node->is_data)
5396                         continue;
5397                 if (!node->found_ref)
5398                         continue;
5399                 if (node->full_backref)
5400                         continue;
5401                 back = to_tree_backref(node);
5402                 if (btrfs_header_owner(buf) == back->root)
5403                         return 0;
5404         }
5405         BUG_ON(rec->is_root);
5406
5407         /* try to find the block by search corresponding fs tree */
5408         key.objectid = btrfs_header_owner(buf);
5409         key.type = BTRFS_ROOT_ITEM_KEY;
5410         key.offset = (u64)-1;
5411
5412         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5413         if (IS_ERR(ref_root))
5414                 return 1;
5415
5416         level = btrfs_header_level(buf);
5417         if (level == 0)
5418                 btrfs_item_key_to_cpu(buf, &key, 0);
5419         else
5420                 btrfs_node_key_to_cpu(buf, &key, 0);
5421
5422         btrfs_init_path(&path);
5423         path.lowest_level = level + 1;
5424         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5425         if (ret < 0)
5426                 return 0;
5427
5428         parent = path.nodes[level + 1];
5429         if (parent && buf->start == btrfs_node_blockptr(parent,
5430                                                         path.slots[level + 1]))
5431                 found = 1;
5432
5433         btrfs_release_path(&path);
5434         return found ? 0 : 1;
5435 }
5436
5437 static int is_extent_tree_record(struct extent_record *rec)
5438 {
5439         struct list_head *cur = rec->backrefs.next;
5440         struct extent_backref *node;
5441         struct tree_backref *back;
5442         int is_extent = 0;
5443
5444         while(cur != &rec->backrefs) {
5445                 node = to_extent_backref(cur);
5446                 cur = cur->next;
5447                 if (node->is_data)
5448                         return 0;
5449                 back = to_tree_backref(node);
5450                 if (node->full_backref)
5451                         return 0;
5452                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5453                         is_extent = 1;
5454         }
5455         return is_extent;
5456 }
5457
5458
5459 static int record_bad_block_io(struct btrfs_fs_info *info,
5460                                struct cache_tree *extent_cache,
5461                                u64 start, u64 len)
5462 {
5463         struct extent_record *rec;
5464         struct cache_extent *cache;
5465         struct btrfs_key key;
5466
5467         cache = lookup_cache_extent(extent_cache, start, len);
5468         if (!cache)
5469                 return 0;
5470
5471         rec = container_of(cache, struct extent_record, cache);
5472         if (!is_extent_tree_record(rec))
5473                 return 0;
5474
5475         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5476         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5477 }
5478
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped
 * wholesale; if slot 0 was involved the new first key is pushed up through
 * btrfs_fixup_low_keys().
 *
 * For a leaf the item payloads, item offsets, item sizes and keys are
 * exchanged; path->slots[0] is left at @slot + 1 afterwards.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /* First key of the node changed, update the parents */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                /* Temporary copies of both payloads */
                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data holds item1_size bytes but is
                 * written with item2_size (and vice versa).  When the two
                 * sizes differ this reads past the smaller allocation --
                 * confirm whether equal sizes are guaranteed by the caller.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset/size fields of the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Exchange the keys; the setter works on path->slots[0] */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5546
5547 static int fix_key_order(struct btrfs_trans_handle *trans,
5548                          struct btrfs_root *root,
5549                          struct btrfs_path *path)
5550 {
5551         struct extent_buffer *buf;
5552         struct btrfs_key k1, k2;
5553         int i;
5554         int level = path->lowest_level;
5555         int ret = -EIO;
5556
5557         buf = path->nodes[level];
5558         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5559                 if (level) {
5560                         btrfs_node_key_to_cpu(buf, &k1, i);
5561                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5562                 } else {
5563                         btrfs_item_key_to_cpu(buf, &k1, i);
5564                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5565                 }
5566                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5567                         continue;
5568                 ret = swap_values(root, path, buf, i);
5569                 if (ret)
5570                         break;
5571                 btrfs_mark_buffer_dirty(buf);
5572                 i = 0;
5573         }
5574         return ret;
5575 }
5576
5577 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5578                              struct btrfs_root *root,
5579                              struct btrfs_path *path,
5580                              struct extent_buffer *buf, int slot)
5581 {
5582         struct btrfs_key key;
5583         int nritems = btrfs_header_nritems(buf);
5584
5585         btrfs_item_key_to_cpu(buf, &key, slot);
5586
5587         /* These are all the keys we can deal with missing. */
5588         if (key.type != BTRFS_DIR_INDEX_KEY &&
5589             key.type != BTRFS_EXTENT_ITEM_KEY &&
5590             key.type != BTRFS_METADATA_ITEM_KEY &&
5591             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5592             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5593                 return -1;
5594
5595         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5596                (unsigned long long)key.objectid, key.type,
5597                (unsigned long long)key.offset, slot, buf->start);
5598         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5599                               btrfs_item_nr_offset(slot + 1),
5600                               sizeof(struct btrfs_item) *
5601                               (nritems - slot - 1));
5602         btrfs_set_header_nritems(buf, nritems - 1);
5603         if (slot == 0) {
5604                 struct btrfs_disk_key disk_key;
5605
5606                 btrfs_item_key(buf, &disk_key, 0);
5607                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5608         }
5609         btrfs_mark_buffer_dirty(buf);
5610         return 0;
5611 }
5612
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * The end of item 0 is expected to equal BTRFS_LEAF_DATA_SIZE(root) and the
 * end of every later item is expected to equal the offset of the item before
 * it.  An item that ends too far is handed to delete_bogus_item(); an item
 * that ends too early has its data moved up to close the gap.
 *
 * Returns 0 on success.  Any failure trips the BUG_ON() at the end, so a
 * non-zero value is never actually returned.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        /* First item must end exactly at the leaf data end */
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                /* Item removed, rescan from the first item */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        /* Gap between the item end and the leaf data end */
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        /* Later items must end where the previous one starts */
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                /* Item removed, rescan from the first item */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        /* Gap between this item and the previous one */
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                /* Move the payload up by @shift and record the new offset */
                offset = btrfs_item_offset_nr(buf, i);
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
5681
5682 /*
5683  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5684  * then just return -EIO.
5685  */
5686 static int try_to_fix_bad_block(struct btrfs_root *root,
5687                                 struct extent_buffer *buf,
5688                                 enum btrfs_tree_block_status status)
5689 {
5690         struct btrfs_trans_handle *trans;
5691         struct ulist *roots;
5692         struct ulist_node *node;
5693         struct btrfs_root *search_root;
5694         struct btrfs_path path;
5695         struct ulist_iterator iter;
5696         struct btrfs_key root_key, key;
5697         int ret;
5698
5699         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5700             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5701                 return -EIO;
5702
5703         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5704         if (ret)
5705                 return -EIO;
5706
5707         btrfs_init_path(&path);
5708         ULIST_ITER_INIT(&iter);
5709         while ((node = ulist_next(roots, &iter))) {
5710                 root_key.objectid = node->val;
5711                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5712                 root_key.offset = (u64)-1;
5713
5714                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5715                 if (IS_ERR(root)) {
5716                         ret = -EIO;
5717                         break;
5718                 }
5719
5720
5721                 trans = btrfs_start_transaction(search_root, 0);
5722                 if (IS_ERR(trans)) {
5723                         ret = PTR_ERR(trans);
5724                         break;
5725                 }
5726
5727                 path.lowest_level = btrfs_header_level(buf);
5728                 path.skip_check_block = 1;
5729                 if (path.lowest_level)
5730                         btrfs_node_key_to_cpu(buf, &key, 0);
5731                 else
5732                         btrfs_item_key_to_cpu(buf, &key, 0);
5733                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5734                 if (ret) {
5735                         ret = -EIO;
5736                         btrfs_commit_transaction(trans, search_root);
5737                         break;
5738                 }
5739                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5740                         ret = fix_key_order(trans, search_root, &path);
5741                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5742                         ret = fix_item_offset(trans, search_root, &path);
5743                 if (ret) {
5744                         btrfs_commit_transaction(trans, search_root);
5745                         break;
5746                 }
5747                 btrfs_release_path(&path);
5748                 btrfs_commit_transaction(trans, search_root);
5749         }
5750         ulist_free(roots);
5751         btrfs_release_path(&path);
5752         return ret;
5753 }
5754
5755 static int check_block(struct btrfs_root *root,
5756                        struct cache_tree *extent_cache,
5757                        struct extent_buffer *buf, u64 flags)
5758 {
5759         struct extent_record *rec;
5760         struct cache_extent *cache;
5761         struct btrfs_key key;
5762         enum btrfs_tree_block_status status;
5763         int ret = 0;
5764         int level;
5765
5766         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5767         if (!cache)
5768                 return 1;
5769         rec = container_of(cache, struct extent_record, cache);
5770         rec->generation = btrfs_header_generation(buf);
5771
5772         level = btrfs_header_level(buf);
5773         if (btrfs_header_nritems(buf) > 0) {
5774
5775                 if (level == 0)
5776                         btrfs_item_key_to_cpu(buf, &key, 0);
5777                 else
5778                         btrfs_node_key_to_cpu(buf, &key, 0);
5779
5780                 rec->info_objectid = key.objectid;
5781         }
5782         rec->info_level = level;
5783
5784         if (btrfs_is_leaf(buf))
5785                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5786         else
5787                 status = btrfs_check_node(root, &rec->parent_key, buf);
5788
5789         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5790                 if (repair)
5791                         status = try_to_fix_bad_block(root, buf, status);
5792                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5793                         ret = -EIO;
5794                         fprintf(stderr, "bad block %llu\n",
5795                                 (unsigned long long)buf->start);
5796                 } else {
5797                         /*
5798                          * Signal to callers we need to start the scan over
5799                          * again since we'll have cowed blocks.
5800                          */
5801                         ret = -EAGAIN;
5802                 }
5803         } else {
5804                 rec->content_checked = 1;
5805                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5806                         rec->owner_ref_checked = 1;
5807                 else {
5808                         ret = check_owner_ref(root, rec, buf);
5809                         if (!ret)
5810                                 rec->owner_ref_checked = 1;
5811                 }
5812         }
5813         if (!ret)
5814                 maybe_free_extent_rec(extent_cache, rec);
5815         return ret;
5816 }
5817
5818 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5819                                                 u64 parent, u64 root)
5820 {
5821         struct list_head *cur = rec->backrefs.next;
5822         struct extent_backref *node;
5823         struct tree_backref *back;
5824
5825         while(cur != &rec->backrefs) {
5826                 node = to_extent_backref(cur);
5827                 cur = cur->next;
5828                 if (node->is_data)
5829                         continue;
5830                 back = to_tree_backref(node);
5831                 if (parent > 0) {
5832                         if (!node->full_backref)
5833                                 continue;
5834                         if (parent == back->parent)
5835                                 return back;
5836                 } else {
5837                         if (node->full_backref)
5838                                 continue;
5839                         if (back->root == root)
5840                                 return back;
5841                 }
5842         }
5843         return NULL;
5844 }
5845
5846 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5847                                                 u64 parent, u64 root)
5848 {
5849         struct tree_backref *ref = malloc(sizeof(*ref));
5850
5851         if (!ref)
5852                 return NULL;
5853         memset(&ref->node, 0, sizeof(ref->node));
5854         if (parent > 0) {
5855                 ref->parent = parent;
5856                 ref->node.full_backref = 1;
5857         } else {
5858                 ref->root = root;
5859                 ref->node.full_backref = 0;
5860         }
5861         list_add_tail(&ref->node.list, &rec->backrefs);
5862
5863         return ref;
5864 }
5865
5866 static struct data_backref *find_data_backref(struct extent_record *rec,
5867                                                 u64 parent, u64 root,
5868                                                 u64 owner, u64 offset,
5869                                                 int found_ref,
5870                                                 u64 disk_bytenr, u64 bytes)
5871 {
5872         struct list_head *cur = rec->backrefs.next;
5873         struct extent_backref *node;
5874         struct data_backref *back;
5875
5876         while(cur != &rec->backrefs) {
5877                 node = to_extent_backref(cur);
5878                 cur = cur->next;
5879                 if (!node->is_data)
5880                         continue;
5881                 back = to_data_backref(node);
5882                 if (parent > 0) {
5883                         if (!node->full_backref)
5884                                 continue;
5885                         if (parent == back->parent)
5886                                 return back;
5887                 } else {
5888                         if (node->full_backref)
5889                                 continue;
5890                         if (back->root == root && back->owner == owner &&
5891                             back->offset == offset) {
5892                                 if (found_ref && node->found_ref &&
5893                                     (back->bytes != bytes ||
5894                                     back->disk_bytenr != disk_bytenr))
5895                                         continue;
5896                                 return back;
5897                         }
5898                 }
5899         }
5900         return NULL;
5901 }
5902
5903 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5904                                                 u64 parent, u64 root,
5905                                                 u64 owner, u64 offset,
5906                                                 u64 max_size)
5907 {
5908         struct data_backref *ref = malloc(sizeof(*ref));
5909
5910         if (!ref)
5911                 return NULL;
5912         memset(&ref->node, 0, sizeof(ref->node));
5913         ref->node.is_data = 1;
5914
5915         if (parent > 0) {
5916                 ref->parent = parent;
5917                 ref->owner = 0;
5918                 ref->offset = 0;
5919                 ref->node.full_backref = 1;
5920         } else {
5921                 ref->root = root;
5922                 ref->owner = owner;
5923                 ref->offset = offset;
5924                 ref->node.full_backref = 0;
5925         }
5926         ref->bytes = max_size;
5927         ref->found_ref = 0;
5928         ref->num_refs = 0;
5929         list_add_tail(&ref->node.list, &rec->backrefs);
5930         if (max_size > rec->max_size)
5931                 rec->max_size = max_size;
5932         return ref;
5933 }
5934
5935 /* Check if the type of extent matches with its chunk */
5936 static void check_extent_type(struct extent_record *rec)
5937 {
5938         struct btrfs_block_group_cache *bg_cache;
5939
5940         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5941         if (!bg_cache)
5942                 return;
5943
5944         /* data extent, check chunk directly*/
5945         if (!rec->metadata) {
5946                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5947                         rec->wrong_chunk_type = 1;
5948                 return;
5949         }
5950
5951         /* metadata extent, check the obvious case first */
5952         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5953                                  BTRFS_BLOCK_GROUP_METADATA))) {
5954                 rec->wrong_chunk_type = 1;
5955                 return;
5956         }
5957
5958         /*
5959          * Check SYSTEM extent, as it's also marked as metadata, we can only
5960          * make sure it's a SYSTEM extent by its backref
5961          */
5962         if (!list_empty(&rec->backrefs)) {
5963                 struct extent_backref *node;
5964                 struct tree_backref *tback;
5965                 u64 bg_type;
5966
5967                 node = to_extent_backref(rec->backrefs.next);
5968                 if (node->is_data) {
5969                         /* tree block shouldn't have data backref */
5970                         rec->wrong_chunk_type = 1;
5971                         return;
5972                 }
5973                 tback = container_of(node, struct tree_backref, node);
5974
5975                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5976                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5977                 else
5978                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5979                 if (!(bg_cache->flags & bg_type))
5980                         rec->wrong_chunk_type = 1;
5981         }
5982 }
5983
5984 /*
5985  * Allocate a new extent record, fill default values from @tmpl and insert int
5986  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5987  * the cache, otherwise it fails.
5988  */
5989 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5990                 struct extent_record *tmpl)
5991 {
5992         struct extent_record *rec;
5993         int ret = 0;
5994
5995         rec = malloc(sizeof(*rec));
5996         if (!rec)
5997                 return -ENOMEM;
5998         rec->start = tmpl->start;
5999         rec->max_size = tmpl->max_size;
6000         rec->nr = max(tmpl->nr, tmpl->max_size);
6001         rec->found_rec = tmpl->found_rec;
6002         rec->content_checked = tmpl->content_checked;
6003         rec->owner_ref_checked = tmpl->owner_ref_checked;
6004         rec->num_duplicates = 0;
6005         rec->metadata = tmpl->metadata;
6006         rec->flag_block_full_backref = FLAG_UNSET;
6007         rec->bad_full_backref = 0;
6008         rec->crossing_stripes = 0;
6009         rec->wrong_chunk_type = 0;
6010         rec->is_root = tmpl->is_root;
6011         rec->refs = tmpl->refs;
6012         rec->extent_item_refs = tmpl->extent_item_refs;
6013         rec->parent_generation = tmpl->parent_generation;
6014         INIT_LIST_HEAD(&rec->backrefs);
6015         INIT_LIST_HEAD(&rec->dups);
6016         INIT_LIST_HEAD(&rec->list);
6017         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6018         rec->cache.start = tmpl->start;
6019         rec->cache.size = tmpl->nr;
6020         ret = insert_cache_extent(extent_cache, &rec->cache);
6021         if (ret) {
6022                 free(rec);
6023                 return ret;
6024         }
6025         bytes_used += rec->nr;
6026
6027         if (tmpl->metadata)
6028                 rec->crossing_stripes = check_crossing_stripes(global_info,
6029                                 rec->start, global_info->tree_root->nodesize);
6030         check_extent_type(rec);
6031         return ret;
6032 }
6033
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 *
 * Returns 0 on success, -ENOMEM if a duplicate record cannot be allocated, or
 * the result of add_extent_rec_nolookup() when a new record is created.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
                struct extent_record *tmpl)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int ret = 0;
        int dup = 0;

        cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
        if (cache) {
                rec = container_of(cache, struct extent_record, cache);
                /* Any non-zero tmpl->refs bumps the count by exactly one */
                if (tmpl->refs)
                        rec->refs++;
                if (rec->nr == 1)
                        rec->nr = max(tmpl->nr, tmpl->max_size);

                /*
                 * We need to make sure to reset nr to whatever the extent
                 * record says was the real size, this way we can compare it to
                 * the backrefs.
                 */
                if (tmpl->found_rec) {
                        if (tmpl->start != rec->start || rec->found_rec) {
                                struct extent_record *tmp;

                                dup = 1;
                                /* Queue the record once on the global list */
                                if (list_empty(&rec->list))
                                        list_add_tail(&rec->list,
                                                      &duplicate_extents);

                                /*
                                 * We have to do this song and dance in case we
                                 * find an extent record that falls inside of
                                 * our current extent record but does not have
                                 * the same objectid.
                                 */
                                /*
                                 * NOTE(review): only the fields assigned
                                 * below are initialised in tmp, and on
                                 * allocation failure rec has already been
                                 * modified above.
                                 */
                                tmp = malloc(sizeof(*tmp));
                                if (!tmp)
                                        return -ENOMEM;
                                tmp->start = tmpl->start;
                                tmp->max_size = tmpl->max_size;
                                tmp->nr = tmpl->nr;
                                tmp->found_rec = 1;
                                tmp->metadata = tmpl->metadata;
                                tmp->extent_item_refs = tmpl->extent_item_refs;
                                INIT_LIST_HEAD(&tmp->list);
                                list_add_tail(&tmp->list, &rec->dups);
                                rec->num_duplicates++;
                        } else {
                                rec->nr = tmpl->nr;
                                rec->found_rec = 1;
                        }
                }

                /* Duplicates keep the extent_item_refs already recorded */
                if (tmpl->extent_item_refs && !dup) {
                        if (rec->extent_item_refs) {
                                fprintf(stderr, "block %llu rec "
                                        "extent_item_refs %llu, passed %llu\n",
                                        (unsigned long long)tmpl->start,
                                        (unsigned long long)
                                                        rec->extent_item_refs,
                                        (unsigned long long)tmpl->extent_item_refs);
                        }
                        rec->extent_item_refs = tmpl->extent_item_refs;
                }
                if (tmpl->is_root)
                        rec->is_root = 1;
                if (tmpl->content_checked)
                        rec->content_checked = 1;
                if (tmpl->owner_ref_checked)
                        rec->owner_ref_checked = 1;
                /* The parent key is overwritten unconditionally */
                memcpy(&rec->parent_key, &tmpl->parent_key,
                                sizeof(tmpl->parent_key));
                if (tmpl->parent_generation)
                        rec->parent_generation = tmpl->parent_generation;
                if (rec->max_size < tmpl->max_size)
                        rec->max_size = tmpl->max_size;

                /*
                 * A metadata extent can't cross stripe_len boundary, otherwise
                 * kernel scrub won't be able to handle it.
                 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
                 * it.
                 */
                if (tmpl->metadata)
                        rec->crossing_stripes = check_crossing_stripes(
                                        global_info, rec->start,
                                        global_info->tree_root->nodesize);
                check_extent_type(rec);
                /* rec may be freed here; it must not be used afterwards */
                maybe_free_extent_rec(extent_cache, rec);
                return ret;
        }

        /* No existing record covers the range: create a fresh one */
        ret = add_extent_rec_nolookup(extent_cache, tmpl);

        return ret;
}
6141
6142 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6143                             u64 parent, u64 root, int found_ref)
6144 {
6145         struct extent_record *rec;
6146         struct tree_backref *back;
6147         struct cache_extent *cache;
6148         int ret;
6149
6150         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6151         if (!cache) {
6152                 struct extent_record tmpl;
6153
6154                 memset(&tmpl, 0, sizeof(tmpl));
6155                 tmpl.start = bytenr;
6156                 tmpl.nr = 1;
6157                 tmpl.metadata = 1;
6158
6159                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6160                 if (ret)
6161                         return ret;
6162
6163                 /* really a bug in cache_extent implement now */
6164                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6165                 if (!cache)
6166                         return -ENOENT;
6167         }
6168
6169         rec = container_of(cache, struct extent_record, cache);
6170         if (rec->start != bytenr) {
6171                 /*
6172                  * Several cause, from unaligned bytenr to over lapping extents
6173                  */
6174                 return -EEXIST;
6175         }
6176
6177         back = find_tree_backref(rec, parent, root);
6178         if (!back) {
6179                 back = alloc_tree_backref(rec, parent, root);
6180                 if (!back)
6181                         return -ENOMEM;
6182         }
6183
6184         if (found_ref) {
6185                 if (back->node.found_ref) {
6186                         fprintf(stderr, "Extent back ref already exists "
6187                                 "for %llu parent %llu root %llu \n",
6188                                 (unsigned long long)bytenr,
6189                                 (unsigned long long)parent,
6190                                 (unsigned long long)root);
6191                 }
6192                 back->node.found_ref = 1;
6193         } else {
6194                 if (back->node.found_extent_tree) {
6195                         fprintf(stderr, "Extent back ref already exists "
6196                                 "for %llu parent %llu root %llu \n",
6197                                 (unsigned long long)bytenr,
6198                                 (unsigned long long)parent,
6199                                 (unsigned long long)root);
6200                 }
6201                 back->node.found_extent_tree = 1;
6202         }
6203         check_extent_type(rec);
6204         maybe_free_extent_rec(extent_cache, rec);
6205         return 0;
6206 }
6207
6208 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6209                             u64 parent, u64 root, u64 owner, u64 offset,
6210                             u32 num_refs, int found_ref, u64 max_size)
6211 {
6212         struct extent_record *rec;
6213         struct data_backref *back;
6214         struct cache_extent *cache;
6215         int ret;
6216
6217         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6218         if (!cache) {
6219                 struct extent_record tmpl;
6220
6221                 memset(&tmpl, 0, sizeof(tmpl));
6222                 tmpl.start = bytenr;
6223                 tmpl.nr = 1;
6224                 tmpl.max_size = max_size;
6225
6226                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6227                 if (ret)
6228                         return ret;
6229
6230                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6231                 if (!cache)
6232                         abort();
6233         }
6234
6235         rec = container_of(cache, struct extent_record, cache);
6236         if (rec->max_size < max_size)
6237                 rec->max_size = max_size;
6238
6239         /*
6240          * If found_ref is set then max_size is the real size and must match the
6241          * existing refs.  So if we have already found a ref then we need to
6242          * make sure that this ref matches the existing one, otherwise we need
6243          * to add a new backref so we can notice that the backrefs don't match
6244          * and we need to figure out who is telling the truth.  This is to
6245          * account for that awful fsync bug I introduced where we'd end up with
6246          * a btrfs_file_extent_item that would have its length include multiple
6247          * prealloc extents or point inside of a prealloc extent.
6248          */
6249         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6250                                  bytenr, max_size);
6251         if (!back) {
6252                 back = alloc_data_backref(rec, parent, root, owner, offset,
6253                                           max_size);
6254                 BUG_ON(!back);
6255         }
6256
6257         if (found_ref) {
6258                 BUG_ON(num_refs != 1);
6259                 if (back->node.found_ref)
6260                         BUG_ON(back->bytes != max_size);
6261                 back->node.found_ref = 1;
6262                 back->found_ref += 1;
6263                 back->bytes = max_size;
6264                 back->disk_bytenr = bytenr;
6265                 rec->refs += 1;
6266                 rec->content_checked = 1;
6267                 rec->owner_ref_checked = 1;
6268         } else {
6269                 if (back->node.found_extent_tree) {
6270                         fprintf(stderr, "Extent back ref already exists "
6271                                 "for %llu parent %llu root %llu "
6272                                 "owner %llu offset %llu num_refs %lu\n",
6273                                 (unsigned long long)bytenr,
6274                                 (unsigned long long)parent,
6275                                 (unsigned long long)root,
6276                                 (unsigned long long)owner,
6277                                 (unsigned long long)offset,
6278                                 (unsigned long)num_refs);
6279                 }
6280                 back->num_refs = num_refs;
6281                 back->node.found_extent_tree = 1;
6282         }
6283         maybe_free_extent_rec(extent_cache, rec);
6284         return 0;
6285 }
6286
6287 static int add_pending(struct cache_tree *pending,
6288                        struct cache_tree *seen, u64 bytenr, u32 size)
6289 {
6290         int ret;
6291         ret = add_cache_extent(seen, bytenr, size);
6292         if (ret)
6293                 return ret;
6294         add_cache_extent(pending, bytenr, size);
6295         return 0;
6296 }
6297
6298 static int pick_next_pending(struct cache_tree *pending,
6299                         struct cache_tree *reada,
6300                         struct cache_tree *nodes,
6301                         u64 last, struct block_info *bits, int bits_nr,
6302                         int *reada_bits)
6303 {
6304         unsigned long node_start = last;
6305         struct cache_extent *cache;
6306         int ret;
6307
6308         cache = search_cache_extent(reada, 0);
6309         if (cache) {
6310                 bits[0].start = cache->start;
6311                 bits[0].size = cache->size;
6312                 *reada_bits = 1;
6313                 return 1;
6314         }
6315         *reada_bits = 0;
6316         if (node_start > 32768)
6317                 node_start -= 32768;
6318
6319         cache = search_cache_extent(nodes, node_start);
6320         if (!cache)
6321                 cache = search_cache_extent(nodes, 0);
6322
6323         if (!cache) {
6324                  cache = search_cache_extent(pending, 0);
6325                  if (!cache)
6326                          return 0;
6327                  ret = 0;
6328                  do {
6329                          bits[ret].start = cache->start;
6330                          bits[ret].size = cache->size;
6331                          cache = next_cache_extent(cache);
6332                          ret++;
6333                  } while (cache && ret < bits_nr);
6334                  return ret;
6335         }
6336
6337         ret = 0;
6338         do {
6339                 bits[ret].start = cache->start;
6340                 bits[ret].size = cache->size;
6341                 cache = next_cache_extent(cache);
6342                 ret++;
6343         } while (cache && ret < bits_nr);
6344
6345         if (bits_nr - ret > 8) {
6346                 u64 lookup = bits[0].start + bits[0].size;
6347                 struct cache_extent *next;
6348                 next = search_cache_extent(pending, lookup);
6349                 while(next) {
6350                         if (next->start - lookup > 32768)
6351                                 break;
6352                         bits[ret].start = next->start;
6353                         bits[ret].size = next->size;
6354                         lookup = next->start + next->size;
6355                         ret++;
6356                         if (ret == bits_nr)
6357                                 break;
6358                         next = next_cache_extent(next);
6359                         if (!next)
6360                                 break;
6361                 }
6362         }
6363         return ret;
6364 }
6365
6366 static void free_chunk_record(struct cache_extent *cache)
6367 {
6368         struct chunk_record *rec;
6369
6370         rec = container_of(cache, struct chunk_record, cache);
6371         list_del_init(&rec->list);
6372         list_del_init(&rec->dextents);
6373         free(rec);
6374 }
6375
6376 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6377 {
6378         cache_tree_free_extents(chunk_cache, free_chunk_record);
6379 }
6380
6381 static void free_device_record(struct rb_node *node)
6382 {
6383         struct device_record *rec;
6384
6385         rec = container_of(node, struct device_record, node);
6386         free(rec);
6387 }
6388
6389 FREE_RB_BASED_TREE(device_cache, free_device_record);
6390
6391 int insert_block_group_record(struct block_group_tree *tree,
6392                               struct block_group_record *bg_rec)
6393 {
6394         int ret;
6395
6396         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6397         if (ret)
6398                 return ret;
6399
6400         list_add_tail(&bg_rec->list, &tree->block_groups);
6401         return 0;
6402 }
6403
6404 static void free_block_group_record(struct cache_extent *cache)
6405 {
6406         struct block_group_record *rec;
6407
6408         rec = container_of(cache, struct block_group_record, cache);
6409         list_del_init(&rec->list);
6410         free(rec);
6411 }
6412
6413 void free_block_group_tree(struct block_group_tree *tree)
6414 {
6415         cache_tree_free_extents(&tree->tree, free_block_group_record);
6416 }
6417
6418 int insert_device_extent_record(struct device_extent_tree *tree,
6419                                 struct device_extent_record *de_rec)
6420 {
6421         int ret;
6422
6423         /*
6424          * Device extent is a bit different from the other extents, because
6425          * the extents which belong to the different devices may have the
6426          * same start and size, so we need use the special extent cache
6427          * search/insert functions.
6428          */
6429         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6430         if (ret)
6431                 return ret;
6432
6433         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6434         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6435         return 0;
6436 }
6437
6438 static void free_device_extent_record(struct cache_extent *cache)
6439 {
6440         struct device_extent_record *rec;
6441
6442         rec = container_of(cache, struct device_extent_record, cache);
6443         if (!list_empty(&rec->chunk_list))
6444                 list_del_init(&rec->chunk_list);
6445         if (!list_empty(&rec->device_list))
6446                 list_del_init(&rec->device_list);
6447         free(rec);
6448 }
6449
6450 void free_device_extent_tree(struct device_extent_tree *tree)
6451 {
6452         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6453 }
6454
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, anything else as a data backref carrying the item's ref
 * count.  In both cases key.objectid is the extent bytenr and key.offset the
 * parent.  Returns whatever the add_*_backref() helper returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6475
6476 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6477                                             struct btrfs_key *key,
6478                                             int slot)
6479 {
6480         struct btrfs_chunk *ptr;
6481         struct chunk_record *rec;
6482         int num_stripes, i;
6483
6484         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6485         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6486
6487         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6488         if (!rec) {
6489                 fprintf(stderr, "memory allocation failed\n");
6490                 exit(-1);
6491         }
6492
6493         INIT_LIST_HEAD(&rec->list);
6494         INIT_LIST_HEAD(&rec->dextents);
6495         rec->bg_rec = NULL;
6496
6497         rec->cache.start = key->offset;
6498         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6499
6500         rec->generation = btrfs_header_generation(leaf);
6501
6502         rec->objectid = key->objectid;
6503         rec->type = key->type;
6504         rec->offset = key->offset;
6505
6506         rec->length = rec->cache.size;
6507         rec->owner = btrfs_chunk_owner(leaf, ptr);
6508         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6509         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6510         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6511         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6512         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6513         rec->num_stripes = num_stripes;
6514         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6515
6516         for (i = 0; i < rec->num_stripes; ++i) {
6517                 rec->stripes[i].devid =
6518                         btrfs_stripe_devid_nr(leaf, ptr, i);
6519                 rec->stripes[i].offset =
6520                         btrfs_stripe_offset_nr(leaf, ptr, i);
6521                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6522                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6523                                 BTRFS_UUID_SIZE);
6524         }
6525
6526         return rec;
6527 }
6528
6529 static int process_chunk_item(struct cache_tree *chunk_cache,
6530                               struct btrfs_key *key, struct extent_buffer *eb,
6531                               int slot)
6532 {
6533         struct chunk_record *rec;
6534         struct btrfs_chunk *chunk;
6535         int ret = 0;
6536
6537         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6538         /*
6539          * Do extra check for this chunk item,
6540          *
6541          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6542          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6543          * and owner<->key_type check.
6544          */
6545         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6546                                       key->offset);
6547         if (ret < 0) {
6548                 error("chunk(%llu, %llu) is not valid, ignore it",
6549                       key->offset, btrfs_chunk_length(eb, chunk));
6550                 return 0;
6551         }
6552         rec = btrfs_new_chunk_record(eb, key, slot);
6553         ret = insert_cache_extent(chunk_cache, &rec->cache);
6554         if (ret) {
6555                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6556                         rec->offset, rec->length);
6557                 free(rec);
6558         }
6559
6560         return ret;
6561 }
6562
6563 static int process_device_item(struct rb_root *dev_cache,
6564                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6565 {
6566         struct btrfs_dev_item *ptr;
6567         struct device_record *rec;
6568         int ret = 0;
6569
6570         ptr = btrfs_item_ptr(eb,
6571                 slot, struct btrfs_dev_item);
6572
6573         rec = malloc(sizeof(*rec));
6574         if (!rec) {
6575                 fprintf(stderr, "memory allocation failed\n");
6576                 return -ENOMEM;
6577         }
6578
6579         rec->devid = key->offset;
6580         rec->generation = btrfs_header_generation(eb);
6581
6582         rec->objectid = key->objectid;
6583         rec->type = key->type;
6584         rec->offset = key->offset;
6585
6586         rec->devid = btrfs_device_id(eb, ptr);
6587         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6588         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6589
6590         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6591         if (ret) {
6592                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6593                 free(rec);
6594         }
6595
6596         return ret;
6597 }
6598
6599 struct block_group_record *
6600 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6601                              int slot)
6602 {
6603         struct btrfs_block_group_item *ptr;
6604         struct block_group_record *rec;
6605
6606         rec = calloc(1, sizeof(*rec));
6607         if (!rec) {
6608                 fprintf(stderr, "memory allocation failed\n");
6609                 exit(-1);
6610         }
6611
6612         rec->cache.start = key->objectid;
6613         rec->cache.size = key->offset;
6614
6615         rec->generation = btrfs_header_generation(leaf);
6616
6617         rec->objectid = key->objectid;
6618         rec->type = key->type;
6619         rec->offset = key->offset;
6620
6621         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6622         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6623
6624         INIT_LIST_HEAD(&rec->list);
6625
6626         return rec;
6627 }
6628
6629 static int process_block_group_item(struct block_group_tree *block_group_cache,
6630                                     struct btrfs_key *key,
6631                                     struct extent_buffer *eb, int slot)
6632 {
6633         struct block_group_record *rec;
6634         int ret = 0;
6635
6636         rec = btrfs_new_block_group_record(eb, key, slot);
6637         ret = insert_block_group_record(block_group_cache, rec);
6638         if (ret) {
6639                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6640                         rec->objectid, rec->offset);
6641                 free(rec);
6642         }
6643
6644         return ret;
6645 }
6646
6647 struct device_extent_record *
6648 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6649                                struct btrfs_key *key, int slot)
6650 {
6651         struct device_extent_record *rec;
6652         struct btrfs_dev_extent *ptr;
6653
6654         rec = calloc(1, sizeof(*rec));
6655         if (!rec) {
6656                 fprintf(stderr, "memory allocation failed\n");
6657                 exit(-1);
6658         }
6659
6660         rec->cache.objectid = key->objectid;
6661         rec->cache.start = key->offset;
6662
6663         rec->generation = btrfs_header_generation(leaf);
6664
6665         rec->objectid = key->objectid;
6666         rec->type = key->type;
6667         rec->offset = key->offset;
6668
6669         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6670         rec->chunk_objecteid =
6671                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6672         rec->chunk_offset =
6673                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6674         rec->length = btrfs_dev_extent_length(leaf, ptr);
6675         rec->cache.size = rec->length;
6676
6677         INIT_LIST_HEAD(&rec->chunk_list);
6678         INIT_LIST_HEAD(&rec->device_list);
6679
6680         return rec;
6681 }
6682
6683 static int
6684 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6685                            struct btrfs_key *key, struct extent_buffer *eb,
6686                            int slot)
6687 {
6688         struct device_extent_record *rec;
6689         int ret;
6690
6691         rec = btrfs_new_device_extent_record(eb, key, slot);
6692         ret = insert_device_extent_record(dev_extent_cache, rec);
6693         if (ret) {
6694                 fprintf(stderr,
6695                         "Device extent[%llu, %llu, %llu] existed.\n",
6696                         rec->objectid, rec->offset, rec->length);
6697                 free(rec);
6698         }
6699
6700         return ret;
6701 }
6702
6703 static int process_extent_item(struct btrfs_root *root,
6704                                struct cache_tree *extent_cache,
6705                                struct extent_buffer *eb, int slot)
6706 {
6707         struct btrfs_extent_item *ei;
6708         struct btrfs_extent_inline_ref *iref;
6709         struct btrfs_extent_data_ref *dref;
6710         struct btrfs_shared_data_ref *sref;
6711         struct btrfs_key key;
6712         struct extent_record tmpl;
6713         unsigned long end;
6714         unsigned long ptr;
6715         int ret;
6716         int type;
6717         u32 item_size = btrfs_item_size_nr(eb, slot);
6718         u64 refs = 0;
6719         u64 offset;
6720         u64 num_bytes;
6721         int metadata = 0;
6722
6723         btrfs_item_key_to_cpu(eb, &key, slot);
6724
6725         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6726                 metadata = 1;
6727                 num_bytes = root->nodesize;
6728         } else {
6729                 num_bytes = key.offset;
6730         }
6731
6732         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6733                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6734                       key.objectid, root->sectorsize);
6735                 return -EIO;
6736         }
6737         if (item_size < sizeof(*ei)) {
6738 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6739                 struct btrfs_extent_item_v0 *ei0;
6740                 BUG_ON(item_size != sizeof(*ei0));
6741                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6742                 refs = btrfs_extent_refs_v0(eb, ei0);
6743 #else
6744                 BUG();
6745 #endif
6746                 memset(&tmpl, 0, sizeof(tmpl));
6747                 tmpl.start = key.objectid;
6748                 tmpl.nr = num_bytes;
6749                 tmpl.extent_item_refs = refs;
6750                 tmpl.metadata = metadata;
6751                 tmpl.found_rec = 1;
6752                 tmpl.max_size = num_bytes;
6753
6754                 return add_extent_rec(extent_cache, &tmpl);
6755         }
6756
6757         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6758         refs = btrfs_extent_refs(eb, ei);
6759         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6760                 metadata = 1;
6761         else
6762                 metadata = 0;
6763         if (metadata && num_bytes != root->nodesize) {
6764                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6765                       num_bytes, root->nodesize);
6766                 return -EIO;
6767         }
6768         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6769                 error("ignore invalid data extent, length %llu is not aligned to %u",
6770                       num_bytes, root->sectorsize);
6771                 return -EIO;
6772         }
6773
6774         memset(&tmpl, 0, sizeof(tmpl));
6775         tmpl.start = key.objectid;
6776         tmpl.nr = num_bytes;
6777         tmpl.extent_item_refs = refs;
6778         tmpl.metadata = metadata;
6779         tmpl.found_rec = 1;
6780         tmpl.max_size = num_bytes;
6781         add_extent_rec(extent_cache, &tmpl);
6782
6783         ptr = (unsigned long)(ei + 1);
6784         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6785             key.type == BTRFS_EXTENT_ITEM_KEY)
6786                 ptr += sizeof(struct btrfs_tree_block_info);
6787
6788         end = (unsigned long)ei + item_size;
6789         while (ptr < end) {
6790                 iref = (struct btrfs_extent_inline_ref *)ptr;
6791                 type = btrfs_extent_inline_ref_type(eb, iref);
6792                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6793                 switch (type) {
6794                 case BTRFS_TREE_BLOCK_REF_KEY:
6795                         ret = add_tree_backref(extent_cache, key.objectid,
6796                                         0, offset, 0);
6797                         if (ret < 0)
6798                                 error("add_tree_backref failed: %s",
6799                                       strerror(-ret));
6800                         break;
6801                 case BTRFS_SHARED_BLOCK_REF_KEY:
6802                         ret = add_tree_backref(extent_cache, key.objectid,
6803                                         offset, 0, 0);
6804                         if (ret < 0)
6805                                 error("add_tree_backref failed: %s",
6806                                       strerror(-ret));
6807                         break;
6808                 case BTRFS_EXTENT_DATA_REF_KEY:
6809                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6810                         add_data_backref(extent_cache, key.objectid, 0,
6811                                         btrfs_extent_data_ref_root(eb, dref),
6812                                         btrfs_extent_data_ref_objectid(eb,
6813                                                                        dref),
6814                                         btrfs_extent_data_ref_offset(eb, dref),
6815                                         btrfs_extent_data_ref_count(eb, dref),
6816                                         0, num_bytes);
6817                         break;
6818                 case BTRFS_SHARED_DATA_REF_KEY:
6819                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6820                         add_data_backref(extent_cache, key.objectid, offset,
6821                                         0, 0, 0,
6822                                         btrfs_shared_data_ref_count(eb, sref),
6823                                         0, num_bytes);
6824                         break;
6825                 default:
6826                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6827                                 key.objectid, key.type, num_bytes);
6828                         goto out;
6829                 }
6830                 ptr += btrfs_extent_inline_ref_size(type);
6831         }
6832         WARN_ON(ptr > end);
6833 out:
6834         return 0;
6835 }
6836
6837 static int check_cache_range(struct btrfs_root *root,
6838                              struct btrfs_block_group_cache *cache,
6839                              u64 offset, u64 bytes)
6840 {
6841         struct btrfs_free_space *entry;
6842         u64 *logical;
6843         u64 bytenr;
6844         int stripe_len;
6845         int i, nr, ret;
6846
6847         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6848                 bytenr = btrfs_sb_offset(i);
6849                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6850                                        cache->key.objectid, bytenr, 0,
6851                                        &logical, &nr, &stripe_len);
6852                 if (ret)
6853                         return ret;
6854
6855                 while (nr--) {
6856                         if (logical[nr] + stripe_len <= offset)
6857                                 continue;
6858                         if (offset + bytes <= logical[nr])
6859                                 continue;
6860                         if (logical[nr] == offset) {
6861                                 if (stripe_len >= bytes) {
6862                                         free(logical);
6863                                         return 0;
6864                                 }
6865                                 bytes -= stripe_len;
6866                                 offset += stripe_len;
6867                         } else if (logical[nr] < offset) {
6868                                 if (logical[nr] + stripe_len >=
6869                                     offset + bytes) {
6870                                         free(logical);
6871                                         return 0;
6872                                 }
6873                                 bytes = (offset + bytes) -
6874                                         (logical[nr] + stripe_len);
6875                                 offset = logical[nr] + stripe_len;
6876                         } else {
6877                                 /*
6878                                  * Could be tricky, the super may land in the
6879                                  * middle of the area we're checking.  First
6880                                  * check the easiest case, it's at the end.
6881                                  */
6882                                 if (logical[nr] + stripe_len >=
6883                                     bytes + offset) {
6884                                         bytes = logical[nr] - offset;
6885                                         continue;
6886                                 }
6887
6888                                 /* Check the left side */
6889                                 ret = check_cache_range(root, cache,
6890                                                         offset,
6891                                                         logical[nr] - offset);
6892                                 if (ret) {
6893                                         free(logical);
6894                                         return ret;
6895                                 }
6896
6897                                 /* Now we continue with the right side */
6898                                 bytes = (offset + bytes) -
6899                                         (logical[nr] + stripe_len);
6900                                 offset = logical[nr] + stripe_len;
6901                         }
6902                 }
6903
6904                 free(logical);
6905         }
6906
6907         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6908         if (!entry) {
6909                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6910                         offset, offset+bytes);
6911                 return -EINVAL;
6912         }
6913
6914         if (entry->offset != offset) {
6915                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6916                         entry->offset);
6917                 return -EINVAL;
6918         }
6919
6920         if (entry->bytes != bytes) {
6921                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6922                         bytes, entry->bytes, offset);
6923                 return -EINVAL;
6924         }
6925
6926         unlink_free_space(cache->free_space_ctl, entry);
6927         free(entry);
6928         return 0;
6929 }
6930
6931 static int verify_space_cache(struct btrfs_root *root,
6932                               struct btrfs_block_group_cache *cache)
6933 {
6934         struct btrfs_path path;
6935         struct extent_buffer *leaf;
6936         struct btrfs_key key;
6937         u64 last;
6938         int ret = 0;
6939
6940         root = root->fs_info->extent_root;
6941
6942         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6943
6944         btrfs_init_path(&path);
6945         key.objectid = last;
6946         key.offset = 0;
6947         key.type = BTRFS_EXTENT_ITEM_KEY;
6948         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6949         if (ret < 0)
6950                 goto out;
6951         ret = 0;
6952         while (1) {
6953                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6954                         ret = btrfs_next_leaf(root, &path);
6955                         if (ret < 0)
6956                                 goto out;
6957                         if (ret > 0) {
6958                                 ret = 0;
6959                                 break;
6960                         }
6961                 }
6962                 leaf = path.nodes[0];
6963                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6964                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6965                         break;
6966                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6967                     key.type != BTRFS_METADATA_ITEM_KEY) {
6968                         path.slots[0]++;
6969                         continue;
6970                 }
6971
6972                 if (last == key.objectid) {
6973                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6974                                 last = key.objectid + key.offset;
6975                         else
6976                                 last = key.objectid + root->nodesize;
6977                         path.slots[0]++;
6978                         continue;
6979                 }
6980
6981                 ret = check_cache_range(root, cache, last,
6982                                         key.objectid - last);
6983                 if (ret)
6984                         break;
6985                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6986                         last = key.objectid + key.offset;
6987                 else
6988                         last = key.objectid + root->nodesize;
6989                 path.slots[0]++;
6990         }
6991
6992         if (last < cache->key.objectid + cache->key.offset)
6993                 ret = check_cache_range(root, cache, last,
6994                                         cache->key.objectid +
6995                                         cache->key.offset - last);
6996
6997 out:
6998         btrfs_release_path(&path);
6999
7000         if (!ret &&
7001             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7002                 fprintf(stderr, "There are still entries left in the space "
7003                         "cache\n");
7004                 ret = -EINVAL;
7005         }
7006
7007         return ret;
7008 }
7009
7010 static int check_space_cache(struct btrfs_root *root)
7011 {
7012         struct btrfs_block_group_cache *cache;
7013         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7014         int ret;
7015         int error = 0;
7016
7017         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7018             btrfs_super_generation(root->fs_info->super_copy) !=
7019             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7020                 printf("cache and super generation don't match, space cache "
7021                        "will be invalidated\n");
7022                 return 0;
7023         }
7024
7025         if (ctx.progress_enabled) {
7026                 ctx.tp = TASK_FREE_SPACE;
7027                 task_start(ctx.info);
7028         }
7029
7030         while (1) {
7031                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7032                 if (!cache)
7033                         break;
7034
7035                 start = cache->key.objectid + cache->key.offset;
7036                 if (!cache->free_space_ctl) {
7037                         if (btrfs_init_free_space_ctl(cache,
7038                                                       root->sectorsize)) {
7039                                 ret = -ENOMEM;
7040                                 break;
7041                         }
7042                 } else {
7043                         btrfs_remove_free_space_cache(cache);
7044                 }
7045
7046                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7047                         ret = exclude_super_stripes(root, cache);
7048                         if (ret) {
7049                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7050                                         strerror(-ret));
7051                                 error++;
7052                                 continue;
7053                         }
7054                         ret = load_free_space_tree(root->fs_info, cache);
7055                         free_excluded_extents(root, cache);
7056                         if (ret < 0) {
7057                                 fprintf(stderr, "could not load free space tree: %s\n",
7058                                         strerror(-ret));
7059                                 error++;
7060                                 continue;
7061                         }
7062                         error += ret;
7063                 } else {
7064                         ret = load_free_space_cache(root->fs_info, cache);
7065                         if (!ret)
7066                                 continue;
7067                 }
7068
7069                 ret = verify_space_cache(root, cache);
7070                 if (ret) {
7071                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7072                                 cache->key.objectid);
7073                         error++;
7074                 }
7075         }
7076
7077         task_stop(ctx.info);
7078
7079         return error ? -EINVAL : 0;
7080 }
7081
7082 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7083                         u64 num_bytes, unsigned long leaf_offset,
7084                         struct extent_buffer *eb) {
7085
7086         u64 offset = 0;
7087         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7088         char *data;
7089         unsigned long csum_offset;
7090         u32 csum;
7091         u32 csum_expected;
7092         u64 read_len;
7093         u64 data_checked = 0;
7094         u64 tmp;
7095         int ret = 0;
7096         int mirror;
7097         int num_copies;
7098
7099         if (num_bytes % root->sectorsize)
7100                 return -EINVAL;
7101
7102         data = malloc(num_bytes);
7103         if (!data)
7104                 return -ENOMEM;
7105
7106         while (offset < num_bytes) {
7107                 mirror = 0;
7108 again:
7109                 read_len = num_bytes - offset;
7110                 /* read as much space once a time */
7111                 ret = read_extent_data(root, data + offset,
7112                                 bytenr + offset, &read_len, mirror);
7113                 if (ret)
7114                         goto out;
7115                 data_checked = 0;
7116                 /* verify every 4k data's checksum */
7117                 while (data_checked < read_len) {
7118                         csum = ~(u32)0;
7119                         tmp = offset + data_checked;
7120
7121                         csum = btrfs_csum_data((char *)data + tmp,
7122                                                csum, root->sectorsize);
7123                         btrfs_csum_final(csum, (u8 *)&csum);
7124
7125                         csum_offset = leaf_offset +
7126                                  tmp / root->sectorsize * csum_size;
7127                         read_extent_buffer(eb, (char *)&csum_expected,
7128                                            csum_offset, csum_size);
7129                         /* try another mirror */
7130                         if (csum != csum_expected) {
7131                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7132                                                 mirror, bytenr + tmp,
7133                                                 csum, csum_expected);
7134                                 num_copies = btrfs_num_copies(
7135                                                 &root->fs_info->mapping_tree,
7136                                                 bytenr, num_bytes);
7137                                 if (mirror < num_copies - 1) {
7138                                         mirror += 1;
7139                                         goto again;
7140                                 }
7141                         }
7142                         data_checked += root->sectorsize;
7143                 }
7144                 offset += read_len;
7145         }
7146 out:
7147         free(data);
7148         return ret;
7149 }
7150
7151 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7152                                u64 num_bytes)
7153 {
7154         struct btrfs_path path;
7155         struct extent_buffer *leaf;
7156         struct btrfs_key key;
7157         int ret;
7158
7159         btrfs_init_path(&path);
7160         key.objectid = bytenr;
7161         key.type = BTRFS_EXTENT_ITEM_KEY;
7162         key.offset = (u64)-1;
7163
7164 again:
7165         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7166                                 0, 0);
7167         if (ret < 0) {
7168                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7169                 btrfs_release_path(&path);
7170                 return ret;
7171         } else if (ret) {
7172                 if (path.slots[0] > 0) {
7173                         path.slots[0]--;
7174                 } else {
7175                         ret = btrfs_prev_leaf(root, &path);
7176                         if (ret < 0) {
7177                                 goto out;
7178                         } else if (ret > 0) {
7179                                 ret = 0;
7180                                 goto out;
7181                         }
7182                 }
7183         }
7184
7185         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7186
7187         /*
7188          * Block group items come before extent items if they have the same
7189          * bytenr, so walk back one more just in case.  Dear future traveller,
7190          * first congrats on mastering time travel.  Now if it's not too much
7191          * trouble could you go back to 2006 and tell Chris to make the
7192          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7193          * EXTENT_ITEM_KEY please?
7194          */
7195         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7196                 if (path.slots[0] > 0) {
7197                         path.slots[0]--;
7198                 } else {
7199                         ret = btrfs_prev_leaf(root, &path);
7200                         if (ret < 0) {
7201                                 goto out;
7202                         } else if (ret > 0) {
7203                                 ret = 0;
7204                                 goto out;
7205                         }
7206                 }
7207                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7208         }
7209
7210         while (num_bytes) {
7211                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7212                         ret = btrfs_next_leaf(root, &path);
7213                         if (ret < 0) {
7214                                 fprintf(stderr, "Error going to next leaf "
7215                                         "%d\n", ret);
7216                                 btrfs_release_path(&path);
7217                                 return ret;
7218                         } else if (ret) {
7219                                 break;
7220                         }
7221                 }
7222                 leaf = path.nodes[0];
7223                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7224                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7225                         path.slots[0]++;
7226                         continue;
7227                 }
7228                 if (key.objectid + key.offset < bytenr) {
7229                         path.slots[0]++;
7230                         continue;
7231                 }
7232                 if (key.objectid > bytenr + num_bytes)
7233                         break;
7234
7235                 if (key.objectid == bytenr) {
7236                         if (key.offset >= num_bytes) {
7237                                 num_bytes = 0;
7238                                 break;
7239                         }
7240                         num_bytes -= key.offset;
7241                         bytenr += key.offset;
7242                 } else if (key.objectid < bytenr) {
7243                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7244                                 num_bytes = 0;
7245                                 break;
7246                         }
7247                         num_bytes = (bytenr + num_bytes) -
7248                                 (key.objectid + key.offset);
7249                         bytenr = key.objectid + key.offset;
7250                 } else {
7251                         if (key.objectid + key.offset < bytenr + num_bytes) {
7252                                 u64 new_start = key.objectid + key.offset;
7253                                 u64 new_bytes = bytenr + num_bytes - new_start;
7254
7255                                 /*
7256                                  * Weird case, the extent is in the middle of
7257                                  * our range, we'll have to search one side
7258                                  * and then the other.  Not sure if this happens
7259                                  * in real life, but no harm in coding it up
7260                                  * anyway just in case.
7261                                  */
7262                                 btrfs_release_path(&path);
7263                                 ret = check_extent_exists(root, new_start,
7264                                                           new_bytes);
7265                                 if (ret) {
7266                                         fprintf(stderr, "Right section didn't "
7267                                                 "have a record\n");
7268                                         break;
7269                                 }
7270                                 num_bytes = key.objectid - bytenr;
7271                                 goto again;
7272                         }
7273                         num_bytes = key.objectid - bytenr;
7274                 }
7275                 path.slots[0]++;
7276         }
7277         ret = 0;
7278
7279 out:
7280         if (num_bytes && !ret) {
7281                 fprintf(stderr, "There are no extents for csum range "
7282                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7283                 ret = 1;
7284         }
7285
7286         btrfs_release_path(&path);
7287         return ret;
7288 }
7289
7290 static int check_csums(struct btrfs_root *root)
7291 {
7292         struct btrfs_path path;
7293         struct extent_buffer *leaf;
7294         struct btrfs_key key;
7295         u64 offset = 0, num_bytes = 0;
7296         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7297         int errors = 0;
7298         int ret;
7299         u64 data_len;
7300         unsigned long leaf_offset;
7301
7302         root = root->fs_info->csum_root;
7303         if (!extent_buffer_uptodate(root->node)) {
7304                 fprintf(stderr, "No valid csum tree found\n");
7305                 return -ENOENT;
7306         }
7307
7308         btrfs_init_path(&path);
7309         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7310         key.type = BTRFS_EXTENT_CSUM_KEY;
7311         key.offset = 0;
7312         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7313         if (ret < 0) {
7314                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7315                 btrfs_release_path(&path);
7316                 return ret;
7317         }
7318
7319         if (ret > 0 && path.slots[0])
7320                 path.slots[0]--;
7321         ret = 0;
7322
7323         while (1) {
7324                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7325                         ret = btrfs_next_leaf(root, &path);
7326                         if (ret < 0) {
7327                                 fprintf(stderr, "Error going to next leaf "
7328                                         "%d\n", ret);
7329                                 break;
7330                         }
7331                         if (ret)
7332                                 break;
7333                 }
7334                 leaf = path.nodes[0];
7335
7336                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7337                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7338                         path.slots[0]++;
7339                         continue;
7340                 }
7341
7342                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7343                               csum_size) * root->sectorsize;
7344                 if (!check_data_csum)
7345                         goto skip_csum_check;
7346                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7347                 ret = check_extent_csums(root, key.offset, data_len,
7348                                          leaf_offset, leaf);
7349                 if (ret)
7350                         break;
7351 skip_csum_check:
7352                 if (!num_bytes) {
7353                         offset = key.offset;
7354                 } else if (key.offset != offset + num_bytes) {
7355                         ret = check_extent_exists(root, offset, num_bytes);
7356                         if (ret) {
7357                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7358                                         "there is no extent record\n",
7359                                         offset, offset+num_bytes);
7360                                 errors++;
7361                         }
7362                         offset = key.offset;
7363                         num_bytes = 0;
7364                 }
7365                 num_bytes += data_len;
7366                 path.slots[0]++;
7367         }
7368
7369         btrfs_release_path(&path);
7370         return errors;
7371 }
7372
7373 static int is_dropped_key(struct btrfs_key *key,
7374                           struct btrfs_key *drop_key) {
7375         if (key->objectid < drop_key->objectid)
7376                 return 1;
7377         else if (key->objectid == drop_key->objectid) {
7378                 if (key->type < drop_key->type)
7379                         return 1;
7380                 else if (key->type == drop_key->type) {
7381                         if (key->offset < drop_key->offset)
7382                                 return 1;
7383                 }
7384         }
7385         return 0;
7386 }
7387
7388 /*
7389  * Here are the rules for FULL_BACKREF.
7390  *
7391  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7392  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7393  *      FULL_BACKREF set.
7394  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7395  *    if it happened after the relocation occurred since we'll have dropped the
7396  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7397  *    have no real way to know for sure.
7398  *
7399  * We process the blocks one root at a time, and we start from the lowest root
7400  * objectid and go to the highest.  So we can just lookup the owner backref for
7401  * the record and if we don't find it then we know it doesn't exist and we have
7402  * a FULL BACKREF.
7403  *
7404  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7405  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7406  * be set or not and then we can check later once we've gathered all the refs.
7407  */
7408 static int calc_extent_flag(struct btrfs_root *root,
7409                            struct cache_tree *extent_cache,
7410                            struct extent_buffer *buf,
7411                            struct root_item_record *ri,
7412                            u64 *flags)
7413 {
7414         struct extent_record *rec;
7415         struct cache_extent *cache;
7416         struct tree_backref *tback;
7417         u64 owner = 0;
7418
7419         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7420         /* we have added this extent before */
7421         if (!cache)
7422                 return -ENOENT;
7423
7424         rec = container_of(cache, struct extent_record, cache);
7425
7426         /*
7427          * Except file/reloc tree, we can not have
7428          * FULL BACKREF MODE
7429          */
7430         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7431                 goto normal;
7432         /*
7433          * root node
7434          */
7435         if (buf->start == ri->bytenr)
7436                 goto normal;
7437
7438         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7439                 goto full_backref;
7440
7441         owner = btrfs_header_owner(buf);
7442         if (owner == ri->objectid)
7443                 goto normal;
7444
7445         tback = find_tree_backref(rec, 0, owner);
7446         if (!tback)
7447                 goto full_backref;
7448 normal:
7449         *flags = 0;
7450         if (rec->flag_block_full_backref != FLAG_UNSET &&
7451             rec->flag_block_full_backref != 0)
7452                 rec->bad_full_backref = 1;
7453         return 0;
7454 full_backref:
7455         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7456         if (rec->flag_block_full_backref != FLAG_UNSET &&
7457             rec->flag_block_full_backref != 1)
7458                 rec->bad_full_backref = 1;
7459         return 0;
7460 }
7461
7462 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7463 {
7464         fprintf(stderr, "Invalid key type(");
7465         print_key_type(stderr, 0, key_type);
7466         fprintf(stderr, ") found in root(");
7467         print_objectid(stderr, rootid, 0);
7468         fprintf(stderr, ")\n");
7469 }
7470
7471 /*
7472  * Check if the key is valid with its extent buffer.
7473  *
7474  * This is a early check in case invalid key exists in a extent buffer
7475  * This is not comprehensive yet, but should prevent wrong key/item passed
7476  * further
7477  */
7478 static int check_type_with_root(u64 rootid, u8 key_type)
7479 {
7480         switch (key_type) {
7481         /* Only valid in chunk tree */
7482         case BTRFS_DEV_ITEM_KEY:
7483         case BTRFS_CHUNK_ITEM_KEY:
7484                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7485                         goto err;
7486                 break;
7487         /* valid in csum and log tree */
7488         case BTRFS_CSUM_TREE_OBJECTID:
7489                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7490                       is_fstree(rootid)))
7491                         goto err;
7492                 break;
7493         case BTRFS_EXTENT_ITEM_KEY:
7494         case BTRFS_METADATA_ITEM_KEY:
7495         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7496                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7497                         goto err;
7498                 break;
7499         case BTRFS_ROOT_ITEM_KEY:
7500                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7501                         goto err;
7502                 break;
7503         case BTRFS_DEV_EXTENT_KEY:
7504                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7505                         goto err;
7506                 break;
7507         }
7508         return 0;
7509 err:
7510         report_mismatch_key_root(key_type, rootid);
7511         return -EINVAL;
7512 }
7513
7514 static int run_next_block(struct btrfs_root *root,
7515                           struct block_info *bits,
7516                           int bits_nr,
7517                           u64 *last,
7518                           struct cache_tree *pending,
7519                           struct cache_tree *seen,
7520                           struct cache_tree *reada,
7521                           struct cache_tree *nodes,
7522                           struct cache_tree *extent_cache,
7523                           struct cache_tree *chunk_cache,
7524                           struct rb_root *dev_cache,
7525                           struct block_group_tree *block_group_cache,
7526                           struct device_extent_tree *dev_extent_cache,
7527                           struct root_item_record *ri)
7528 {
7529         struct extent_buffer *buf;
7530         struct extent_record *rec = NULL;
7531         u64 bytenr;
7532         u32 size;
7533         u64 parent;
7534         u64 owner;
7535         u64 flags;
7536         u64 ptr;
7537         u64 gen = 0;
7538         int ret = 0;
7539         int i;
7540         int nritems;
7541         struct btrfs_key key;
7542         struct cache_extent *cache;
7543         int reada_bits;
7544
7545         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7546                                     bits_nr, &reada_bits);
7547         if (nritems == 0)
7548                 return 1;
7549
7550         if (!reada_bits) {
7551                 for(i = 0; i < nritems; i++) {
7552                         ret = add_cache_extent(reada, bits[i].start,
7553                                                bits[i].size);
7554                         if (ret == -EEXIST)
7555                                 continue;
7556
7557                         /* fixme, get the parent transid */
7558                         readahead_tree_block(root, bits[i].start,
7559                                              bits[i].size, 0);
7560                 }
7561         }
7562         *last = bits[0].start;
7563         bytenr = bits[0].start;
7564         size = bits[0].size;
7565
7566         cache = lookup_cache_extent(pending, bytenr, size);
7567         if (cache) {
7568                 remove_cache_extent(pending, cache);
7569                 free(cache);
7570         }
7571         cache = lookup_cache_extent(reada, bytenr, size);
7572         if (cache) {
7573                 remove_cache_extent(reada, cache);
7574                 free(cache);
7575         }
7576         cache = lookup_cache_extent(nodes, bytenr, size);
7577         if (cache) {
7578                 remove_cache_extent(nodes, cache);
7579                 free(cache);
7580         }
7581         cache = lookup_cache_extent(extent_cache, bytenr, size);
7582         if (cache) {
7583                 rec = container_of(cache, struct extent_record, cache);
7584                 gen = rec->parent_generation;
7585         }
7586
7587         /* fixme, get the real parent transid */
7588         buf = read_tree_block(root, bytenr, size, gen);
7589         if (!extent_buffer_uptodate(buf)) {
7590                 record_bad_block_io(root->fs_info,
7591                                     extent_cache, bytenr, size);
7592                 goto out;
7593         }
7594
7595         nritems = btrfs_header_nritems(buf);
7596
7597         flags = 0;
7598         if (!init_extent_tree) {
7599                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7600                                        btrfs_header_level(buf), 1, NULL,
7601                                        &flags);
7602                 if (ret < 0) {
7603                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7604                         if (ret < 0) {
7605                                 fprintf(stderr, "Couldn't calc extent flags\n");
7606                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7607                         }
7608                 }
7609         } else {
7610                 flags = 0;
7611                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7612                 if (ret < 0) {
7613                         fprintf(stderr, "Couldn't calc extent flags\n");
7614                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7615                 }
7616         }
7617
7618         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7619                 if (ri != NULL &&
7620                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7621                     ri->objectid == btrfs_header_owner(buf)) {
7622                         /*
7623                          * Ok we got to this block from it's original owner and
7624                          * we have FULL_BACKREF set.  Relocation can leave
7625                          * converted blocks over so this is altogether possible,
7626                          * however it's not possible if the generation > the
7627                          * last snapshot, so check for this case.
7628                          */
7629                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7630                             btrfs_header_generation(buf) > ri->last_snapshot) {
7631                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7632                                 rec->bad_full_backref = 1;
7633                         }
7634                 }
7635         } else {
7636                 if (ri != NULL &&
7637                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7638                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7639                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7640                         rec->bad_full_backref = 1;
7641                 }
7642         }
7643
7644         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7645                 rec->flag_block_full_backref = 1;
7646                 parent = bytenr;
7647                 owner = 0;
7648         } else {
7649                 rec->flag_block_full_backref = 0;
7650                 parent = 0;
7651                 owner = btrfs_header_owner(buf);
7652         }
7653
7654         ret = check_block(root, extent_cache, buf, flags);
7655         if (ret)
7656                 goto out;
7657
7658         if (btrfs_is_leaf(buf)) {
7659                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7660                 for (i = 0; i < nritems; i++) {
7661                         struct btrfs_file_extent_item *fi;
7662                         btrfs_item_key_to_cpu(buf, &key, i);
7663                         /*
7664                          * Check key type against the leaf owner.
7665                          * Could filter quite a lot of early error if
7666                          * owner is correct
7667                          */
7668                         if (check_type_with_root(btrfs_header_owner(buf),
7669                                                  key.type)) {
7670                                 fprintf(stderr, "ignoring invalid key\n");
7671                                 continue;
7672                         }
7673                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7674                                 process_extent_item(root, extent_cache, buf,
7675                                                     i);
7676                                 continue;
7677                         }
7678                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7679                                 process_extent_item(root, extent_cache, buf,
7680                                                     i);
7681                                 continue;
7682                         }
7683                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7684                                 total_csum_bytes +=
7685                                         btrfs_item_size_nr(buf, i);
7686                                 continue;
7687                         }
7688                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7689                                 process_chunk_item(chunk_cache, &key, buf, i);
7690                                 continue;
7691                         }
7692                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7693                                 process_device_item(dev_cache, &key, buf, i);
7694                                 continue;
7695                         }
7696                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7697                                 process_block_group_item(block_group_cache,
7698                                         &key, buf, i);
7699                                 continue;
7700                         }
7701                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7702                                 process_device_extent_item(dev_extent_cache,
7703                                         &key, buf, i);
7704                                 continue;
7705
7706                         }
7707                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7708 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7709                                 process_extent_ref_v0(extent_cache, buf, i);
7710 #else
7711                                 BUG();
7712 #endif
7713                                 continue;
7714                         }
7715
7716                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7717                                 ret = add_tree_backref(extent_cache,
7718                                                 key.objectid, 0, key.offset, 0);
7719                                 if (ret < 0)
7720                                         error("add_tree_backref failed: %s",
7721                                               strerror(-ret));
7722                                 continue;
7723                         }
7724                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7725                                 ret = add_tree_backref(extent_cache,
7726                                                 key.objectid, key.offset, 0, 0);
7727                                 if (ret < 0)
7728                                         error("add_tree_backref failed: %s",
7729                                               strerror(-ret));
7730                                 continue;
7731                         }
7732                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7733                                 struct btrfs_extent_data_ref *ref;
7734                                 ref = btrfs_item_ptr(buf, i,
7735                                                 struct btrfs_extent_data_ref);
7736                                 add_data_backref(extent_cache,
7737                                         key.objectid, 0,
7738                                         btrfs_extent_data_ref_root(buf, ref),
7739                                         btrfs_extent_data_ref_objectid(buf,
7740                                                                        ref),
7741                                         btrfs_extent_data_ref_offset(buf, ref),
7742                                         btrfs_extent_data_ref_count(buf, ref),
7743                                         0, root->sectorsize);
7744                                 continue;
7745                         }
7746                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7747                                 struct btrfs_shared_data_ref *ref;
7748                                 ref = btrfs_item_ptr(buf, i,
7749                                                 struct btrfs_shared_data_ref);
7750                                 add_data_backref(extent_cache,
7751                                         key.objectid, key.offset, 0, 0, 0,
7752                                         btrfs_shared_data_ref_count(buf, ref),
7753                                         0, root->sectorsize);
7754                                 continue;
7755                         }
7756                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7757                                 struct bad_item *bad;
7758
7759                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7760                                         continue;
7761                                 if (!owner)
7762                                         continue;
7763                                 bad = malloc(sizeof(struct bad_item));
7764                                 if (!bad)
7765                                         continue;
7766                                 INIT_LIST_HEAD(&bad->list);
7767                                 memcpy(&bad->key, &key,
7768                                        sizeof(struct btrfs_key));
7769                                 bad->root_id = owner;
7770                                 list_add_tail(&bad->list, &delete_items);
7771                                 continue;
7772                         }
7773                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7774                                 continue;
7775                         fi = btrfs_item_ptr(buf, i,
7776                                             struct btrfs_file_extent_item);
7777                         if (btrfs_file_extent_type(buf, fi) ==
7778                             BTRFS_FILE_EXTENT_INLINE)
7779                                 continue;
7780                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7781                                 continue;
7782
7783                         data_bytes_allocated +=
7784                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7785                         if (data_bytes_allocated < root->sectorsize) {
7786                                 abort();
7787                         }
7788                         data_bytes_referenced +=
7789                                 btrfs_file_extent_num_bytes(buf, fi);
7790                         add_data_backref(extent_cache,
7791                                 btrfs_file_extent_disk_bytenr(buf, fi),
7792                                 parent, owner, key.objectid, key.offset -
7793                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7794                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7795                 }
7796         } else {
7797                 int level;
7798                 struct btrfs_key first_key;
7799
7800                 first_key.objectid = 0;
7801
7802                 if (nritems > 0)
7803                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7804                 level = btrfs_header_level(buf);
7805                 for (i = 0; i < nritems; i++) {
7806                         struct extent_record tmpl;
7807
7808                         ptr = btrfs_node_blockptr(buf, i);
7809                         size = root->nodesize;
7810                         btrfs_node_key_to_cpu(buf, &key, i);
7811                         if (ri != NULL) {
7812                                 if ((level == ri->drop_level)
7813                                     && is_dropped_key(&key, &ri->drop_key)) {
7814                                         continue;
7815                                 }
7816                         }
7817
7818                         memset(&tmpl, 0, sizeof(tmpl));
7819                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7820                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7821                         tmpl.start = ptr;
7822                         tmpl.nr = size;
7823                         tmpl.refs = 1;
7824                         tmpl.metadata = 1;
7825                         tmpl.max_size = size;
7826                         ret = add_extent_rec(extent_cache, &tmpl);
7827                         if (ret < 0)
7828                                 goto out;
7829
7830                         ret = add_tree_backref(extent_cache, ptr, parent,
7831                                         owner, 1);
7832                         if (ret < 0) {
7833                                 error("add_tree_backref failed: %s",
7834                                       strerror(-ret));
7835                                 continue;
7836                         }
7837
7838                         if (level > 1) {
7839                                 add_pending(nodes, seen, ptr, size);
7840                         } else {
7841                                 add_pending(pending, seen, ptr, size);
7842                         }
7843                 }
7844                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7845                                       nritems) * sizeof(struct btrfs_key_ptr);
7846         }
7847         total_btree_bytes += buf->len;
7848         if (fs_root_objectid(btrfs_header_owner(buf)))
7849                 total_fs_tree_bytes += buf->len;
7850         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7851                 total_extent_tree_bytes += buf->len;
7852         if (!found_old_backref &&
7853             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7854             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7855             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7856                 found_old_backref = 1;
7857 out:
7858         free_extent_buffer(buf);
7859         return ret;
7860 }
7861
7862 static int add_root_to_pending(struct extent_buffer *buf,
7863                                struct cache_tree *extent_cache,
7864                                struct cache_tree *pending,
7865                                struct cache_tree *seen,
7866                                struct cache_tree *nodes,
7867                                u64 objectid)
7868 {
7869         struct extent_record tmpl;
7870         int ret;
7871
7872         if (btrfs_header_level(buf) > 0)
7873                 add_pending(nodes, seen, buf->start, buf->len);
7874         else
7875                 add_pending(pending, seen, buf->start, buf->len);
7876
7877         memset(&tmpl, 0, sizeof(tmpl));
7878         tmpl.start = buf->start;
7879         tmpl.nr = buf->len;
7880         tmpl.is_root = 1;
7881         tmpl.refs = 1;
7882         tmpl.metadata = 1;
7883         tmpl.max_size = buf->len;
7884         add_extent_rec(extent_cache, &tmpl);
7885
7886         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7887             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7888                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7889                                 0, 1);
7890         else
7891                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7892                                 1);
7893         return ret;
7894 }
7895
7896 /* as we fix the tree, we might be deleting blocks that
7897  * we're tracking for repair.  This hook makes sure we
7898  * remove any backrefs for blocks as we are fixing them.
7899  */
7900 static int free_extent_hook(struct btrfs_trans_handle *trans,
7901                             struct btrfs_root *root,
7902                             u64 bytenr, u64 num_bytes, u64 parent,
7903                             u64 root_objectid, u64 owner, u64 offset,
7904                             int refs_to_drop)
7905 {
7906         struct extent_record *rec;
7907         struct cache_extent *cache;
7908         int is_data;
7909         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7910
7911         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7912         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7913         if (!cache)
7914                 return 0;
7915
7916         rec = container_of(cache, struct extent_record, cache);
7917         if (is_data) {
7918                 struct data_backref *back;
7919                 back = find_data_backref(rec, parent, root_objectid, owner,
7920                                          offset, 1, bytenr, num_bytes);
7921                 if (!back)
7922                         goto out;
7923                 if (back->node.found_ref) {
7924                         back->found_ref -= refs_to_drop;
7925                         if (rec->refs)
7926                                 rec->refs -= refs_to_drop;
7927                 }
7928                 if (back->node.found_extent_tree) {
7929                         back->num_refs -= refs_to_drop;
7930                         if (rec->extent_item_refs)
7931                                 rec->extent_item_refs -= refs_to_drop;
7932                 }
7933                 if (back->found_ref == 0)
7934                         back->node.found_ref = 0;
7935                 if (back->num_refs == 0)
7936                         back->node.found_extent_tree = 0;
7937
7938                 if (!back->node.found_extent_tree && back->node.found_ref) {
7939                         list_del(&back->node.list);
7940                         free(back);
7941                 }
7942         } else {
7943                 struct tree_backref *back;
7944                 back = find_tree_backref(rec, parent, root_objectid);
7945                 if (!back)
7946                         goto out;
7947                 if (back->node.found_ref) {
7948                         if (rec->refs)
7949                                 rec->refs--;
7950                         back->node.found_ref = 0;
7951                 }
7952                 if (back->node.found_extent_tree) {
7953                         if (rec->extent_item_refs)
7954                                 rec->extent_item_refs--;
7955                         back->node.found_extent_tree = 0;
7956                 }
7957                 if (!back->node.found_extent_tree && back->node.found_ref) {
7958                         list_del(&back->node.list);
7959                         free(back);
7960                 }
7961         }
7962         maybe_free_extent_rec(extent_cache, rec);
7963 out:
7964         return 0;
7965 }
7966
7967 static int delete_extent_records(struct btrfs_trans_handle *trans,
7968                                  struct btrfs_root *root,
7969                                  struct btrfs_path *path,
7970                                  u64 bytenr)
7971 {
7972         struct btrfs_key key;
7973         struct btrfs_key found_key;
7974         struct extent_buffer *leaf;
7975         int ret;
7976         int slot;
7977
7978
7979         key.objectid = bytenr;
7980         key.type = (u8)-1;
7981         key.offset = (u64)-1;
7982
7983         while(1) {
7984                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7985                                         &key, path, 0, 1);
7986                 if (ret < 0)
7987                         break;
7988
7989                 if (ret > 0) {
7990                         ret = 0;
7991                         if (path->slots[0] == 0)
7992                                 break;
7993                         path->slots[0]--;
7994                 }
7995                 ret = 0;
7996
7997                 leaf = path->nodes[0];
7998                 slot = path->slots[0];
7999
8000                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8001                 if (found_key.objectid != bytenr)
8002                         break;
8003
8004                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8005                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8006                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8007                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8008                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8009                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8010                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8011                         btrfs_release_path(path);
8012                         if (found_key.type == 0) {
8013                                 if (found_key.offset == 0)
8014                                         break;
8015                                 key.offset = found_key.offset - 1;
8016                                 key.type = found_key.type;
8017                         }
8018                         key.type = found_key.type - 1;
8019                         key.offset = (u64)-1;
8020                         continue;
8021                 }
8022
8023                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8024                         found_key.objectid, found_key.type, found_key.offset);
8025
8026                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8027                 if (ret)
8028                         break;
8029                 btrfs_release_path(path);
8030
8031                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8032                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8033                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8034                                 found_key.offset : root->nodesize;
8035
8036                         ret = btrfs_update_block_group(trans, root, bytenr,
8037                                                        bytes, 0, 0);
8038                         if (ret)
8039                                 break;
8040                 }
8041         }
8042
8043         btrfs_release_path(path);
8044         return ret;
8045 }
8046
8047 /*
8048  * for a single backref, this will allocate a new extent
8049  * and add the backref to it.
8050  */
8051 static int record_extent(struct btrfs_trans_handle *trans,
8052                          struct btrfs_fs_info *info,
8053                          struct btrfs_path *path,
8054                          struct extent_record *rec,
8055                          struct extent_backref *back,
8056                          int allocated, u64 flags)
8057 {
8058         int ret = 0;
8059         struct btrfs_root *extent_root = info->extent_root;
8060         struct extent_buffer *leaf;
8061         struct btrfs_key ins_key;
8062         struct btrfs_extent_item *ei;
8063         struct data_backref *dback;
8064         struct btrfs_tree_block_info *bi;
8065
8066         if (!back->is_data)
8067                 rec->max_size = max_t(u64, rec->max_size,
8068                                     info->extent_root->nodesize);
8069
8070         if (!allocated) {
8071                 u32 item_size = sizeof(*ei);
8072
8073                 if (!back->is_data)
8074                         item_size += sizeof(*bi);
8075
8076                 ins_key.objectid = rec->start;
8077                 ins_key.offset = rec->max_size;
8078                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8079
8080                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8081                                         &ins_key, item_size);
8082                 if (ret)
8083                         goto fail;
8084
8085                 leaf = path->nodes[0];
8086                 ei = btrfs_item_ptr(leaf, path->slots[0],
8087                                     struct btrfs_extent_item);
8088
8089                 btrfs_set_extent_refs(leaf, ei, 0);
8090                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8091
8092                 if (back->is_data) {
8093                         btrfs_set_extent_flags(leaf, ei,
8094                                                BTRFS_EXTENT_FLAG_DATA);
8095                 } else {
8096                         struct btrfs_disk_key copy_key;;
8097
8098                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8099                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8100                                              sizeof(*bi));
8101
8102                         btrfs_set_disk_key_objectid(&copy_key,
8103                                                     rec->info_objectid);
8104                         btrfs_set_disk_key_type(&copy_key, 0);
8105                         btrfs_set_disk_key_offset(&copy_key, 0);
8106
8107                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8108                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8109
8110                         btrfs_set_extent_flags(leaf, ei,
8111                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8112                 }
8113
8114                 btrfs_mark_buffer_dirty(leaf);
8115                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8116                                                rec->max_size, 1, 0);
8117                 if (ret)
8118                         goto fail;
8119                 btrfs_release_path(path);
8120         }
8121
8122         if (back->is_data) {
8123                 u64 parent;
8124                 int i;
8125
8126                 dback = to_data_backref(back);
8127                 if (back->full_backref)
8128                         parent = dback->parent;
8129                 else
8130                         parent = 0;
8131
8132                 for (i = 0; i < dback->found_ref; i++) {
8133                         /* if parent != 0, we're doing a full backref
8134                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8135                          * just makes the backref allocator create a data
8136                          * backref
8137                          */
8138                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8139                                                    rec->start, rec->max_size,
8140                                                    parent,
8141                                                    dback->root,
8142                                                    parent ?
8143                                                    BTRFS_FIRST_FREE_OBJECTID :
8144                                                    dback->owner,
8145                                                    dback->offset);
8146                         if (ret)
8147                                 break;
8148                 }
8149                 fprintf(stderr, "adding new data backref"
8150                                 " on %llu %s %llu owner %llu"
8151                                 " offset %llu found %d\n",
8152                                 (unsigned long long)rec->start,
8153                                 back->full_backref ?
8154                                 "parent" : "root",
8155                                 back->full_backref ?
8156                                 (unsigned long long)parent :
8157                                 (unsigned long long)dback->root,
8158                                 (unsigned long long)dback->owner,
8159                                 (unsigned long long)dback->offset,
8160                                 dback->found_ref);
8161         } else {
8162                 u64 parent;
8163                 struct tree_backref *tback;
8164
8165                 tback = to_tree_backref(back);
8166                 if (back->full_backref)
8167                         parent = tback->parent;
8168                 else
8169                         parent = 0;
8170
8171                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8172                                            rec->start, rec->max_size,
8173                                            parent, tback->root, 0, 0);
8174                 fprintf(stderr, "adding new tree backref on "
8175                         "start %llu len %llu parent %llu root %llu\n",
8176                         rec->start, rec->max_size, parent, tback->root);
8177         }
8178 fail:
8179         btrfs_release_path(path);
8180         return ret;
8181 }
8182
8183 static struct extent_entry *find_entry(struct list_head *entries,
8184                                        u64 bytenr, u64 bytes)
8185 {
8186         struct extent_entry *entry = NULL;
8187
8188         list_for_each_entry(entry, entries, list) {
8189                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8190                         return entry;
8191         }
8192
8193         return NULL;
8194 }
8195
8196 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8197 {
8198         struct extent_entry *entry, *best = NULL, *prev = NULL;
8199
8200         list_for_each_entry(entry, entries, list) {
8201                 /*
8202                  * If there are as many broken entries as entries then we know
8203                  * not to trust this particular entry.
8204                  */
8205                 if (entry->broken == entry->count)
8206                         continue;
8207
8208                 /*
8209                  * Special case, when there are only two entries and 'best' is
8210                  * the first one
8211                  */
8212                 if (!prev) {
8213                         best = entry;
8214                         prev = entry;
8215                         continue;
8216                 }
8217
8218                 /*
8219                  * If our current entry == best then we can't be sure our best
8220                  * is really the best, so we need to keep searching.
8221                  */
8222                 if (best && best->count == entry->count) {
8223                         prev = entry;
8224                         best = NULL;
8225                         continue;
8226                 }
8227
8228                 /* Prev == entry, not good enough, have to keep searching */
8229                 if (!prev->broken && prev->count == entry->count)
8230                         continue;
8231
8232                 if (!best)
8233                         best = (prev->count > entry->count) ? prev : entry;
8234                 else if (best->count < entry->count)
8235                         best = entry;
8236                 prev = entry;
8237         }
8238
8239         return best;
8240 }
8241
8242 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8243                       struct data_backref *dback, struct extent_entry *entry)
8244 {
8245         struct btrfs_trans_handle *trans;
8246         struct btrfs_root *root;
8247         struct btrfs_file_extent_item *fi;
8248         struct extent_buffer *leaf;
8249         struct btrfs_key key;
8250         u64 bytenr, bytes;
8251         int ret, err;
8252
8253         key.objectid = dback->root;
8254         key.type = BTRFS_ROOT_ITEM_KEY;
8255         key.offset = (u64)-1;
8256         root = btrfs_read_fs_root(info, &key);
8257         if (IS_ERR(root)) {
8258                 fprintf(stderr, "Couldn't find root for our ref\n");
8259                 return -EINVAL;
8260         }
8261
8262         /*
8263          * The backref points to the original offset of the extent if it was
8264          * split, so we need to search down to the offset we have and then walk
8265          * forward until we find the backref we're looking for.
8266          */
8267         key.objectid = dback->owner;
8268         key.type = BTRFS_EXTENT_DATA_KEY;
8269         key.offset = dback->offset;
8270         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8271         if (ret < 0) {
8272                 fprintf(stderr, "Error looking up ref %d\n", ret);
8273                 return ret;
8274         }
8275
8276         while (1) {
8277                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8278                         ret = btrfs_next_leaf(root, path);
8279                         if (ret) {
8280                                 fprintf(stderr, "Couldn't find our ref, next\n");
8281                                 return -EINVAL;
8282                         }
8283                 }
8284                 leaf = path->nodes[0];
8285                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8286                 if (key.objectid != dback->owner ||
8287                     key.type != BTRFS_EXTENT_DATA_KEY) {
8288                         fprintf(stderr, "Couldn't find our ref, search\n");
8289                         return -EINVAL;
8290                 }
8291                 fi = btrfs_item_ptr(leaf, path->slots[0],
8292                                     struct btrfs_file_extent_item);
8293                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8294                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8295
8296                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8297                         break;
8298                 path->slots[0]++;
8299         }
8300
8301         btrfs_release_path(path);
8302
8303         trans = btrfs_start_transaction(root, 1);
8304         if (IS_ERR(trans))
8305                 return PTR_ERR(trans);
8306
8307         /*
8308          * Ok we have the key of the file extent we want to fix, now we can cow
8309          * down to the thing and fix it.
8310          */
8311         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8312         if (ret < 0) {
8313                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8314                         key.objectid, key.type, key.offset, ret);
8315                 goto out;
8316         }
8317         if (ret > 0) {
8318                 fprintf(stderr, "Well that's odd, we just found this key "
8319                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8320                         key.offset);
8321                 ret = -EINVAL;
8322                 goto out;
8323         }
8324         leaf = path->nodes[0];
8325         fi = btrfs_item_ptr(leaf, path->slots[0],
8326                             struct btrfs_file_extent_item);
8327
8328         if (btrfs_file_extent_compression(leaf, fi) &&
8329             dback->disk_bytenr != entry->bytenr) {
8330                 fprintf(stderr, "Ref doesn't match the record start and is "
8331                         "compressed, please take a btrfs-image of this file "
8332                         "system and send it to a btrfs developer so they can "
8333                         "complete this functionality for bytenr %Lu\n",
8334                         dback->disk_bytenr);
8335                 ret = -EINVAL;
8336                 goto out;
8337         }
8338
8339         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8340                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8341         } else if (dback->disk_bytenr > entry->bytenr) {
8342                 u64 off_diff, offset;
8343
8344                 off_diff = dback->disk_bytenr - entry->bytenr;
8345                 offset = btrfs_file_extent_offset(leaf, fi);
8346                 if (dback->disk_bytenr + offset +
8347                     btrfs_file_extent_num_bytes(leaf, fi) >
8348                     entry->bytenr + entry->bytes) {
8349                         fprintf(stderr, "Ref is past the entry end, please "
8350                                 "take a btrfs-image of this file system and "
8351                                 "send it to a btrfs developer, ref %Lu\n",
8352                                 dback->disk_bytenr);
8353                         ret = -EINVAL;
8354                         goto out;
8355                 }
8356                 offset += off_diff;
8357                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8358                 btrfs_set_file_extent_offset(leaf, fi, offset);
8359         } else if (dback->disk_bytenr < entry->bytenr) {
8360                 u64 offset;
8361
8362                 offset = btrfs_file_extent_offset(leaf, fi);
8363                 if (dback->disk_bytenr + offset < entry->bytenr) {
8364                         fprintf(stderr, "Ref is before the entry start, please"
8365                                 " take a btrfs-image of this file system and "
8366                                 "send it to a btrfs developer, ref %Lu\n",
8367                                 dback->disk_bytenr);
8368                         ret = -EINVAL;
8369                         goto out;
8370                 }
8371
8372                 offset += dback->disk_bytenr;
8373                 offset -= entry->bytenr;
8374                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8375                 btrfs_set_file_extent_offset(leaf, fi, offset);
8376         }
8377
8378         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8379
8380         /*
8381          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8382          * only do this if we aren't using compression, otherwise it's a
8383          * trickier case.
8384          */
8385         if (!btrfs_file_extent_compression(leaf, fi))
8386                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8387         else
8388                 printf("ram bytes may be wrong?\n");
8389         btrfs_mark_buffer_dirty(leaf);
8390 out:
8391         err = btrfs_commit_transaction(trans, root);
8392         btrfs_release_path(path);
8393         return ret ? ret : err;
8394 }
8395
8396 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8397                            struct extent_record *rec)
8398 {
8399         struct extent_backref *back;
8400         struct data_backref *dback;
8401         struct extent_entry *entry, *best = NULL;
8402         LIST_HEAD(entries);
8403         int nr_entries = 0;
8404         int broken_entries = 0;
8405         int ret = 0;
8406         short mismatch = 0;
8407
8408         /*
8409          * Metadata is easy and the backrefs should always agree on bytenr and
8410          * size, if not we've got bigger issues.
8411          */
8412         if (rec->metadata)
8413                 return 0;
8414
8415         list_for_each_entry(back, &rec->backrefs, list) {
8416                 if (back->full_backref || !back->is_data)
8417                         continue;
8418
8419                 dback = to_data_backref(back);
8420
8421                 /*
8422                  * We only pay attention to backrefs that we found a real
8423                  * backref for.
8424                  */
8425                 if (dback->found_ref == 0)
8426                         continue;
8427
8428                 /*
8429                  * For now we only catch when the bytes don't match, not the
8430                  * bytenr.  We can easily do this at the same time, but I want
8431                  * to have a fs image to test on before we just add repair
8432                  * functionality willy-nilly so we know we won't screw up the
8433                  * repair.
8434                  */
8435
8436                 entry = find_entry(&entries, dback->disk_bytenr,
8437                                    dback->bytes);
8438                 if (!entry) {
8439                         entry = malloc(sizeof(struct extent_entry));
8440                         if (!entry) {
8441                                 ret = -ENOMEM;
8442                                 goto out;
8443                         }
8444                         memset(entry, 0, sizeof(*entry));
8445                         entry->bytenr = dback->disk_bytenr;
8446                         entry->bytes = dback->bytes;
8447                         list_add_tail(&entry->list, &entries);
8448                         nr_entries++;
8449                 }
8450
8451                 /*
8452                  * If we only have on entry we may think the entries agree when
8453                  * in reality they don't so we have to do some extra checking.
8454                  */
8455                 if (dback->disk_bytenr != rec->start ||
8456                     dback->bytes != rec->nr || back->broken)
8457                         mismatch = 1;
8458
8459                 if (back->broken) {
8460                         entry->broken++;
8461                         broken_entries++;
8462                 }
8463
8464                 entry->count++;
8465         }
8466
8467         /* Yay all the backrefs agree, carry on good sir */
8468         if (nr_entries <= 1 && !mismatch)
8469                 goto out;
8470
8471         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8472                 "%Lu\n", rec->start);
8473
8474         /*
8475          * First we want to see if the backrefs can agree amongst themselves who
8476          * is right, so figure out which one of the entries has the highest
8477          * count.
8478          */
8479         best = find_most_right_entry(&entries);
8480
8481         /*
8482          * Ok so we may have an even split between what the backrefs think, so
8483          * this is where we use the extent ref to see what it thinks.
8484          */
8485         if (!best) {
8486                 entry = find_entry(&entries, rec->start, rec->nr);
8487                 if (!entry && (!broken_entries || !rec->found_rec)) {
8488                         fprintf(stderr, "Backrefs don't agree with each other "
8489                                 "and extent record doesn't agree with anybody,"
8490                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8491                                 rec->start, rec->nr);
8492                         ret = -EINVAL;
8493                         goto out;
8494                 } else if (!entry) {
8495                         /*
8496                          * Ok our backrefs were broken, we'll assume this is the
8497                          * correct value and add an entry for this range.
8498                          */
8499                         entry = malloc(sizeof(struct extent_entry));
8500                         if (!entry) {
8501                                 ret = -ENOMEM;
8502                                 goto out;
8503                         }
8504                         memset(entry, 0, sizeof(*entry));
8505                         entry->bytenr = rec->start;
8506                         entry->bytes = rec->nr;
8507                         list_add_tail(&entry->list, &entries);
8508                         nr_entries++;
8509                 }
8510                 entry->count++;
8511                 best = find_most_right_entry(&entries);
8512                 if (!best) {
8513                         fprintf(stderr, "Backrefs and extent record evenly "
8514                                 "split on who is right, this is going to "
8515                                 "require user input to fix bytenr %Lu bytes "
8516                                 "%Lu\n", rec->start, rec->nr);
8517                         ret = -EINVAL;
8518                         goto out;
8519                 }
8520         }
8521
8522         /*
8523          * I don't think this can happen currently as we'll abort() if we catch
8524          * this case higher up, but in case somebody removes that we still can't
8525          * deal with it properly here yet, so just bail out of that's the case.
8526          */
8527         if (best->bytenr != rec->start) {
8528                 fprintf(stderr, "Extent start and backref starts don't match, "
8529                         "please use btrfs-image on this file system and send "
8530                         "it to a btrfs developer so they can make fsck fix "
8531                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8532                         rec->start, rec->nr);
8533                 ret = -EINVAL;
8534                 goto out;
8535         }
8536
8537         /*
8538          * Ok great we all agreed on an extent record, let's go find the real
8539          * references and fix up the ones that don't match.
8540          */
8541         list_for_each_entry(back, &rec->backrefs, list) {
8542                 if (back->full_backref || !back->is_data)
8543                         continue;
8544
8545                 dback = to_data_backref(back);
8546
8547                 /*
8548                  * Still ignoring backrefs that don't have a real ref attached
8549                  * to them.
8550                  */
8551                 if (dback->found_ref == 0)
8552                         continue;
8553
8554                 if (dback->bytes == best->bytes &&
8555                     dback->disk_bytenr == best->bytenr)
8556                         continue;
8557
8558                 ret = repair_ref(info, path, dback, best);
8559                 if (ret)
8560                         goto out;
8561         }
8562
8563         /*
8564          * Ok we messed with the actual refs, which means we need to drop our
8565          * entire cache and go back and rescan.  I know this is a huge pain and
8566          * adds a lot of extra work, but it's the only way to be safe.  Once all
8567          * the backrefs agree we may not need to do anything to the extent
8568          * record itself.
8569          */
8570         ret = -EAGAIN;
8571 out:
8572         while (!list_empty(&entries)) {
8573                 entry = list_entry(entries.next, struct extent_entry, list);
8574                 list_del_init(&entry->list);
8575                 free(entry);
8576         }
8577         return ret;
8578 }
8579
8580 static int process_duplicates(struct btrfs_root *root,
8581                               struct cache_tree *extent_cache,
8582                               struct extent_record *rec)
8583 {
8584         struct extent_record *good, *tmp;
8585         struct cache_extent *cache;
8586         int ret;
8587
8588         /*
8589          * If we found a extent record for this extent then return, or if we
8590          * have more than one duplicate we are likely going to need to delete
8591          * something.
8592          */
8593         if (rec->found_rec || rec->num_duplicates > 1)
8594                 return 0;
8595
8596         /* Shouldn't happen but just in case */
8597         BUG_ON(!rec->num_duplicates);
8598
8599         /*
8600          * So this happens if we end up with a backref that doesn't match the
8601          * actual extent entry.  So either the backref is bad or the extent
8602          * entry is bad.  Either way we want to have the extent_record actually
8603          * reflect what we found in the extent_tree, so we need to take the
8604          * duplicate out and use that as the extent_record since the only way we
8605          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8606          */
8607         remove_cache_extent(extent_cache, &rec->cache);
8608
8609         good = to_extent_record(rec->dups.next);
8610         list_del_init(&good->list);
8611         INIT_LIST_HEAD(&good->backrefs);
8612         INIT_LIST_HEAD(&good->dups);
8613         good->cache.start = good->start;
8614         good->cache.size = good->nr;
8615         good->content_checked = 0;
8616         good->owner_ref_checked = 0;
8617         good->num_duplicates = 0;
8618         good->refs = rec->refs;
8619         list_splice_init(&rec->backrefs, &good->backrefs);
8620         while (1) {
8621                 cache = lookup_cache_extent(extent_cache, good->start,
8622                                             good->nr);
8623                 if (!cache)
8624                         break;
8625                 tmp = container_of(cache, struct extent_record, cache);
8626
8627                 /*
8628                  * If we find another overlapping extent and it's found_rec is
8629                  * set then it's a duplicate and we need to try and delete
8630                  * something.
8631                  */
8632                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8633                         if (list_empty(&good->list))
8634                                 list_add_tail(&good->list,
8635                                               &duplicate_extents);
8636                         good->num_duplicates += tmp->num_duplicates + 1;
8637                         list_splice_init(&tmp->dups, &good->dups);
8638                         list_del_init(&tmp->list);
8639                         list_add_tail(&tmp->list, &good->dups);
8640                         remove_cache_extent(extent_cache, &tmp->cache);
8641                         continue;
8642                 }
8643
8644                 /*
8645                  * Ok we have another non extent item backed extent rec, so lets
8646                  * just add it to this extent and carry on like we did above.
8647                  */
8648                 good->refs += tmp->refs;
8649                 list_splice_init(&tmp->backrefs, &good->backrefs);
8650                 remove_cache_extent(extent_cache, &tmp->cache);
8651                 free(tmp);
8652         }
8653         ret = insert_cache_extent(extent_cache, &good->cache);
8654         BUG_ON(ret);
8655         free(rec);
8656         return good->num_duplicates ? 0 : 1;
8657 }
8658
8659 static int delete_duplicate_records(struct btrfs_root *root,
8660                                     struct extent_record *rec)
8661 {
8662         struct btrfs_trans_handle *trans;
8663         LIST_HEAD(delete_list);
8664         struct btrfs_path path;
8665         struct extent_record *tmp, *good, *n;
8666         int nr_del = 0;
8667         int ret = 0, err;
8668         struct btrfs_key key;
8669
8670         btrfs_init_path(&path);
8671
8672         good = rec;
8673         /* Find the record that covers all of the duplicates. */
8674         list_for_each_entry(tmp, &rec->dups, list) {
8675                 if (good->start < tmp->start)
8676                         continue;
8677                 if (good->nr > tmp->nr)
8678                         continue;
8679
8680                 if (tmp->start + tmp->nr < good->start + good->nr) {
8681                         fprintf(stderr, "Ok we have overlapping extents that "
8682                                 "aren't completely covered by each other, this "
8683                                 "is going to require more careful thought.  "
8684                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8685                                 tmp->start, tmp->nr, good->start, good->nr);
8686                         abort();
8687                 }
8688                 good = tmp;
8689         }
8690
8691         if (good != rec)
8692                 list_add_tail(&rec->list, &delete_list);
8693
8694         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8695                 if (tmp == good)
8696                         continue;
8697                 list_move_tail(&tmp->list, &delete_list);
8698         }
8699
8700         root = root->fs_info->extent_root;
8701         trans = btrfs_start_transaction(root, 1);
8702         if (IS_ERR(trans)) {
8703                 ret = PTR_ERR(trans);
8704                 goto out;
8705         }
8706
8707         list_for_each_entry(tmp, &delete_list, list) {
8708                 if (tmp->found_rec == 0)
8709                         continue;
8710                 key.objectid = tmp->start;
8711                 key.type = BTRFS_EXTENT_ITEM_KEY;
8712                 key.offset = tmp->nr;
8713
8714                 /* Shouldn't happen but just in case */
8715                 if (tmp->metadata) {
8716                         fprintf(stderr, "Well this shouldn't happen, extent "
8717                                 "record overlaps but is metadata? "
8718                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8719                         abort();
8720                 }
8721
8722                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8723                 if (ret) {
8724                         if (ret > 0)
8725                                 ret = -EINVAL;
8726                         break;
8727                 }
8728                 ret = btrfs_del_item(trans, root, &path);
8729                 if (ret)
8730                         break;
8731                 btrfs_release_path(&path);
8732                 nr_del++;
8733         }
8734         err = btrfs_commit_transaction(trans, root);
8735         if (err && !ret)
8736                 ret = err;
8737 out:
8738         while (!list_empty(&delete_list)) {
8739                 tmp = to_extent_record(delete_list.next);
8740                 list_del_init(&tmp->list);
8741                 if (tmp == rec)
8742                         continue;
8743                 free(tmp);
8744         }
8745
8746         while (!list_empty(&rec->dups)) {
8747                 tmp = to_extent_record(rec->dups.next);
8748                 list_del_init(&tmp->list);
8749                 free(tmp);
8750         }
8751
8752         btrfs_release_path(&path);
8753
8754         if (!ret && !nr_del)
8755                 rec->num_duplicates = 0;
8756
8757         return ret ? ret : nr_del;
8758 }
8759
8760 static int find_possible_backrefs(struct btrfs_fs_info *info,
8761                                   struct btrfs_path *path,
8762                                   struct cache_tree *extent_cache,
8763                                   struct extent_record *rec)
8764 {
8765         struct btrfs_root *root;
8766         struct extent_backref *back;
8767         struct data_backref *dback;
8768         struct cache_extent *cache;
8769         struct btrfs_file_extent_item *fi;
8770         struct btrfs_key key;
8771         u64 bytenr, bytes;
8772         int ret;
8773
8774         list_for_each_entry(back, &rec->backrefs, list) {
8775                 /* Don't care about full backrefs (poor unloved backrefs) */
8776                 if (back->full_backref || !back->is_data)
8777                         continue;
8778
8779                 dback = to_data_backref(back);
8780
8781                 /* We found this one, we don't need to do a lookup */
8782                 if (dback->found_ref)
8783                         continue;
8784
8785                 key.objectid = dback->root;
8786                 key.type = BTRFS_ROOT_ITEM_KEY;
8787                 key.offset = (u64)-1;
8788
8789                 root = btrfs_read_fs_root(info, &key);
8790
8791                 /* No root, definitely a bad ref, skip */
8792                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8793                         continue;
8794                 /* Other err, exit */
8795                 if (IS_ERR(root))
8796                         return PTR_ERR(root);
8797
8798                 key.objectid = dback->owner;
8799                 key.type = BTRFS_EXTENT_DATA_KEY;
8800                 key.offset = dback->offset;
8801                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8802                 if (ret) {
8803                         btrfs_release_path(path);
8804                         if (ret < 0)
8805                                 return ret;
8806                         /* Didn't find it, we can carry on */
8807                         ret = 0;
8808                         continue;
8809                 }
8810
8811                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8812                                     struct btrfs_file_extent_item);
8813                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8814                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8815                 btrfs_release_path(path);
8816                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8817                 if (cache) {
8818                         struct extent_record *tmp;
8819                         tmp = container_of(cache, struct extent_record, cache);
8820
8821                         /*
8822                          * If we found an extent record for the bytenr for this
8823                          * particular backref then we can't add it to our
8824                          * current extent record.  We only want to add backrefs
8825                          * that don't have a corresponding extent item in the
8826                          * extent tree since they likely belong to this record
8827                          * and we need to fix it if it doesn't match bytenrs.
8828                          */
8829                         if  (tmp->found_rec)
8830                                 continue;
8831                 }
8832
8833                 dback->found_ref += 1;
8834                 dback->disk_bytenr = bytenr;
8835                 dback->bytes = bytes;
8836
8837                 /*
8838                  * Set this so the verify backref code knows not to trust the
8839                  * values in this backref.
8840                  */
8841                 back->broken = 1;
8842         }
8843
8844         return 0;
8845 }
8846
8847 /*
8848  * Record orphan data ref into corresponding root.
8849  *
8850  * Return 0 if the extent item contains data ref and recorded.
8851  * Return 1 if the extent item contains no useful data ref
8852  *   On that case, it may contains only shared_dataref or metadata backref
8853  *   or the file extent exists(this should be handled by the extent bytenr
8854  *   recovery routine)
8855  * Return <0 if something goes wrong.
8856  */
8857 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8858                                       struct extent_record *rec)
8859 {
8860         struct btrfs_key key;
8861         struct btrfs_root *dest_root;
8862         struct extent_backref *back;
8863         struct data_backref *dback;
8864         struct orphan_data_extent *orphan;
8865         struct btrfs_path path;
8866         int recorded_data_ref = 0;
8867         int ret = 0;
8868
8869         if (rec->metadata)
8870                 return 1;
8871         btrfs_init_path(&path);
8872         list_for_each_entry(back, &rec->backrefs, list) {
8873                 if (back->full_backref || !back->is_data ||
8874                     !back->found_extent_tree)
8875                         continue;
8876                 dback = to_data_backref(back);
8877                 if (dback->found_ref)
8878                         continue;
8879                 key.objectid = dback->root;
8880                 key.type = BTRFS_ROOT_ITEM_KEY;
8881                 key.offset = (u64)-1;
8882
8883                 dest_root = btrfs_read_fs_root(fs_info, &key);
8884
8885                 /* For non-exist root we just skip it */
8886                 if (IS_ERR(dest_root) || !dest_root)
8887                         continue;
8888
8889                 key.objectid = dback->owner;
8890                 key.type = BTRFS_EXTENT_DATA_KEY;
8891                 key.offset = dback->offset;
8892
8893                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8894                 btrfs_release_path(&path);
8895                 /*
8896                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8897                  * we need to record it for inode/file extent rebuild.
8898                  * For ret > 0, we record it only for file extent rebuild.
8899                  * For ret == 0, the file extent exists but only bytenr
8900                  * mismatch, let the original bytenr fix routine to handle,
8901                  * don't record it.
8902                  */
8903                 if (ret == 0)
8904                         continue;
8905                 ret = 0;
8906                 orphan = malloc(sizeof(*orphan));
8907                 if (!orphan) {
8908                         ret = -ENOMEM;
8909                         goto out;
8910                 }
8911                 INIT_LIST_HEAD(&orphan->list);
8912                 orphan->root = dback->root;
8913                 orphan->objectid = dback->owner;
8914                 orphan->offset = dback->offset;
8915                 orphan->disk_bytenr = rec->cache.start;
8916                 orphan->disk_len = rec->cache.size;
8917                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8918                 recorded_data_ref = 1;
8919         }
8920 out:
8921         btrfs_release_path(&path);
8922         if (!ret)
8923                 return !recorded_data_ref;
8924         else
8925                 return ret;
8926 }
8927
8928 /*
8929  * when an incorrect extent item is found, this will delete
8930  * all of the existing entries for it and recreate them
8931  * based on what the tree scan found.
8932  */
8933 static int fixup_extent_refs(struct btrfs_fs_info *info,
8934                              struct cache_tree *extent_cache,
8935                              struct extent_record *rec)
8936 {
8937         struct btrfs_trans_handle *trans = NULL;
8938         int ret;
8939         struct btrfs_path path;
8940         struct list_head *cur = rec->backrefs.next;
8941         struct cache_extent *cache;
8942         struct extent_backref *back;
8943         int allocated = 0;
8944         u64 flags = 0;
8945
8946         if (rec->flag_block_full_backref)
8947                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8948
8949         btrfs_init_path(&path);
8950         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8951                 /*
8952                  * Sometimes the backrefs themselves are so broken they don't
8953                  * get attached to any meaningful rec, so first go back and
8954                  * check any of our backrefs that we couldn't find and throw
8955                  * them into the list if we find the backref so that
8956                  * verify_backrefs can figure out what to do.
8957                  */
8958                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8959                 if (ret < 0)
8960                         goto out;
8961         }
8962
8963         /* step one, make sure all of the backrefs agree */
8964         ret = verify_backrefs(info, &path, rec);
8965         if (ret < 0)
8966                 goto out;
8967
8968         trans = btrfs_start_transaction(info->extent_root, 1);
8969         if (IS_ERR(trans)) {
8970                 ret = PTR_ERR(trans);
8971                 goto out;
8972         }
8973
8974         /* step two, delete all the existing records */
8975         ret = delete_extent_records(trans, info->extent_root, &path,
8976                                     rec->start);
8977
8978         if (ret < 0)
8979                 goto out;
8980
8981         /* was this block corrupt?  If so, don't add references to it */
8982         cache = lookup_cache_extent(info->corrupt_blocks,
8983                                     rec->start, rec->max_size);
8984         if (cache) {
8985                 ret = 0;
8986                 goto out;
8987         }
8988
8989         /* step three, recreate all the refs we did find */
8990         while(cur != &rec->backrefs) {
8991                 back = to_extent_backref(cur);
8992                 cur = cur->next;
8993
8994                 /*
8995                  * if we didn't find any references, don't create a
8996                  * new extent record
8997                  */
8998                 if (!back->found_ref)
8999                         continue;
9000
9001                 rec->bad_full_backref = 0;
9002                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9003                 allocated = 1;
9004
9005                 if (ret)
9006                         goto out;
9007         }
9008 out:
9009         if (trans) {
9010                 int err = btrfs_commit_transaction(trans, info->extent_root);
9011                 if (!ret)
9012                         ret = err;
9013         }
9014
9015         if (!ret)
9016                 fprintf(stderr, "Repaired extent references for %llu\n",
9017                                 (unsigned long long)rec->start);
9018
9019         btrfs_release_path(&path);
9020         return ret;
9021 }
9022
9023 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9024                               struct extent_record *rec)
9025 {
9026         struct btrfs_trans_handle *trans;
9027         struct btrfs_root *root = fs_info->extent_root;
9028         struct btrfs_path path;
9029         struct btrfs_extent_item *ei;
9030         struct btrfs_key key;
9031         u64 flags;
9032         int ret = 0;
9033
9034         key.objectid = rec->start;
9035         if (rec->metadata) {
9036                 key.type = BTRFS_METADATA_ITEM_KEY;
9037                 key.offset = rec->info_level;
9038         } else {
9039                 key.type = BTRFS_EXTENT_ITEM_KEY;
9040                 key.offset = rec->max_size;
9041         }
9042
9043         trans = btrfs_start_transaction(root, 0);
9044         if (IS_ERR(trans))
9045                 return PTR_ERR(trans);
9046
9047         btrfs_init_path(&path);
9048         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9049         if (ret < 0) {
9050                 btrfs_release_path(&path);
9051                 btrfs_commit_transaction(trans, root);
9052                 return ret;
9053         } else if (ret) {
9054                 fprintf(stderr, "Didn't find extent for %llu\n",
9055                         (unsigned long long)rec->start);
9056                 btrfs_release_path(&path);
9057                 btrfs_commit_transaction(trans, root);
9058                 return -ENOENT;
9059         }
9060
9061         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9062                             struct btrfs_extent_item);
9063         flags = btrfs_extent_flags(path.nodes[0], ei);
9064         if (rec->flag_block_full_backref) {
9065                 fprintf(stderr, "setting full backref on %llu\n",
9066                         (unsigned long long)key.objectid);
9067                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9068         } else {
9069                 fprintf(stderr, "clearing full backref on %llu\n",
9070                         (unsigned long long)key.objectid);
9071                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9072         }
9073         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9074         btrfs_mark_buffer_dirty(path.nodes[0]);
9075         btrfs_release_path(&path);
9076         ret = btrfs_commit_transaction(trans, root);
9077         if (!ret)
9078                 fprintf(stderr, "Repaired extent flags for %llu\n",
9079                                 (unsigned long long)rec->start);
9080
9081         return ret;
9082 }
9083
9084 /* right now we only prune from the extent allocation tree */
9085 static int prune_one_block(struct btrfs_trans_handle *trans,
9086                            struct btrfs_fs_info *info,
9087                            struct btrfs_corrupt_block *corrupt)
9088 {
9089         int ret;
9090         struct btrfs_path path;
9091         struct extent_buffer *eb;
9092         u64 found;
9093         int slot;
9094         int nritems;
9095         int level = corrupt->level + 1;
9096
9097         btrfs_init_path(&path);
9098 again:
9099         /* we want to stop at the parent to our busted block */
9100         path.lowest_level = level;
9101
9102         ret = btrfs_search_slot(trans, info->extent_root,
9103                                 &corrupt->key, &path, -1, 1);
9104
9105         if (ret < 0)
9106                 goto out;
9107
9108         eb = path.nodes[level];
9109         if (!eb) {
9110                 ret = -ENOENT;
9111                 goto out;
9112         }
9113
9114         /*
9115          * hopefully the search gave us the block we want to prune,
9116          * lets try that first
9117          */
9118         slot = path.slots[level];
9119         found =  btrfs_node_blockptr(eb, slot);
9120         if (found == corrupt->cache.start)
9121                 goto del_ptr;
9122
9123         nritems = btrfs_header_nritems(eb);
9124
9125         /* the search failed, lets scan this node and hope we find it */
9126         for (slot = 0; slot < nritems; slot++) {
9127                 found =  btrfs_node_blockptr(eb, slot);
9128                 if (found == corrupt->cache.start)
9129                         goto del_ptr;
9130         }
9131         /*
9132          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9133          * to this block
9134          */
9135         if (eb == info->extent_root->node) {
9136                 ret = -ENOENT;
9137                 goto out;
9138         } else {
9139                 level++;
9140                 btrfs_release_path(&path);
9141                 goto again;
9142         }
9143
9144 del_ptr:
9145         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9146         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9147
9148 out:
9149         btrfs_release_path(&path);
9150         return ret;
9151 }
9152
9153 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9154 {
9155         struct btrfs_trans_handle *trans = NULL;
9156         struct cache_extent *cache;
9157         struct btrfs_corrupt_block *corrupt;
9158
9159         while (1) {
9160                 cache = search_cache_extent(info->corrupt_blocks, 0);
9161                 if (!cache)
9162                         break;
9163                 if (!trans) {
9164                         trans = btrfs_start_transaction(info->extent_root, 1);
9165                         if (IS_ERR(trans))
9166                                 return PTR_ERR(trans);
9167                 }
9168                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9169                 prune_one_block(trans, info, corrupt);
9170                 remove_cache_extent(info->corrupt_blocks, cache);
9171         }
9172         if (trans)
9173                 return btrfs_commit_transaction(trans, info->extent_root);
9174         return 0;
9175 }
9176
9177 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9178 {
9179         struct btrfs_block_group_cache *cache;
9180         u64 start, end;
9181         int ret;
9182
9183         while (1) {
9184                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9185                                             &start, &end, EXTENT_DIRTY);
9186                 if (ret)
9187                         break;
9188                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9189         }
9190
9191         start = 0;
9192         while (1) {
9193                 cache = btrfs_lookup_first_block_group(fs_info, start);
9194                 if (!cache)
9195                         break;
9196                 if (cache->cached)
9197                         cache->cached = 0;
9198                 start = cache->key.objectid + cache->key.offset;
9199         }
9200 }
9201
9202 static int check_extent_refs(struct btrfs_root *root,
9203                              struct cache_tree *extent_cache)
9204 {
9205         struct extent_record *rec;
9206         struct cache_extent *cache;
9207         int ret = 0;
9208         int had_dups = 0;
9209
9210         if (repair) {
9211                 /*
9212                  * if we're doing a repair, we have to make sure
9213                  * we don't allocate from the problem extents.
9214                  * In the worst case, this will be all the
9215                  * extents in the FS
9216                  */
9217                 cache = search_cache_extent(extent_cache, 0);
9218                 while(cache) {
9219                         rec = container_of(cache, struct extent_record, cache);
9220                         set_extent_dirty(root->fs_info->excluded_extents,
9221                                          rec->start,
9222                                          rec->start + rec->max_size - 1);
9223                         cache = next_cache_extent(cache);
9224                 }
9225
9226                 /* pin down all the corrupted blocks too */
9227                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9228                 while(cache) {
9229                         set_extent_dirty(root->fs_info->excluded_extents,
9230                                          cache->start,
9231                                          cache->start + cache->size - 1);
9232                         cache = next_cache_extent(cache);
9233                 }
9234                 prune_corrupt_blocks(root->fs_info);
9235                 reset_cached_block_groups(root->fs_info);
9236         }
9237
9238         reset_cached_block_groups(root->fs_info);
9239
9240         /*
9241          * We need to delete any duplicate entries we find first otherwise we
9242          * could mess up the extent tree when we have backrefs that actually
9243          * belong to a different extent item and not the weird duplicate one.
9244          */
9245         while (repair && !list_empty(&duplicate_extents)) {
9246                 rec = to_extent_record(duplicate_extents.next);
9247                 list_del_init(&rec->list);
9248
9249                 /* Sometimes we can find a backref before we find an actual
9250                  * extent, so we need to process it a little bit to see if there
9251                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9252                  * if this is a backref screwup.  If we need to delete stuff
9253                  * process_duplicates() will return 0, otherwise it will return
9254                  * 1 and we
9255                  */
9256                 if (process_duplicates(root, extent_cache, rec))
9257                         continue;
9258                 ret = delete_duplicate_records(root, rec);
9259                 if (ret < 0)
9260                         return ret;
9261                 /*
9262                  * delete_duplicate_records will return the number of entries
9263                  * deleted, so if it's greater than 0 then we know we actually
9264                  * did something and we need to remove.
9265                  */
9266                 if (ret)
9267                         had_dups = 1;
9268         }
9269
9270         if (had_dups)
9271                 return -EAGAIN;
9272
9273         while(1) {
9274                 int cur_err = 0;
9275                 int fix = 0;
9276
9277                 cache = search_cache_extent(extent_cache, 0);
9278                 if (!cache)
9279                         break;
9280                 rec = container_of(cache, struct extent_record, cache);
9281                 if (rec->num_duplicates) {
9282                         fprintf(stderr, "extent item %llu has multiple extent "
9283                                 "items\n", (unsigned long long)rec->start);
9284                         cur_err = 1;
9285                 }
9286
9287                 if (rec->refs != rec->extent_item_refs) {
9288                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9289                                 (unsigned long long)rec->start,
9290                                 (unsigned long long)rec->nr);
9291                         fprintf(stderr, "extent item %llu, found %llu\n",
9292                                 (unsigned long long)rec->extent_item_refs,
9293                                 (unsigned long long)rec->refs);
9294                         ret = record_orphan_data_extents(root->fs_info, rec);
9295                         if (ret < 0)
9296                                 goto repair_abort;
9297                         fix = ret;
9298                         cur_err = 1;
9299                 }
9300                 if (all_backpointers_checked(rec, 1)) {
9301                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9302                                 (unsigned long long)rec->start,
9303                                 (unsigned long long)rec->nr);
9304                         fix = 1;
9305                         cur_err = 1;
9306                 }
9307                 if (!rec->owner_ref_checked) {
9308                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9309                                 (unsigned long long)rec->start,
9310                                 (unsigned long long)rec->nr);
9311                         fix = 1;
9312                         cur_err = 1;
9313                 }
9314
9315                 if (repair && fix) {
9316                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9317                         if (ret)
9318                                 goto repair_abort;
9319                 }
9320
9321
9322                 if (rec->bad_full_backref) {
9323                         fprintf(stderr, "bad full backref, on [%llu]\n",
9324                                 (unsigned long long)rec->start);
9325                         if (repair) {
9326                                 ret = fixup_extent_flags(root->fs_info, rec);
9327                                 if (ret)
9328                                         goto repair_abort;
9329                                 fix = 1;
9330                         }
9331                         cur_err = 1;
9332                 }
9333                 /*
9334                  * Although it's not a extent ref's problem, we reuse this
9335                  * routine for error reporting.
9336                  * No repair function yet.
9337                  */
9338                 if (rec->crossing_stripes) {
9339                         fprintf(stderr,
9340                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9341                                 rec->start, rec->start + rec->max_size);
9342                         cur_err = 1;
9343                 }
9344
9345                 if (rec->wrong_chunk_type) {
9346                         fprintf(stderr,
9347                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9348                                 rec->start, rec->start + rec->max_size);
9349                         cur_err = 1;
9350                 }
9351
9352                 remove_cache_extent(extent_cache, cache);
9353                 free_all_extent_backrefs(rec);
9354                 if (!init_extent_tree && repair && (!cur_err || fix))
9355                         clear_extent_dirty(root->fs_info->excluded_extents,
9356                                            rec->start,
9357                                            rec->start + rec->max_size - 1);
9358                 free(rec);
9359         }
9360 repair_abort:
9361         if (repair) {
9362                 if (ret && ret != -EAGAIN) {
9363                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9364                         exit(1);
9365                 } else if (!ret) {
9366                         struct btrfs_trans_handle *trans;
9367
9368                         root = root->fs_info->extent_root;
9369                         trans = btrfs_start_transaction(root, 1);
9370                         if (IS_ERR(trans)) {
9371                                 ret = PTR_ERR(trans);
9372                                 goto repair_abort;
9373                         }
9374
9375                         btrfs_fix_block_accounting(trans, root);
9376                         ret = btrfs_commit_transaction(trans, root);
9377                         if (ret)
9378                                 goto repair_abort;
9379                 }
9380                 return ret;
9381         }
9382         return 0;
9383 }
9384
9385 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9386 {
9387         u64 stripe_size;
9388
9389         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9390                 stripe_size = length;
9391                 stripe_size /= num_stripes;
9392         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9393                 stripe_size = length * 2;
9394                 stripe_size /= num_stripes;
9395         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9396                 stripe_size = length;
9397                 stripe_size /= (num_stripes - 1);
9398         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9399                 stripe_size = length;
9400                 stripe_size /= (num_stripes - 2);
9401         } else {
9402                 stripe_size = length;
9403         }
9404         return stripe_size;
9405 }
9406
9407 /*
9408  * Check the chunk with its block group/dev list ref:
9409  * Return 0 if all refs seems valid.
9410  * Return 1 if part of refs seems valid, need later check for rebuild ref
9411  * like missing block group and needs to search extent tree to rebuild them.
9412  * Return -1 if essential refs are missing and unable to rebuild.
9413  */
9414 static int check_chunk_refs(struct chunk_record *chunk_rec,
9415                             struct block_group_tree *block_group_cache,
9416                             struct device_extent_tree *dev_extent_cache,
9417                             int silent)
9418 {
9419         struct cache_extent *block_group_item;
9420         struct block_group_record *block_group_rec;
9421         struct cache_extent *dev_extent_item;
9422         struct device_extent_record *dev_extent_rec;
9423         u64 devid;
9424         u64 offset;
9425         u64 length;
9426         int metadump_v2 = 0;
9427         int i;
9428         int ret = 0;
9429
9430         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9431                                                chunk_rec->offset,
9432                                                chunk_rec->length);
9433         if (block_group_item) {
9434                 block_group_rec = container_of(block_group_item,
9435                                                struct block_group_record,
9436                                                cache);
9437                 if (chunk_rec->length != block_group_rec->offset ||
9438                     chunk_rec->offset != block_group_rec->objectid ||
9439                     (!metadump_v2 &&
9440                      chunk_rec->type_flags != block_group_rec->flags)) {
9441                         if (!silent)
9442                                 fprintf(stderr,
9443                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9444                                         chunk_rec->objectid,
9445                                         chunk_rec->type,
9446                                         chunk_rec->offset,
9447                                         chunk_rec->length,
9448                                         chunk_rec->offset,
9449                                         chunk_rec->type_flags,
9450                                         block_group_rec->objectid,
9451                                         block_group_rec->type,
9452                                         block_group_rec->offset,
9453                                         block_group_rec->offset,
9454                                         block_group_rec->objectid,
9455                                         block_group_rec->flags);
9456                         ret = -1;
9457                 } else {
9458                         list_del_init(&block_group_rec->list);
9459                         chunk_rec->bg_rec = block_group_rec;
9460                 }
9461         } else {
9462                 if (!silent)
9463                         fprintf(stderr,
9464                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9465                                 chunk_rec->objectid,
9466                                 chunk_rec->type,
9467                                 chunk_rec->offset,
9468                                 chunk_rec->length,
9469                                 chunk_rec->offset,
9470                                 chunk_rec->type_flags);
9471                 ret = 1;
9472         }
9473
9474         if (metadump_v2)
9475                 return ret;
9476
9477         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9478                                     chunk_rec->num_stripes);
9479         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9480                 devid = chunk_rec->stripes[i].devid;
9481                 offset = chunk_rec->stripes[i].offset;
9482                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9483                                                        devid, offset, length);
9484                 if (dev_extent_item) {
9485                         dev_extent_rec = container_of(dev_extent_item,
9486                                                 struct device_extent_record,
9487                                                 cache);
9488                         if (dev_extent_rec->objectid != devid ||
9489                             dev_extent_rec->offset != offset ||
9490                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9491                             dev_extent_rec->length != length) {
9492                                 if (!silent)
9493                                         fprintf(stderr,
9494                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9495                                                 chunk_rec->objectid,
9496                                                 chunk_rec->type,
9497                                                 chunk_rec->offset,
9498                                                 chunk_rec->stripes[i].devid,
9499                                                 chunk_rec->stripes[i].offset,
9500                                                 dev_extent_rec->objectid,
9501                                                 dev_extent_rec->offset,
9502                                                 dev_extent_rec->length);
9503                                 ret = -1;
9504                         } else {
9505                                 list_move(&dev_extent_rec->chunk_list,
9506                                           &chunk_rec->dextents);
9507                         }
9508                 } else {
9509                         if (!silent)
9510                                 fprintf(stderr,
9511                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9512                                         chunk_rec->objectid,
9513                                         chunk_rec->type,
9514                                         chunk_rec->offset,
9515                                         chunk_rec->stripes[i].devid,
9516                                         chunk_rec->stripes[i].offset);
9517                         ret = -1;
9518                 }
9519         }
9520         return ret;
9521 }
9522
9523 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9524 int check_chunks(struct cache_tree *chunk_cache,
9525                  struct block_group_tree *block_group_cache,
9526                  struct device_extent_tree *dev_extent_cache,
9527                  struct list_head *good, struct list_head *bad,
9528                  struct list_head *rebuild, int silent)
9529 {
9530         struct cache_extent *chunk_item;
9531         struct chunk_record *chunk_rec;
9532         struct block_group_record *bg_rec;
9533         struct device_extent_record *dext_rec;
9534         int err;
9535         int ret = 0;
9536
9537         chunk_item = first_cache_extent(chunk_cache);
9538         while (chunk_item) {
9539                 chunk_rec = container_of(chunk_item, struct chunk_record,
9540                                          cache);
9541                 err = check_chunk_refs(chunk_rec, block_group_cache,
9542                                        dev_extent_cache, silent);
9543                 if (err < 0)
9544                         ret = err;
9545                 if (err == 0 && good)
9546                         list_add_tail(&chunk_rec->list, good);
9547                 if (err > 0 && rebuild)
9548                         list_add_tail(&chunk_rec->list, rebuild);
9549                 if (err < 0 && bad)
9550                         list_add_tail(&chunk_rec->list, bad);
9551                 chunk_item = next_cache_extent(chunk_item);
9552         }
9553
9554         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9555                 if (!silent)
9556                         fprintf(stderr,
9557                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9558                                 bg_rec->objectid,
9559                                 bg_rec->offset,
9560                                 bg_rec->flags);
9561                 if (!ret)
9562                         ret = 1;
9563         }
9564
9565         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9566                             chunk_list) {
9567                 if (!silent)
9568                         fprintf(stderr,
9569                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9570                                 dext_rec->objectid,
9571                                 dext_rec->offset,
9572                                 dext_rec->length);
9573                 if (!ret)
9574                         ret = 1;
9575         }
9576         return ret;
9577 }
9578
9579
9580 static int check_device_used(struct device_record *dev_rec,
9581                              struct device_extent_tree *dext_cache)
9582 {
9583         struct cache_extent *cache;
9584         struct device_extent_record *dev_extent_rec;
9585         u64 total_byte = 0;
9586
9587         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9588         while (cache) {
9589                 dev_extent_rec = container_of(cache,
9590                                               struct device_extent_record,
9591                                               cache);
9592                 if (dev_extent_rec->objectid != dev_rec->devid)
9593                         break;
9594
9595                 list_del_init(&dev_extent_rec->device_list);
9596                 total_byte += dev_extent_rec->length;
9597                 cache = next_cache_extent(cache);
9598         }
9599
9600         if (total_byte != dev_rec->byte_used) {
9601                 fprintf(stderr,
9602                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9603                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9604                         dev_rec->type, dev_rec->offset);
9605                 return -1;
9606         } else {
9607                 return 0;
9608         }
9609 }
9610
9611 /* check btrfs_dev_item -> btrfs_dev_extent */
9612 static int check_devices(struct rb_root *dev_cache,
9613                          struct device_extent_tree *dev_extent_cache)
9614 {
9615         struct rb_node *dev_node;
9616         struct device_record *dev_rec;
9617         struct device_extent_record *dext_rec;
9618         int err;
9619         int ret = 0;
9620
9621         dev_node = rb_first(dev_cache);
9622         while (dev_node) {
9623                 dev_rec = container_of(dev_node, struct device_record, node);
9624                 err = check_device_used(dev_rec, dev_extent_cache);
9625                 if (err)
9626                         ret = err;
9627
9628                 dev_node = rb_next(dev_node);
9629         }
9630         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9631                             device_list) {
9632                 fprintf(stderr,
9633                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9634                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9635                 if (!ret)
9636                         ret = 1;
9637         }
9638         return ret;
9639 }
9640
9641 static int add_root_item_to_list(struct list_head *head,
9642                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9643                                   u8 level, u8 drop_level,
9644                                   int level_size, struct btrfs_key *drop_key)
9645 {
9646
9647         struct root_item_record *ri_rec;
9648         ri_rec = malloc(sizeof(*ri_rec));
9649         if (!ri_rec)
9650                 return -ENOMEM;
9651         ri_rec->bytenr = bytenr;
9652         ri_rec->objectid = objectid;
9653         ri_rec->level = level;
9654         ri_rec->level_size = level_size;
9655         ri_rec->drop_level = drop_level;
9656         ri_rec->last_snapshot = last_snapshot;
9657         if (drop_key)
9658                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9659         list_add_tail(&ri_rec->list, head);
9660
9661         return 0;
9662 }
9663
9664 static void free_root_item_list(struct list_head *list)
9665 {
9666         struct root_item_record *ri_rec;
9667
9668         while (!list_empty(list)) {
9669                 ri_rec = list_first_entry(list, struct root_item_record,
9670                                           list);
9671                 list_del_init(&ri_rec->list);
9672                 free(ri_rec);
9673         }
9674 }
9675
9676 static int deal_root_from_list(struct list_head *list,
9677                                struct btrfs_root *root,
9678                                struct block_info *bits,
9679                                int bits_nr,
9680                                struct cache_tree *pending,
9681                                struct cache_tree *seen,
9682                                struct cache_tree *reada,
9683                                struct cache_tree *nodes,
9684                                struct cache_tree *extent_cache,
9685                                struct cache_tree *chunk_cache,
9686                                struct rb_root *dev_cache,
9687                                struct block_group_tree *block_group_cache,
9688                                struct device_extent_tree *dev_extent_cache)
9689 {
9690         int ret = 0;
9691         u64 last;
9692
9693         while (!list_empty(list)) {
9694                 struct root_item_record *rec;
9695                 struct extent_buffer *buf;
9696                 rec = list_entry(list->next,
9697                                  struct root_item_record, list);
9698                 last = 0;
9699                 buf = read_tree_block(root->fs_info->tree_root,
9700                                       rec->bytenr, rec->level_size, 0);
9701                 if (!extent_buffer_uptodate(buf)) {
9702                         free_extent_buffer(buf);
9703                         ret = -EIO;
9704                         break;
9705                 }
9706                 ret = add_root_to_pending(buf, extent_cache, pending,
9707                                     seen, nodes, rec->objectid);
9708                 if (ret < 0)
9709                         break;
9710                 /*
9711                  * To rebuild extent tree, we need deal with snapshot
9712                  * one by one, otherwise we deal with node firstly which
9713                  * can maximize readahead.
9714                  */
9715                 while (1) {
9716                         ret = run_next_block(root, bits, bits_nr, &last,
9717                                              pending, seen, reada, nodes,
9718                                              extent_cache, chunk_cache,
9719                                              dev_cache, block_group_cache,
9720                                              dev_extent_cache, rec);
9721                         if (ret != 0)
9722                                 break;
9723                 }
9724                 free_extent_buffer(buf);
9725                 list_del(&rec->list);
9726                 free(rec);
9727                 if (ret < 0)
9728                         break;
9729         }
9730         while (ret >= 0) {
9731                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9732                                      reada, nodes, extent_cache, chunk_cache,
9733                                      dev_cache, block_group_cache,
9734                                      dev_extent_cache, NULL);
9735                 if (ret != 0) {
9736                         if (ret > 0)
9737                                 ret = 0;
9738                         break;
9739                 }
9740         }
9741         return ret;
9742 }
9743
9744 static int check_chunks_and_extents(struct btrfs_root *root)
9745 {
9746         struct rb_root dev_cache;
9747         struct cache_tree chunk_cache;
9748         struct block_group_tree block_group_cache;
9749         struct device_extent_tree dev_extent_cache;
9750         struct cache_tree extent_cache;
9751         struct cache_tree seen;
9752         struct cache_tree pending;
9753         struct cache_tree reada;
9754         struct cache_tree nodes;
9755         struct extent_io_tree excluded_extents;
9756         struct cache_tree corrupt_blocks;
9757         struct btrfs_path path;
9758         struct btrfs_key key;
9759         struct btrfs_key found_key;
9760         int ret, err = 0;
9761         struct block_info *bits;
9762         int bits_nr;
9763         struct extent_buffer *leaf;
9764         int slot;
9765         struct btrfs_root_item ri;
9766         struct list_head dropping_trees;
9767         struct list_head normal_trees;
9768         struct btrfs_root *root1;
9769         u64 objectid;
9770         u32 level_size;
9771         u8 level;
9772
9773         dev_cache = RB_ROOT;
9774         cache_tree_init(&chunk_cache);
9775         block_group_tree_init(&block_group_cache);
9776         device_extent_tree_init(&dev_extent_cache);
9777
9778         cache_tree_init(&extent_cache);
9779         cache_tree_init(&seen);
9780         cache_tree_init(&pending);
9781         cache_tree_init(&nodes);
9782         cache_tree_init(&reada);
9783         cache_tree_init(&corrupt_blocks);
9784         extent_io_tree_init(&excluded_extents);
9785         INIT_LIST_HEAD(&dropping_trees);
9786         INIT_LIST_HEAD(&normal_trees);
9787
9788         if (repair) {
9789                 root->fs_info->excluded_extents = &excluded_extents;
9790                 root->fs_info->fsck_extent_cache = &extent_cache;
9791                 root->fs_info->free_extent_hook = free_extent_hook;
9792                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9793         }
9794
9795         bits_nr = 1024;
9796         bits = malloc(bits_nr * sizeof(struct block_info));
9797         if (!bits) {
9798                 perror("malloc");
9799                 exit(1);
9800         }
9801
9802         if (ctx.progress_enabled) {
9803                 ctx.tp = TASK_EXTENTS;
9804                 task_start(ctx.info);
9805         }
9806
9807 again:
9808         root1 = root->fs_info->tree_root;
9809         level = btrfs_header_level(root1->node);
9810         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9811                                     root1->node->start, 0, level, 0,
9812                                     root1->nodesize, NULL);
9813         if (ret < 0)
9814                 goto out;
9815         root1 = root->fs_info->chunk_root;
9816         level = btrfs_header_level(root1->node);
9817         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9818                                     root1->node->start, 0, level, 0,
9819                                     root1->nodesize, NULL);
9820         if (ret < 0)
9821                 goto out;
9822         btrfs_init_path(&path);
9823         key.offset = 0;
9824         key.objectid = 0;
9825         key.type = BTRFS_ROOT_ITEM_KEY;
9826         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9827                                         &key, &path, 0, 0);
9828         if (ret < 0)
9829                 goto out;
9830         while(1) {
9831                 leaf = path.nodes[0];
9832                 slot = path.slots[0];
9833                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9834                         ret = btrfs_next_leaf(root, &path);
9835                         if (ret != 0)
9836                                 break;
9837                         leaf = path.nodes[0];
9838                         slot = path.slots[0];
9839                 }
9840                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9841                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9842                         unsigned long offset;
9843                         u64 last_snapshot;
9844
9845                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9846                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9847                         last_snapshot = btrfs_root_last_snapshot(&ri);
9848                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9849                                 level = btrfs_root_level(&ri);
9850                                 level_size = root->nodesize;
9851                                 ret = add_root_item_to_list(&normal_trees,
9852                                                 found_key.objectid,
9853                                                 btrfs_root_bytenr(&ri),
9854                                                 last_snapshot, level,
9855                                                 0, level_size, NULL);
9856                                 if (ret < 0)
9857                                         goto out;
9858                         } else {
9859                                 level = btrfs_root_level(&ri);
9860                                 level_size = root->nodesize;
9861                                 objectid = found_key.objectid;
9862                                 btrfs_disk_key_to_cpu(&found_key,
9863                                                       &ri.drop_progress);
9864                                 ret = add_root_item_to_list(&dropping_trees,
9865                                                 objectid,
9866                                                 btrfs_root_bytenr(&ri),
9867                                                 last_snapshot, level,
9868                                                 ri.drop_level,
9869                                                 level_size, &found_key);
9870                                 if (ret < 0)
9871                                         goto out;
9872                         }
9873                 }
9874                 path.slots[0]++;
9875         }
9876         btrfs_release_path(&path);
9877
9878         /*
9879          * check_block can return -EAGAIN if it fixes something, please keep
9880          * this in mind when dealing with return values from these functions, if
9881          * we get -EAGAIN we want to fall through and restart the loop.
9882          */
9883         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9884                                   &seen, &reada, &nodes, &extent_cache,
9885                                   &chunk_cache, &dev_cache, &block_group_cache,
9886                                   &dev_extent_cache);
9887         if (ret < 0) {
9888                 if (ret == -EAGAIN)
9889                         goto loop;
9890                 goto out;
9891         }
9892         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9893                                   &pending, &seen, &reada, &nodes,
9894                                   &extent_cache, &chunk_cache, &dev_cache,
9895                                   &block_group_cache, &dev_extent_cache);
9896         if (ret < 0) {
9897                 if (ret == -EAGAIN)
9898                         goto loop;
9899                 goto out;
9900         }
9901
9902         ret = check_chunks(&chunk_cache, &block_group_cache,
9903                            &dev_extent_cache, NULL, NULL, NULL, 0);
9904         if (ret) {
9905                 if (ret == -EAGAIN)
9906                         goto loop;
9907                 err = ret;
9908         }
9909
9910         ret = check_extent_refs(root, &extent_cache);
9911         if (ret < 0) {
9912                 if (ret == -EAGAIN)
9913                         goto loop;
9914                 goto out;
9915         }
9916
9917         ret = check_devices(&dev_cache, &dev_extent_cache);
9918         if (ret && err)
9919                 ret = err;
9920
9921 out:
9922         task_stop(ctx.info);
9923         if (repair) {
9924                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9925                 extent_io_tree_cleanup(&excluded_extents);
9926                 root->fs_info->fsck_extent_cache = NULL;
9927                 root->fs_info->free_extent_hook = NULL;
9928                 root->fs_info->corrupt_blocks = NULL;
9929                 root->fs_info->excluded_extents = NULL;
9930         }
9931         free(bits);
9932         free_chunk_cache_tree(&chunk_cache);
9933         free_device_cache_tree(&dev_cache);
9934         free_block_group_tree(&block_group_cache);
9935         free_device_extent_tree(&dev_extent_cache);
9936         free_extent_cache_tree(&seen);
9937         free_extent_cache_tree(&pending);
9938         free_extent_cache_tree(&reada);
9939         free_extent_cache_tree(&nodes);
9940         return ret;
9941 loop:
9942         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9943         free_extent_cache_tree(&seen);
9944         free_extent_cache_tree(&pending);
9945         free_extent_cache_tree(&reada);
9946         free_extent_cache_tree(&nodes);
9947         free_chunk_cache_tree(&chunk_cache);
9948         free_block_group_tree(&block_group_cache);
9949         free_device_cache_tree(&dev_cache);
9950         free_device_extent_tree(&dev_extent_cache);
9951         free_extent_record_cache(root->fs_info, &extent_cache);
9952         free_root_item_list(&normal_trees);
9953         free_root_item_list(&dropping_trees);
9954         extent_io_tree_cleanup(&excluded_extents);
9955         goto again;
9956 }
9957
9958 /*
9959  * Check backrefs of a tree block given by @bytenr or @eb.
9960  *
9961  * @root:       the root containing the @bytenr or @eb
9962  * @eb:         tree block extent buffer, can be NULL
9963  * @bytenr:     bytenr of the tree block to search
9964  * @level:      tree level of the tree block
9965  * @owner:      owner of the tree block
9966  *
9967  * Return >0 for any error found and output error message
9968  * Return 0 for no error found
9969  */
9970 static int check_tree_block_ref(struct btrfs_root *root,
9971                                 struct extent_buffer *eb, u64 bytenr,
9972                                 int level, u64 owner)
9973 {
9974         struct btrfs_key key;
9975         struct btrfs_root *extent_root = root->fs_info->extent_root;
9976         struct btrfs_path path;
9977         struct btrfs_extent_item *ei;
9978         struct btrfs_extent_inline_ref *iref;
9979         struct extent_buffer *leaf;
9980         unsigned long end;
9981         unsigned long ptr;
9982         int slot;
9983         int skinny_level;
9984         int type;
9985         u32 nodesize = root->nodesize;
9986         u32 item_size;
9987         u64 offset;
9988         int tree_reloc_root = 0;
9989         int found_ref = 0;
9990         int err = 0;
9991         int ret;
9992
9993         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9994             btrfs_header_bytenr(root->node) == bytenr)
9995                 tree_reloc_root = 1;
9996
9997         btrfs_init_path(&path);
9998         key.objectid = bytenr;
9999         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10000                 key.type = BTRFS_METADATA_ITEM_KEY;
10001         else
10002                 key.type = BTRFS_EXTENT_ITEM_KEY;
10003         key.offset = (u64)-1;
10004
10005         /* Search for the backref in extent tree */
10006         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10007         if (ret < 0) {
10008                 err |= BACKREF_MISSING;
10009                 goto out;
10010         }
10011         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10012         if (ret) {
10013                 err |= BACKREF_MISSING;
10014                 goto out;
10015         }
10016
10017         leaf = path.nodes[0];
10018         slot = path.slots[0];
10019         btrfs_item_key_to_cpu(leaf, &key, slot);
10020
10021         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10022
10023         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10024                 skinny_level = (int)key.offset;
10025                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10026         } else {
10027                 struct btrfs_tree_block_info *info;
10028
10029                 info = (struct btrfs_tree_block_info *)(ei + 1);
10030                 skinny_level = btrfs_tree_block_level(leaf, info);
10031                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10032         }
10033
10034         if (eb) {
10035                 u64 header_gen;
10036                 u64 extent_gen;
10037
10038                 if (!(btrfs_extent_flags(leaf, ei) &
10039                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10040                         error(
10041                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10042                                 key.objectid, nodesize,
10043                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10044                         err = BACKREF_MISMATCH;
10045                 }
10046                 header_gen = btrfs_header_generation(eb);
10047                 extent_gen = btrfs_extent_generation(leaf, ei);
10048                 if (header_gen != extent_gen) {
10049                         error(
10050         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10051                                 key.objectid, nodesize, header_gen,
10052                                 extent_gen);
10053                         err = BACKREF_MISMATCH;
10054                 }
10055                 if (level != skinny_level) {
10056                         error(
10057                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10058                                 key.objectid, nodesize, level, skinny_level);
10059                         err = BACKREF_MISMATCH;
10060                 }
10061                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10062                         error(
10063                         "extent[%llu %u] is referred by other roots than %llu",
10064                                 key.objectid, nodesize, root->objectid);
10065                         err = BACKREF_MISMATCH;
10066                 }
10067         }
10068
10069         /*
10070          * Iterate the extent/metadata item to find the exact backref
10071          */
10072         item_size = btrfs_item_size_nr(leaf, slot);
10073         ptr = (unsigned long)iref;
10074         end = (unsigned long)ei + item_size;
10075         while (ptr < end) {
10076                 iref = (struct btrfs_extent_inline_ref *)ptr;
10077                 type = btrfs_extent_inline_ref_type(leaf, iref);
10078                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10079
10080                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10081                         (offset == root->objectid || offset == owner)) {
10082                         found_ref = 1;
10083                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10084                         /*
10085                          * Backref of tree reloc root points to itself, no need
10086                          * to check backref any more.
10087                          */
10088                         if (tree_reloc_root)
10089                                 found_ref = 1;
10090                         else
10091                         /* Check if the backref points to valid referencer */
10092                                 found_ref = !check_tree_block_ref(root, NULL,
10093                                                 offset, level + 1, owner);
10094                 }
10095
10096                 if (found_ref)
10097                         break;
10098                 ptr += btrfs_extent_inline_ref_size(type);
10099         }
10100
10101         /*
10102          * Inlined extent item doesn't have what we need, check
10103          * TREE_BLOCK_REF_KEY
10104          */
10105         if (!found_ref) {
10106                 btrfs_release_path(&path);
10107                 key.objectid = bytenr;
10108                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10109                 key.offset = root->objectid;
10110
10111                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10112                 if (!ret)
10113                         found_ref = 1;
10114         }
10115         if (!found_ref)
10116                 err |= BACKREF_MISSING;
10117 out:
10118         btrfs_release_path(&path);
10119         if (eb && (err & BACKREF_MISSING))
10120                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10121                         bytenr, nodesize, owner, level);
10122         return err;
10123 }
10124
10125 /*
10126  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10127  *
10128  * Return >0 any error found and output error message
10129  * Return 0 for no error found
10130  */
10131 static int check_extent_data_item(struct btrfs_root *root,
10132                                   struct extent_buffer *eb, int slot)
10133 {
10134         struct btrfs_file_extent_item *fi;
10135         struct btrfs_path path;
10136         struct btrfs_root *extent_root = root->fs_info->extent_root;
10137         struct btrfs_key fi_key;
10138         struct btrfs_key dbref_key;
10139         struct extent_buffer *leaf;
10140         struct btrfs_extent_item *ei;
10141         struct btrfs_extent_inline_ref *iref;
10142         struct btrfs_extent_data_ref *dref;
10143         u64 owner;
10144         u64 disk_bytenr;
10145         u64 disk_num_bytes;
10146         u64 extent_num_bytes;
10147         u64 extent_flags;
10148         u32 item_size;
10149         unsigned long end;
10150         unsigned long ptr;
10151         int type;
10152         u64 ref_root;
10153         int found_dbackref = 0;
10154         int err = 0;
10155         int ret;
10156
10157         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10158         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10159
10160         /* Nothing to check for hole and inline data extents */
10161         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10162             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10163                 return 0;
10164
10165         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10166         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10167         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10168
10169         /* Check unaligned disk_num_bytes and num_bytes */
10170         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10171                 error(
10172 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10173                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10174                         root->sectorsize);
10175                 err |= BYTES_UNALIGNED;
10176         } else {
10177                 data_bytes_allocated += disk_num_bytes;
10178         }
10179         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10180                 error(
10181 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10182                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10183                         root->sectorsize);
10184                 err |= BYTES_UNALIGNED;
10185         } else {
10186                 data_bytes_referenced += extent_num_bytes;
10187         }
10188         owner = btrfs_header_owner(eb);
10189
10190         /* Check the extent item of the file extent in extent tree */
10191         btrfs_init_path(&path);
10192         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10193         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10194         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10195
10196         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10197         if (ret) {
10198                 err |= BACKREF_MISSING;
10199                 goto error;
10200         }
10201
10202         leaf = path.nodes[0];
10203         slot = path.slots[0];
10204         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10205
10206         extent_flags = btrfs_extent_flags(leaf, ei);
10207
10208         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10209                 error(
10210                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10211                     disk_bytenr, disk_num_bytes,
10212                     BTRFS_EXTENT_FLAG_DATA);
10213                 err |= BACKREF_MISMATCH;
10214         }
10215
10216         /* Check data backref inside that extent item */
10217         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10218         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10219         ptr = (unsigned long)iref;
10220         end = (unsigned long)ei + item_size;
10221         while (ptr < end) {
10222                 iref = (struct btrfs_extent_inline_ref *)ptr;
10223                 type = btrfs_extent_inline_ref_type(leaf, iref);
10224                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10225
10226                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10227                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10228                         if (ref_root == owner || ref_root == root->objectid)
10229                                 found_dbackref = 1;
10230                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10231                         found_dbackref = !check_tree_block_ref(root, NULL,
10232                                 btrfs_extent_inline_ref_offset(leaf, iref),
10233                                 0, owner);
10234                 }
10235
10236                 if (found_dbackref)
10237                         break;
10238                 ptr += btrfs_extent_inline_ref_size(type);
10239         }
10240
10241         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10242         if (!found_dbackref) {
10243                 btrfs_release_path(&path);
10244
10245                 btrfs_init_path(&path);
10246                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10247                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10248                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10249                                 fi_key.objectid, fi_key.offset);
10250
10251                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10252                                         &dbref_key, &path, 0, 0);
10253                 if (!ret)
10254                         found_dbackref = 1;
10255         }
10256
10257         if (!found_dbackref)
10258                 err |= BACKREF_MISSING;
10259 error:
10260         btrfs_release_path(&path);
10261         if (err & BACKREF_MISSING) {
10262                 error("data extent[%llu %llu] backref lost",
10263                       disk_bytenr, disk_num_bytes);
10264         }
10265         return err;
10266 }
10267
10268 /*
10269  * Get real tree block level for the case like shared block
10270  * Return >= 0 as tree level
10271  * Return <0 for error
10272  */
10273 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10274 {
10275         struct extent_buffer *eb;
10276         struct btrfs_path path;
10277         struct btrfs_key key;
10278         struct btrfs_extent_item *ei;
10279         u64 flags;
10280         u64 transid;
10281         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10282         u8 backref_level;
10283         u8 header_level;
10284         int ret;
10285
10286         /* Search extent tree for extent generation and level */
10287         key.objectid = bytenr;
10288         key.type = BTRFS_METADATA_ITEM_KEY;
10289         key.offset = (u64)-1;
10290
10291         btrfs_init_path(&path);
10292         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10293         if (ret < 0)
10294                 goto release_out;
10295         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10296         if (ret < 0)
10297                 goto release_out;
10298         if (ret > 0) {
10299                 ret = -ENOENT;
10300                 goto release_out;
10301         }
10302
10303         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10304         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10305                             struct btrfs_extent_item);
10306         flags = btrfs_extent_flags(path.nodes[0], ei);
10307         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10308                 ret = -ENOENT;
10309                 goto release_out;
10310         }
10311
10312         /* Get transid for later read_tree_block() check */
10313         transid = btrfs_extent_generation(path.nodes[0], ei);
10314
10315         /* Get backref level as one source */
10316         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10317                 backref_level = key.offset;
10318         } else {
10319                 struct btrfs_tree_block_info *info;
10320
10321                 info = (struct btrfs_tree_block_info *)(ei + 1);
10322                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10323         }
10324         btrfs_release_path(&path);
10325
10326         /* Get level from tree block as an alternative source */
10327         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10328         if (!extent_buffer_uptodate(eb)) {
10329                 free_extent_buffer(eb);
10330                 return -EIO;
10331         }
10332         header_level = btrfs_header_level(eb);
10333         free_extent_buffer(eb);
10334
10335         if (header_level != backref_level)
10336                 return -EIO;
10337         return header_level;
10338
10339 release_out:
10340         btrfs_release_path(&path);
10341         return ret;
10342 }
10343
10344 /*
10345  * Check if a tree block backref is valid (points to a valid tree block)
10346  * if level == -1, level will be resolved
10347  * Return >0 for any error found and print error message
10348  */
10349 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10350                                     u64 bytenr, int level)
10351 {
10352         struct btrfs_root *root;
10353         struct btrfs_key key;
10354         struct btrfs_path path;
10355         struct extent_buffer *eb;
10356         struct extent_buffer *node;
10357         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10358         int err = 0;
10359         int ret;
10360
10361         /* Query level for level == -1 special case */
10362         if (level == -1)
10363                 level = query_tree_block_level(fs_info, bytenr);
10364         if (level < 0) {
10365                 err |= REFERENCER_MISSING;
10366                 goto out;
10367         }
10368
10369         key.objectid = root_id;
10370         key.type = BTRFS_ROOT_ITEM_KEY;
10371         key.offset = (u64)-1;
10372
10373         root = btrfs_read_fs_root(fs_info, &key);
10374         if (IS_ERR(root)) {
10375                 err |= REFERENCER_MISSING;
10376                 goto out;
10377         }
10378
10379         /* Read out the tree block to get item/node key */
10380         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10381         if (!extent_buffer_uptodate(eb)) {
10382                 err |= REFERENCER_MISSING;
10383                 free_extent_buffer(eb);
10384                 goto out;
10385         }
10386
10387         /* Empty tree, no need to check key */
10388         if (!btrfs_header_nritems(eb) && !level) {
10389                 free_extent_buffer(eb);
10390                 goto out;
10391         }
10392
10393         if (level)
10394                 btrfs_node_key_to_cpu(eb, &key, 0);
10395         else
10396                 btrfs_item_key_to_cpu(eb, &key, 0);
10397
10398         free_extent_buffer(eb);
10399
10400         btrfs_init_path(&path);
10401         path.lowest_level = level;
10402         /* Search with the first key, to ensure we can reach it */
10403         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10404         if (ret < 0) {
10405                 err |= REFERENCER_MISSING;
10406                 goto release_out;
10407         }
10408
10409         node = path.nodes[level];
10410         if (btrfs_header_bytenr(node) != bytenr) {
10411                 error(
10412         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10413                         bytenr, nodesize, bytenr,
10414                         btrfs_header_bytenr(node));
10415                 err |= REFERENCER_MISMATCH;
10416         }
10417         if (btrfs_header_level(node) != level) {
10418                 error(
10419         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10420                         bytenr, nodesize, level,
10421                         btrfs_header_level(node));
10422                 err |= REFERENCER_MISMATCH;
10423         }
10424
10425 release_out:
10426         btrfs_release_path(&path);
10427 out:
10428         if (err & REFERENCER_MISSING) {
10429                 if (level < 0)
10430                         error("extent [%llu %d] lost referencer (owner: %llu)",
10431                                 bytenr, nodesize, root_id);
10432                 else
10433                         error(
10434                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10435                                 bytenr, nodesize, root_id, level);
10436         }
10437
10438         return err;
10439 }
10440
10441 /*
10442  * Check if tree block @eb is tree reloc root.
10443  * Return 0 if it's not or any problem happens
10444  * Return 1 if it's a tree reloc root
10445  */
10446 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10447                                  struct extent_buffer *eb)
10448 {
10449         struct btrfs_root *tree_reloc_root;
10450         struct btrfs_key key;
10451         u64 bytenr = btrfs_header_bytenr(eb);
10452         u64 owner = btrfs_header_owner(eb);
10453         int ret = 0;
10454
10455         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10456         key.offset = owner;
10457         key.type = BTRFS_ROOT_ITEM_KEY;
10458
10459         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10460         if (IS_ERR(tree_reloc_root))
10461                 return 0;
10462
10463         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10464                 ret = 1;
10465         btrfs_free_fs_root(tree_reloc_root);
10466         return ret;
10467 }
10468
10469 /*
10470  * Check referencer for shared block backref
10471  * If level == -1, this function will resolve the level.
10472  */
10473 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10474                                      u64 parent, u64 bytenr, int level)
10475 {
10476         struct extent_buffer *eb;
10477         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10478         u32 nr;
10479         int found_parent = 0;
10480         int i;
10481
10482         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10483         if (!extent_buffer_uptodate(eb))
10484                 goto out;
10485
10486         if (level == -1)
10487                 level = query_tree_block_level(fs_info, bytenr);
10488         if (level < 0)
10489                 goto out;
10490
10491         /* It's possible it's a tree reloc root */
10492         if (parent == bytenr) {
10493                 if (is_tree_reloc_root(fs_info, eb))
10494                         found_parent = 1;
10495                 goto out;
10496         }
10497
10498         if (level + 1 != btrfs_header_level(eb))
10499                 goto out;
10500
10501         nr = btrfs_header_nritems(eb);
10502         for (i = 0; i < nr; i++) {
10503                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10504                         found_parent = 1;
10505                         break;
10506                 }
10507         }
10508 out:
10509         free_extent_buffer(eb);
10510         if (!found_parent) {
10511                 error(
10512         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10513                         bytenr, nodesize, parent, level);
10514                 return REFERENCER_MISSING;
10515         }
10516         return 0;
10517 }
10518
10519 /*
10520  * Check referencer for normal (inlined) data ref
10521  * If len == 0, it will be resolved by searching in extent tree
10522  */
10523 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10524                                      u64 root_id, u64 objectid, u64 offset,
10525                                      u64 bytenr, u64 len, u32 count)
10526 {
10527         struct btrfs_root *root;
10528         struct btrfs_root *extent_root = fs_info->extent_root;
10529         struct btrfs_key key;
10530         struct btrfs_path path;
10531         struct extent_buffer *leaf;
10532         struct btrfs_file_extent_item *fi;
10533         u32 found_count = 0;
10534         int slot;
10535         int ret = 0;
10536
10537         if (!len) {
10538                 key.objectid = bytenr;
10539                 key.type = BTRFS_EXTENT_ITEM_KEY;
10540                 key.offset = (u64)-1;
10541
10542                 btrfs_init_path(&path);
10543                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10544                 if (ret < 0)
10545                         goto out;
10546                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10547                 if (ret)
10548                         goto out;
10549                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10550                 if (key.objectid != bytenr ||
10551                     key.type != BTRFS_EXTENT_ITEM_KEY)
10552                         goto out;
10553                 len = key.offset;
10554                 btrfs_release_path(&path);
10555         }
10556         key.objectid = root_id;
10557         key.type = BTRFS_ROOT_ITEM_KEY;
10558         key.offset = (u64)-1;
10559         btrfs_init_path(&path);
10560
10561         root = btrfs_read_fs_root(fs_info, &key);
10562         if (IS_ERR(root))
10563                 goto out;
10564
10565         key.objectid = objectid;
10566         key.type = BTRFS_EXTENT_DATA_KEY;
10567         /*
10568          * It can be nasty as data backref offset is
10569          * file offset - file extent offset, which is smaller or
10570          * equal to original backref offset.  The only special case is
10571          * overflow.  So we need to special check and do further search.
10572          */
10573         key.offset = offset & (1ULL << 63) ? 0 : offset;
10574
10575         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10576         if (ret < 0)
10577                 goto out;
10578
10579         /*
10580          * Search afterwards to get correct one
10581          * NOTE: As we must do a comprehensive check on the data backref to
10582          * make sure the dref count also matches, we must iterate all file
10583          * extents for that inode.
10584          */
10585         while (1) {
10586                 leaf = path.nodes[0];
10587                 slot = path.slots[0];
10588
10589                 btrfs_item_key_to_cpu(leaf, &key, slot);
10590                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10591                         break;
10592                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10593                 /*
10594                  * Except normal disk bytenr and disk num bytes, we still
10595                  * need to do extra check on dbackref offset as
10596                  * dbackref offset = file_offset - file_extent_offset
10597                  */
10598                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10599                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10600                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10601                     offset)
10602                         found_count++;
10603
10604                 ret = btrfs_next_item(root, &path);
10605                 if (ret)
10606                         break;
10607         }
10608 out:
10609         btrfs_release_path(&path);
10610         if (found_count != count) {
10611                 error(
10612 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10613                         bytenr, len, root_id, objectid, offset, count, found_count);
10614                 return REFERENCER_MISSING;
10615         }
10616         return 0;
10617 }
10618
10619 /*
10620  * Check if the referencer of a shared data backref exists
10621  */
10622 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10623                                      u64 parent, u64 bytenr)
10624 {
10625         struct extent_buffer *eb;
10626         struct btrfs_key key;
10627         struct btrfs_file_extent_item *fi;
10628         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10629         u32 nr;
10630         int found_parent = 0;
10631         int i;
10632
10633         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10634         if (!extent_buffer_uptodate(eb))
10635                 goto out;
10636
10637         nr = btrfs_header_nritems(eb);
10638         for (i = 0; i < nr; i++) {
10639                 btrfs_item_key_to_cpu(eb, &key, i);
10640                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10641                         continue;
10642
10643                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10644                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10645                         continue;
10646
10647                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10648                         found_parent = 1;
10649                         break;
10650                 }
10651         }
10652
10653 out:
10654         free_extent_buffer(eb);
10655         if (!found_parent) {
10656                 error("shared extent %llu referencer lost (parent: %llu)",
10657                         bytenr, parent);
10658                 return REFERENCER_MISSING;
10659         }
10660         return 0;
10661 }
10662
10663 /*
10664  * This function will check a given extent item, including its backref and
10665  * itself (like crossing stripe boundary and type)
10666  *
10667  * Since we don't use extent_record anymore, introduce new error bit
10668  */
10669 static int check_extent_item(struct btrfs_fs_info *fs_info,
10670                              struct extent_buffer *eb, int slot)
10671 {
10672         struct btrfs_extent_item *ei;
10673         struct btrfs_extent_inline_ref *iref;
10674         struct btrfs_extent_data_ref *dref;
10675         unsigned long end;
10676         unsigned long ptr;
10677         int type;
10678         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10679         u32 item_size = btrfs_item_size_nr(eb, slot);
10680         u64 flags;
10681         u64 offset;
10682         int metadata = 0;
10683         int level;
10684         struct btrfs_key key;
10685         int ret;
10686         int err = 0;
10687
10688         btrfs_item_key_to_cpu(eb, &key, slot);
10689         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10690                 bytes_used += key.offset;
10691         else
10692                 bytes_used += nodesize;
10693
10694         if (item_size < sizeof(*ei)) {
10695                 /*
10696                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10697                  * old thing when on disk format is still un-determined.
10698                  * No need to care about it anymore
10699                  */
10700                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10701                 return -ENOTTY;
10702         }
10703
10704         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10705         flags = btrfs_extent_flags(eb, ei);
10706
10707         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10708                 metadata = 1;
10709         if (metadata && check_crossing_stripes(global_info, key.objectid,
10710                                                eb->len)) {
10711                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10712                       key.objectid, key.objectid + nodesize);
10713                 err |= CROSSING_STRIPE_BOUNDARY;
10714         }
10715
10716         ptr = (unsigned long)(ei + 1);
10717
10718         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10719                 /* Old EXTENT_ITEM metadata */
10720                 struct btrfs_tree_block_info *info;
10721
10722                 info = (struct btrfs_tree_block_info *)ptr;
10723                 level = btrfs_tree_block_level(eb, info);
10724                 ptr += sizeof(struct btrfs_tree_block_info);
10725         } else {
10726                 /* New METADATA_ITEM */
10727                 level = key.offset;
10728         }
10729         end = (unsigned long)ei + item_size;
10730
10731         if (ptr >= end) {
10732                 err |= ITEM_SIZE_MISMATCH;
10733                 goto out;
10734         }
10735
10736         /* Now check every backref in this extent item */
10737 next:
10738         iref = (struct btrfs_extent_inline_ref *)ptr;
10739         type = btrfs_extent_inline_ref_type(eb, iref);
10740         offset = btrfs_extent_inline_ref_offset(eb, iref);
10741         switch (type) {
10742         case BTRFS_TREE_BLOCK_REF_KEY:
10743                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10744                                                level);
10745                 err |= ret;
10746                 break;
10747         case BTRFS_SHARED_BLOCK_REF_KEY:
10748                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10749                                                  level);
10750                 err |= ret;
10751                 break;
10752         case BTRFS_EXTENT_DATA_REF_KEY:
10753                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10754                 ret = check_extent_data_backref(fs_info,
10755                                 btrfs_extent_data_ref_root(eb, dref),
10756                                 btrfs_extent_data_ref_objectid(eb, dref),
10757                                 btrfs_extent_data_ref_offset(eb, dref),
10758                                 key.objectid, key.offset,
10759                                 btrfs_extent_data_ref_count(eb, dref));
10760                 err |= ret;
10761                 break;
10762         case BTRFS_SHARED_DATA_REF_KEY:
10763                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10764                 err |= ret;
10765                 break;
10766         default:
10767                 error("extent[%llu %d %llu] has unknown ref type: %d",
10768                         key.objectid, key.type, key.offset, type);
10769                 err |= UNKNOWN_TYPE;
10770                 goto out;
10771         }
10772
10773         ptr += btrfs_extent_inline_ref_size(type);
10774         if (ptr < end)
10775                 goto next;
10776
10777 out:
10778         return err;
10779 }
10780
10781 /*
10782  * Check if a dev extent item is referred correctly by its chunk
10783  */
10784 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10785                                  struct extent_buffer *eb, int slot)
10786 {
10787         struct btrfs_root *chunk_root = fs_info->chunk_root;
10788         struct btrfs_dev_extent *ptr;
10789         struct btrfs_path path;
10790         struct btrfs_key chunk_key;
10791         struct btrfs_key devext_key;
10792         struct btrfs_chunk *chunk;
10793         struct extent_buffer *l;
10794         int num_stripes;
10795         u64 length;
10796         int i;
10797         int found_chunk = 0;
10798         int ret;
10799
10800         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10801         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10802         length = btrfs_dev_extent_length(eb, ptr);
10803
10804         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10805         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10806         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10807
10808         btrfs_init_path(&path);
10809         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10810         if (ret)
10811                 goto out;
10812
10813         l = path.nodes[0];
10814         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10815         if (btrfs_chunk_length(l, chunk) != length)
10816                 goto out;
10817
10818         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10819         for (i = 0; i < num_stripes; i++) {
10820                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10821                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10822
10823                 if (devid == devext_key.objectid &&
10824                     offset == devext_key.offset) {
10825                         found_chunk = 1;
10826                         break;
10827                 }
10828         }
10829 out:
10830         btrfs_release_path(&path);
10831         if (!found_chunk) {
10832                 error(
10833                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10834                         devext_key.objectid, devext_key.offset, length);
10835                 return REFERENCER_MISSING;
10836         }
10837         return 0;
10838 }
10839
10840 /*
10841  * Check if the used space is correct with the dev item
10842  */
10843 static int check_dev_item(struct btrfs_fs_info *fs_info,
10844                           struct extent_buffer *eb, int slot)
10845 {
10846         struct btrfs_root *dev_root = fs_info->dev_root;
10847         struct btrfs_dev_item *dev_item;
10848         struct btrfs_path path;
10849         struct btrfs_key key;
10850         struct btrfs_dev_extent *ptr;
10851         u64 dev_id;
10852         u64 used;
10853         u64 total = 0;
10854         int ret;
10855
10856         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10857         dev_id = btrfs_device_id(eb, dev_item);
10858         used = btrfs_device_bytes_used(eb, dev_item);
10859
10860         key.objectid = dev_id;
10861         key.type = BTRFS_DEV_EXTENT_KEY;
10862         key.offset = 0;
10863
10864         btrfs_init_path(&path);
10865         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10866         if (ret < 0) {
10867                 btrfs_item_key_to_cpu(eb, &key, slot);
10868                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10869                         key.objectid, key.type, key.offset);
10870                 btrfs_release_path(&path);
10871                 return REFERENCER_MISSING;
10872         }
10873
10874         /* Iterate dev_extents to calculate the used space of a device */
10875         while (1) {
10876                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10877
10878                 if (key.objectid > dev_id)
10879                         break;
10880                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10881                         goto next;
10882
10883                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10884                                      struct btrfs_dev_extent);
10885                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10886 next:
10887                 ret = btrfs_next_item(dev_root, &path);
10888                 if (ret)
10889                         break;
10890         }
10891         btrfs_release_path(&path);
10892
10893         if (used != total) {
10894                 btrfs_item_key_to_cpu(eb, &key, slot);
10895                 error(
10896 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10897                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10898                         BTRFS_DEV_EXTENT_KEY, dev_id);
10899                 return ACCOUNTING_MISMATCH;
10900         }
10901         return 0;
10902 }
10903
10904 /*
10905  * Check a block group item with its referener (chunk) and its used space
10906  * with extent/metadata item
10907  */
10908 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10909                                   struct extent_buffer *eb, int slot)
10910 {
10911         struct btrfs_root *extent_root = fs_info->extent_root;
10912         struct btrfs_root *chunk_root = fs_info->chunk_root;
10913         struct btrfs_block_group_item *bi;
10914         struct btrfs_block_group_item bg_item;
10915         struct btrfs_path path;
10916         struct btrfs_key bg_key;
10917         struct btrfs_key chunk_key;
10918         struct btrfs_key extent_key;
10919         struct btrfs_chunk *chunk;
10920         struct extent_buffer *leaf;
10921         struct btrfs_extent_item *ei;
10922         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10923         u64 flags;
10924         u64 bg_flags;
10925         u64 used;
10926         u64 total = 0;
10927         int ret;
10928         int err = 0;
10929
10930         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10931         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10932         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10933         used = btrfs_block_group_used(&bg_item);
10934         bg_flags = btrfs_block_group_flags(&bg_item);
10935
10936         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10937         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10938         chunk_key.offset = bg_key.objectid;
10939
10940         btrfs_init_path(&path);
10941         /* Search for the referencer chunk */
10942         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10943         if (ret) {
10944                 error(
10945                 "block group[%llu %llu] did not find the related chunk item",
10946                         bg_key.objectid, bg_key.offset);
10947                 err |= REFERENCER_MISSING;
10948         } else {
10949                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10950                                         struct btrfs_chunk);
10951                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10952                                                 bg_key.offset) {
10953                         error(
10954         "block group[%llu %llu] related chunk item length does not match",
10955                                 bg_key.objectid, bg_key.offset);
10956                         err |= REFERENCER_MISMATCH;
10957                 }
10958         }
10959         btrfs_release_path(&path);
10960
10961         /* Search from the block group bytenr */
10962         extent_key.objectid = bg_key.objectid;
10963         extent_key.type = 0;
10964         extent_key.offset = 0;
10965
10966         btrfs_init_path(&path);
10967         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10968         if (ret < 0)
10969                 goto out;
10970
10971         /* Iterate extent tree to account used space */
10972         while (1) {
10973                 leaf = path.nodes[0];
10974                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10975                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10976                         break;
10977
10978                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10979                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10980                         goto next;
10981                 if (extent_key.objectid < bg_key.objectid)
10982                         goto next;
10983
10984                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10985                         total += nodesize;
10986                 else
10987                         total += extent_key.offset;
10988
10989                 ei = btrfs_item_ptr(leaf, path.slots[0],
10990                                     struct btrfs_extent_item);
10991                 flags = btrfs_extent_flags(leaf, ei);
10992                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10993                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10994                                 error(
10995                         "bad extent[%llu, %llu) type mismatch with chunk",
10996                                         extent_key.objectid,
10997                                         extent_key.objectid + extent_key.offset);
10998                                 err |= CHUNK_TYPE_MISMATCH;
10999                         }
11000                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11001                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11002                                     BTRFS_BLOCK_GROUP_METADATA))) {
11003                                 error(
11004                         "bad extent[%llu, %llu) type mismatch with chunk",
11005                                         extent_key.objectid,
11006                                         extent_key.objectid + nodesize);
11007                                 err |= CHUNK_TYPE_MISMATCH;
11008                         }
11009                 }
11010 next:
11011                 ret = btrfs_next_item(extent_root, &path);
11012                 if (ret)
11013                         break;
11014         }
11015
11016 out:
11017         btrfs_release_path(&path);
11018
11019         if (total != used) {
11020                 error(
11021                 "block group[%llu %llu] used %llu but extent items used %llu",
11022                         bg_key.objectid, bg_key.offset, used, total);
11023                 err |= ACCOUNTING_MISMATCH;
11024         }
11025         return err;
11026 }
11027
11028 /*
11029  * Check a chunk item.
11030  * Including checking all referred dev_extents and block group
11031  */
11032 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11033                             struct extent_buffer *eb, int slot)
11034 {
11035         struct btrfs_root *extent_root = fs_info->extent_root;
11036         struct btrfs_root *dev_root = fs_info->dev_root;
11037         struct btrfs_path path;
11038         struct btrfs_key chunk_key;
11039         struct btrfs_key bg_key;
11040         struct btrfs_key devext_key;
11041         struct btrfs_chunk *chunk;
11042         struct extent_buffer *leaf;
11043         struct btrfs_block_group_item *bi;
11044         struct btrfs_block_group_item bg_item;
11045         struct btrfs_dev_extent *ptr;
11046         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11047         u64 length;
11048         u64 chunk_end;
11049         u64 type;
11050         u64 profile;
11051         int num_stripes;
11052         u64 offset;
11053         u64 objectid;
11054         int i;
11055         int ret;
11056         int err = 0;
11057
11058         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11059         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11060         length = btrfs_chunk_length(eb, chunk);
11061         chunk_end = chunk_key.offset + length;
11062         if (!IS_ALIGNED(length, sectorsize)) {
11063                 error("chunk[%llu %llu) not aligned to %u",
11064                         chunk_key.offset, chunk_end, sectorsize);
11065                 err |= BYTES_UNALIGNED;
11066                 goto out;
11067         }
11068
11069         type = btrfs_chunk_type(eb, chunk);
11070         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11071         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11072                 error("chunk[%llu %llu) has no chunk type",
11073                         chunk_key.offset, chunk_end);
11074                 err |= UNKNOWN_TYPE;
11075         }
11076         if (profile && (profile & (profile - 1))) {
11077                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11078                         chunk_key.offset, chunk_end, profile);
11079                 err |= UNKNOWN_TYPE;
11080         }
11081
11082         bg_key.objectid = chunk_key.offset;
11083         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11084         bg_key.offset = length;
11085
11086         btrfs_init_path(&path);
11087         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11088         if (ret) {
11089                 error(
11090                 "chunk[%llu %llu) did not find the related block group item",
11091                         chunk_key.offset, chunk_end);
11092                 err |= REFERENCER_MISSING;
11093         } else{
11094                 leaf = path.nodes[0];
11095                 bi = btrfs_item_ptr(leaf, path.slots[0],
11096                                     struct btrfs_block_group_item);
11097                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11098                                    sizeof(bg_item));
11099                 if (btrfs_block_group_flags(&bg_item) != type) {
11100                         error(
11101 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11102                                 chunk_key.offset, chunk_end, type,
11103                                 btrfs_block_group_flags(&bg_item));
11104                         err |= REFERENCER_MISSING;
11105                 }
11106         }
11107
11108         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11109         for (i = 0; i < num_stripes; i++) {
11110                 btrfs_release_path(&path);
11111                 btrfs_init_path(&path);
11112                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11113                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11114                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11115
11116                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11117                                         0, 0);
11118                 if (ret)
11119                         goto not_match_dev;
11120
11121                 leaf = path.nodes[0];
11122                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11123                                      struct btrfs_dev_extent);
11124                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11125                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11126                 if (objectid != chunk_key.objectid ||
11127                     offset != chunk_key.offset ||
11128                     btrfs_dev_extent_length(leaf, ptr) != length)
11129                         goto not_match_dev;
11130                 continue;
11131 not_match_dev:
11132                 err |= BACKREF_MISSING;
11133                 error(
11134                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11135                         chunk_key.objectid, chunk_end, i);
11136                 continue;
11137         }
11138         btrfs_release_path(&path);
11139 out:
11140         return err;
11141 }
11142
11143 /*
11144  * Main entry function to check known items and update related accounting info
11145  */
11146 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11147 {
11148         struct btrfs_fs_info *fs_info = root->fs_info;
11149         struct btrfs_key key;
11150         int slot = 0;
11151         int type;
11152         struct btrfs_extent_data_ref *dref;
11153         int ret;
11154         int err = 0;
11155
11156 next:
11157         btrfs_item_key_to_cpu(eb, &key, slot);
11158         type = key.type;
11159
11160         switch (type) {
11161         case BTRFS_EXTENT_DATA_KEY:
11162                 ret = check_extent_data_item(root, eb, slot);
11163                 err |= ret;
11164                 break;
11165         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11166                 ret = check_block_group_item(fs_info, eb, slot);
11167                 err |= ret;
11168                 break;
11169         case BTRFS_DEV_ITEM_KEY:
11170                 ret = check_dev_item(fs_info, eb, slot);
11171                 err |= ret;
11172                 break;
11173         case BTRFS_CHUNK_ITEM_KEY:
11174                 ret = check_chunk_item(fs_info, eb, slot);
11175                 err |= ret;
11176                 break;
11177         case BTRFS_DEV_EXTENT_KEY:
11178                 ret = check_dev_extent_item(fs_info, eb, slot);
11179                 err |= ret;
11180                 break;
11181         case BTRFS_EXTENT_ITEM_KEY:
11182         case BTRFS_METADATA_ITEM_KEY:
11183                 ret = check_extent_item(fs_info, eb, slot);
11184                 err |= ret;
11185                 break;
11186         case BTRFS_EXTENT_CSUM_KEY:
11187                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11188                 break;
11189         case BTRFS_TREE_BLOCK_REF_KEY:
11190                 ret = check_tree_block_backref(fs_info, key.offset,
11191                                                key.objectid, -1);
11192                 err |= ret;
11193                 break;
11194         case BTRFS_EXTENT_DATA_REF_KEY:
11195                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11196                 ret = check_extent_data_backref(fs_info,
11197                                 btrfs_extent_data_ref_root(eb, dref),
11198                                 btrfs_extent_data_ref_objectid(eb, dref),
11199                                 btrfs_extent_data_ref_offset(eb, dref),
11200                                 key.objectid, 0,
11201                                 btrfs_extent_data_ref_count(eb, dref));
11202                 err |= ret;
11203                 break;
11204         case BTRFS_SHARED_BLOCK_REF_KEY:
11205                 ret = check_shared_block_backref(fs_info, key.offset,
11206                                                  key.objectid, -1);
11207                 err |= ret;
11208                 break;
11209         case BTRFS_SHARED_DATA_REF_KEY:
11210                 ret = check_shared_data_backref(fs_info, key.offset,
11211                                                 key.objectid);
11212                 err |= ret;
11213                 break;
11214         default:
11215                 break;
11216         }
11217
11218         if (++slot < btrfs_header_nritems(eb))
11219                 goto next;
11220
11221         return err;
11222 }
11223
11224 /*
11225  * Helper function for later fs/subvol tree check.  To determine if a tree
11226  * block should be checked.
11227  * This function will ensure only the direct referencer with lowest rootid to
11228  * check a fs/subvolume tree block.
11229  *
11230  * Backref check at extent tree would detect errors like missing subvolume
11231  * tree, so we can do aggressive check to reduce duplicated checks.
11232  */
11233 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11234 {
11235         struct btrfs_root *extent_root = root->fs_info->extent_root;
11236         struct btrfs_key key;
11237         struct btrfs_path path;
11238         struct extent_buffer *leaf;
11239         int slot;
11240         struct btrfs_extent_item *ei;
11241         unsigned long ptr;
11242         unsigned long end;
11243         int type;
11244         u32 item_size;
11245         u64 offset;
11246         struct btrfs_extent_inline_ref *iref;
11247         int ret;
11248
11249         btrfs_init_path(&path);
11250         key.objectid = btrfs_header_bytenr(eb);
11251         key.type = BTRFS_METADATA_ITEM_KEY;
11252         key.offset = (u64)-1;
11253
11254         /*
11255          * Any failure in backref resolving means we can't determine
11256          * whom the tree block belongs to.
11257          * So in that case, we need to check that tree block
11258          */
11259         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11260         if (ret < 0)
11261                 goto need_check;
11262
11263         ret = btrfs_previous_extent_item(extent_root, &path,
11264                                          btrfs_header_bytenr(eb));
11265         if (ret)
11266                 goto need_check;
11267
11268         leaf = path.nodes[0];
11269         slot = path.slots[0];
11270         btrfs_item_key_to_cpu(leaf, &key, slot);
11271         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11272
11273         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11274                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11275         } else {
11276                 struct btrfs_tree_block_info *info;
11277
11278                 info = (struct btrfs_tree_block_info *)(ei + 1);
11279                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11280         }
11281
11282         item_size = btrfs_item_size_nr(leaf, slot);
11283         ptr = (unsigned long)iref;
11284         end = (unsigned long)ei + item_size;
11285         while (ptr < end) {
11286                 iref = (struct btrfs_extent_inline_ref *)ptr;
11287                 type = btrfs_extent_inline_ref_type(leaf, iref);
11288                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11289
11290                 /*
11291                  * We only check the tree block if current root is
11292                  * the lowest referencer of it.
11293                  */
11294                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11295                     offset < root->objectid) {
11296                         btrfs_release_path(&path);
11297                         return 0;
11298                 }
11299
11300                 ptr += btrfs_extent_inline_ref_size(type);
11301         }
11302         /*
11303          * Normally we should also check keyed tree block ref, but that may be
11304          * very time consuming.  Inlined ref should already make us skip a lot
11305          * of refs now.  So skip search keyed tree block ref.
11306          */
11307
11308 need_check:
11309         btrfs_release_path(&path);
11310         return 1;
11311 }
11312
11313 /*
11314  * Traversal function for tree block. We will do:
11315  * 1) Skip shared fs/subvolume tree blocks
11316  * 2) Update related bytes accounting
11317  * 3) Pre-order traversal
11318  */
11319 static int traverse_tree_block(struct btrfs_root *root,
11320                                 struct extent_buffer *node)
11321 {
11322         struct extent_buffer *eb;
11323         struct btrfs_key key;
11324         struct btrfs_key drop_key;
11325         int level;
11326         u64 nr;
11327         int i;
11328         int err = 0;
11329         int ret;
11330
11331         /*
11332          * Skip shared fs/subvolume tree block, in that case they will
11333          * be checked by referencer with lowest rootid
11334          */
11335         if (is_fstree(root->objectid) && !should_check(root, node))
11336                 return 0;
11337
11338         /* Update bytes accounting */
11339         total_btree_bytes += node->len;
11340         if (fs_root_objectid(btrfs_header_owner(node)))
11341                 total_fs_tree_bytes += node->len;
11342         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11343                 total_extent_tree_bytes += node->len;
11344         if (!found_old_backref &&
11345             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11346             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11347             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11348                 found_old_backref = 1;
11349
11350         /* pre-order tranversal, check itself first */
11351         level = btrfs_header_level(node);
11352         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11353                                    btrfs_header_level(node),
11354                                    btrfs_header_owner(node));
11355         err |= ret;
11356         if (err)
11357                 error(
11358         "check %s failed root %llu bytenr %llu level %d, force continue check",
11359                         level ? "node":"leaf", root->objectid,
11360                         btrfs_header_bytenr(node), btrfs_header_level(node));
11361
11362         if (!level) {
11363                 btree_space_waste += btrfs_leaf_free_space(root, node);
11364                 ret = check_leaf_items(root, node);
11365                 err |= ret;
11366                 return err;
11367         }
11368
11369         nr = btrfs_header_nritems(node);
11370         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11371         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11372                 sizeof(struct btrfs_key_ptr);
11373
11374         /* Then check all its children */
11375         for (i = 0; i < nr; i++) {
11376                 u64 blocknr = btrfs_node_blockptr(node, i);
11377
11378                 btrfs_node_key_to_cpu(node, &key, i);
11379                 if (level == root->root_item.drop_level &&
11380                     is_dropped_key(&key, &drop_key))
11381                         continue;
11382
11383                 /*
11384                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11385                  * to call the function itself.
11386                  */
11387                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11388                 if (extent_buffer_uptodate(eb)) {
11389                         ret = traverse_tree_block(root, eb);
11390                         err |= ret;
11391                 }
11392                 free_extent_buffer(eb);
11393         }
11394
11395         return err;
11396 }
11397
11398 /*
11399  * Low memory usage version check_chunks_and_extents.
11400  */
11401 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11402 {
11403         struct btrfs_path path;
11404         struct btrfs_key key;
11405         struct btrfs_root *root1;
11406         struct btrfs_root *cur_root;
11407         int err = 0;
11408         int ret;
11409
11410         root1 = root->fs_info->chunk_root;
11411         ret = traverse_tree_block(root1, root1->node);
11412         err |= ret;
11413
11414         root1 = root->fs_info->tree_root;
11415         ret = traverse_tree_block(root1, root1->node);
11416         err |= ret;
11417
11418         btrfs_init_path(&path);
11419         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11420         key.offset = 0;
11421         key.type = BTRFS_ROOT_ITEM_KEY;
11422
11423         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11424         if (ret) {
11425                 error("cannot find extent treet in tree_root");
11426                 goto out;
11427         }
11428
11429         while (1) {
11430                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11431                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11432                         goto next;
11433                 key.offset = (u64)-1;
11434
11435                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11436                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11437                                         &key);
11438                 else
11439                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11440                 if (IS_ERR(cur_root) || !cur_root) {
11441                         error("failed to read tree: %lld", key.objectid);
11442                         goto next;
11443                 }
11444
11445                 ret = traverse_tree_block(cur_root, cur_root->node);
11446                 err |= ret;
11447
11448                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11449                         btrfs_free_fs_root(cur_root);
11450 next:
11451                 ret = btrfs_next_item(root1, &path);
11452                 if (ret)
11453                         goto out;
11454         }
11455
11456 out:
11457         btrfs_release_path(&path);
11458         return err;
11459 }
11460
11461 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11462                            struct btrfs_root *root, int overwrite)
11463 {
11464         struct extent_buffer *c;
11465         struct extent_buffer *old = root->node;
11466         int level;
11467         int ret;
11468         struct btrfs_disk_key disk_key = {0,0,0};
11469
11470         level = 0;
11471
11472         if (overwrite) {
11473                 c = old;
11474                 extent_buffer_get(c);
11475                 goto init;
11476         }
11477         c = btrfs_alloc_free_block(trans, root,
11478                                    root->nodesize,
11479                                    root->root_key.objectid,
11480                                    &disk_key, level, 0, 0);
11481         if (IS_ERR(c)) {
11482                 c = old;
11483                 extent_buffer_get(c);
11484                 overwrite = 1;
11485         }
11486 init:
11487         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11488         btrfs_set_header_level(c, level);
11489         btrfs_set_header_bytenr(c, c->start);
11490         btrfs_set_header_generation(c, trans->transid);
11491         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11492         btrfs_set_header_owner(c, root->root_key.objectid);
11493
11494         write_extent_buffer(c, root->fs_info->fsid,
11495                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11496
11497         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11498                             btrfs_header_chunk_tree_uuid(c),
11499                             BTRFS_UUID_SIZE);
11500
11501         btrfs_mark_buffer_dirty(c);
11502         /*
11503          * this case can happen in the following case:
11504          *
11505          * 1.overwrite previous root.
11506          *
11507          * 2.reinit reloc data root, this is because we skip pin
11508          * down reloc data tree before which means we can allocate
11509          * same block bytenr here.
11510          */
11511         if (old->start == c->start) {
11512                 btrfs_set_root_generation(&root->root_item,
11513                                           trans->transid);
11514                 root->root_item.level = btrfs_header_level(root->node);
11515                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11516                                         &root->root_key, &root->root_item);
11517                 if (ret) {
11518                         free_extent_buffer(c);
11519                         return ret;
11520                 }
11521         }
11522         free_extent_buffer(old);
11523         root->node = c;
11524         add_root_to_dirty_list(root);
11525         return 0;
11526 }
11527
11528 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11529                                 struct extent_buffer *eb, int tree_root)
11530 {
11531         struct extent_buffer *tmp;
11532         struct btrfs_root_item *ri;
11533         struct btrfs_key key;
11534         u64 bytenr;
11535         u32 nodesize;
11536         int level = btrfs_header_level(eb);
11537         int nritems;
11538         int ret;
11539         int i;
11540
11541         /*
11542          * If we have pinned this block before, don't pin it again.
11543          * This can not only avoid forever loop with broken filesystem
11544          * but also give us some speedups.
11545          */
11546         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11547                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11548                 return 0;
11549
11550         btrfs_pin_extent(fs_info, eb->start, eb->len);
11551
11552         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11553         nritems = btrfs_header_nritems(eb);
11554         for (i = 0; i < nritems; i++) {
11555                 if (level == 0) {
11556                         btrfs_item_key_to_cpu(eb, &key, i);
11557                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11558                                 continue;
11559                         /* Skip the extent root and reloc roots */
11560                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11561                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11562                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11563                                 continue;
11564                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11565                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11566
11567                         /*
11568                          * If at any point we start needing the real root we
11569                          * will have to build a stump root for the root we are
11570                          * in, but for now this doesn't actually use the root so
11571                          * just pass in extent_root.
11572                          */
11573                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11574                                               nodesize, 0);
11575                         if (!extent_buffer_uptodate(tmp)) {
11576                                 fprintf(stderr, "Error reading root block\n");
11577                                 return -EIO;
11578                         }
11579                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11580                         free_extent_buffer(tmp);
11581                         if (ret)
11582                                 return ret;
11583                 } else {
11584                         bytenr = btrfs_node_blockptr(eb, i);
11585
11586                         /* If we aren't the tree root don't read the block */
11587                         if (level == 1 && !tree_root) {
11588                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11589                                 continue;
11590                         }
11591
11592                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11593                                               nodesize, 0);
11594                         if (!extent_buffer_uptodate(tmp)) {
11595                                 fprintf(stderr, "Error reading tree block\n");
11596                                 return -EIO;
11597                         }
11598                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11599                         free_extent_buffer(tmp);
11600                         if (ret)
11601                                 return ret;
11602                 }
11603         }
11604
11605         return 0;
11606 }
11607
11608 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11609 {
11610         int ret;
11611
11612         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11613         if (ret)
11614                 return ret;
11615
11616         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11617 }
11618
11619 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11620 {
11621         struct btrfs_block_group_cache *cache;
11622         struct btrfs_path path;
11623         struct extent_buffer *leaf;
11624         struct btrfs_chunk *chunk;
11625         struct btrfs_key key;
11626         int ret;
11627         u64 start;
11628
11629         btrfs_init_path(&path);
11630         key.objectid = 0;
11631         key.type = BTRFS_CHUNK_ITEM_KEY;
11632         key.offset = 0;
11633         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11634         if (ret < 0) {
11635                 btrfs_release_path(&path);
11636                 return ret;
11637         }
11638
11639         /*
11640          * We do this in case the block groups were screwed up and had alloc
11641          * bits that aren't actually set on the chunks.  This happens with
11642          * restored images every time and could happen in real life I guess.
11643          */
11644         fs_info->avail_data_alloc_bits = 0;
11645         fs_info->avail_metadata_alloc_bits = 0;
11646         fs_info->avail_system_alloc_bits = 0;
11647
11648         /* First we need to create the in-memory block groups */
11649         while (1) {
11650                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11651                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11652                         if (ret < 0) {
11653                                 btrfs_release_path(&path);
11654                                 return ret;
11655                         }
11656                         if (ret) {
11657                                 ret = 0;
11658                                 break;
11659                         }
11660                 }
11661                 leaf = path.nodes[0];
11662                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11663                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11664                         path.slots[0]++;
11665                         continue;
11666                 }
11667
11668                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11669                 btrfs_add_block_group(fs_info, 0,
11670                                       btrfs_chunk_type(leaf, chunk),
11671                                       key.objectid, key.offset,
11672                                       btrfs_chunk_length(leaf, chunk));
11673                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11674                                  key.offset + btrfs_chunk_length(leaf, chunk));
11675                 path.slots[0]++;
11676         }
11677         start = 0;
11678         while (1) {
11679                 cache = btrfs_lookup_first_block_group(fs_info, start);
11680                 if (!cache)
11681                         break;
11682                 cache->cached = 1;
11683                 start = cache->key.objectid + cache->key.offset;
11684         }
11685
11686         btrfs_release_path(&path);
11687         return 0;
11688 }
11689
11690 static int reset_balance(struct btrfs_trans_handle *trans,
11691                          struct btrfs_fs_info *fs_info)
11692 {
11693         struct btrfs_root *root = fs_info->tree_root;
11694         struct btrfs_path path;
11695         struct extent_buffer *leaf;
11696         struct btrfs_key key;
11697         int del_slot, del_nr = 0;
11698         int ret;
11699         int found = 0;
11700
11701         btrfs_init_path(&path);
11702         key.objectid = BTRFS_BALANCE_OBJECTID;
11703         key.type = BTRFS_BALANCE_ITEM_KEY;
11704         key.offset = 0;
11705         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11706         if (ret) {
11707                 if (ret > 0)
11708                         ret = 0;
11709                 if (!ret)
11710                         goto reinit_data_reloc;
11711                 else
11712                         goto out;
11713         }
11714
11715         ret = btrfs_del_item(trans, root, &path);
11716         if (ret)
11717                 goto out;
11718         btrfs_release_path(&path);
11719
11720         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11721         key.type = BTRFS_ROOT_ITEM_KEY;
11722         key.offset = 0;
11723         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11724         if (ret < 0)
11725                 goto out;
11726         while (1) {
11727                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11728                         if (!found)
11729                                 break;
11730
11731                         if (del_nr) {
11732                                 ret = btrfs_del_items(trans, root, &path,
11733                                                       del_slot, del_nr);
11734                                 del_nr = 0;
11735                                 if (ret)
11736                                         goto out;
11737                         }
11738                         key.offset++;
11739                         btrfs_release_path(&path);
11740
11741                         found = 0;
11742                         ret = btrfs_search_slot(trans, root, &key, &path,
11743                                                 -1, 1);
11744                         if (ret < 0)
11745                                 goto out;
11746                         continue;
11747                 }
11748                 found = 1;
11749                 leaf = path.nodes[0];
11750                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11751                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11752                         break;
11753                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11754                         path.slots[0]++;
11755                         continue;
11756                 }
11757                 if (!del_nr) {
11758                         del_slot = path.slots[0];
11759                         del_nr = 1;
11760                 } else {
11761                         del_nr++;
11762                 }
11763                 path.slots[0]++;
11764         }
11765
11766         if (del_nr) {
11767                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11768                 if (ret)
11769                         goto out;
11770         }
11771         btrfs_release_path(&path);
11772
11773 reinit_data_reloc:
11774         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11775         key.type = BTRFS_ROOT_ITEM_KEY;
11776         key.offset = (u64)-1;
11777         root = btrfs_read_fs_root(fs_info, &key);
11778         if (IS_ERR(root)) {
11779                 fprintf(stderr, "Error reading data reloc tree\n");
11780                 ret = PTR_ERR(root);
11781                 goto out;
11782         }
11783         record_root_in_trans(trans, root);
11784         ret = btrfs_fsck_reinit_root(trans, root, 0);
11785         if (ret)
11786                 goto out;
11787         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11788 out:
11789         btrfs_release_path(&path);
11790         return ret;
11791 }
11792
11793 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11794                               struct btrfs_fs_info *fs_info)
11795 {
11796         u64 start = 0;
11797         int ret;
11798
11799         /*
11800          * The only reason we don't do this is because right now we're just
11801          * walking the trees we find and pinning down their bytes, we don't look
11802          * at any of the leaves.  In order to do mixed groups we'd have to check
11803          * the leaves of any fs roots and pin down the bytes for any file
11804          * extents we find.  Not hard but why do it if we don't have to?
11805          */
11806         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11807                 fprintf(stderr, "We don't support re-initing the extent tree "
11808                         "for mixed block groups yet, please notify a btrfs "
11809                         "developer you want to do this so they can add this "
11810                         "functionality.\n");
11811                 return -EINVAL;
11812         }
11813
11814         /*
11815          * first we need to walk all of the trees except the extent tree and pin
11816          * down the bytes that are in use so we don't overwrite any existing
11817          * metadata.
11818          */
11819         ret = pin_metadata_blocks(fs_info);
11820         if (ret) {
11821                 fprintf(stderr, "error pinning down used bytes\n");
11822                 return ret;
11823         }
11824
11825         /*
11826          * Need to drop all the block groups since we're going to recreate all
11827          * of them again.
11828          */
11829         btrfs_free_block_groups(fs_info);
11830         ret = reset_block_groups(fs_info);
11831         if (ret) {
11832                 fprintf(stderr, "error resetting the block groups\n");
11833                 return ret;
11834         }
11835
11836         /* Ok we can allocate now, reinit the extent root */
11837         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11838         if (ret) {
11839                 fprintf(stderr, "extent root initialization failed\n");
11840                 /*
11841                  * When the transaction code is updated we should end the
11842                  * transaction, but for now progs only knows about commit so
11843                  * just return an error.
11844                  */
11845                 return ret;
11846         }
11847
11848         /*
11849          * Now we have all the in-memory block groups setup so we can make
11850          * allocations properly, and the metadata we care about is safe since we
11851          * pinned all of it above.
11852          */
11853         while (1) {
11854                 struct btrfs_block_group_cache *cache;
11855
11856                 cache = btrfs_lookup_first_block_group(fs_info, start);
11857                 if (!cache)
11858                         break;
11859                 start = cache->key.objectid + cache->key.offset;
11860                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11861                                         &cache->key, &cache->item,
11862                                         sizeof(cache->item));
11863                 if (ret) {
11864                         fprintf(stderr, "Error adding block group\n");
11865                         return ret;
11866                 }
11867                 btrfs_extent_post_op(trans, fs_info->extent_root);
11868         }
11869
11870         ret = reset_balance(trans, fs_info);
11871         if (ret)
11872                 fprintf(stderr, "error resetting the pending balance\n");
11873
11874         return ret;
11875 }
11876
11877 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11878 {
11879         struct btrfs_path path;
11880         struct btrfs_trans_handle *trans;
11881         struct btrfs_key key;
11882         int ret;
11883
11884         printf("Recowing metadata block %llu\n", eb->start);
11885         key.objectid = btrfs_header_owner(eb);
11886         key.type = BTRFS_ROOT_ITEM_KEY;
11887         key.offset = (u64)-1;
11888
11889         root = btrfs_read_fs_root(root->fs_info, &key);
11890         if (IS_ERR(root)) {
11891                 fprintf(stderr, "Couldn't find owner root %llu\n",
11892                         key.objectid);
11893                 return PTR_ERR(root);
11894         }
11895
11896         trans = btrfs_start_transaction(root, 1);
11897         if (IS_ERR(trans))
11898                 return PTR_ERR(trans);
11899
11900         btrfs_init_path(&path);
11901         path.lowest_level = btrfs_header_level(eb);
11902         if (path.lowest_level)
11903                 btrfs_node_key_to_cpu(eb, &key, 0);
11904         else
11905                 btrfs_item_key_to_cpu(eb, &key, 0);
11906
11907         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11908         btrfs_commit_transaction(trans, root);
11909         btrfs_release_path(&path);
11910         return ret;
11911 }
11912
11913 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11914 {
11915         struct btrfs_path path;
11916         struct btrfs_trans_handle *trans;
11917         struct btrfs_key key;
11918         int ret;
11919
11920         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11921                bad->key.type, bad->key.offset);
11922         key.objectid = bad->root_id;
11923         key.type = BTRFS_ROOT_ITEM_KEY;
11924         key.offset = (u64)-1;
11925
11926         root = btrfs_read_fs_root(root->fs_info, &key);
11927         if (IS_ERR(root)) {
11928                 fprintf(stderr, "Couldn't find owner root %llu\n",
11929                         key.objectid);
11930                 return PTR_ERR(root);
11931         }
11932
11933         trans = btrfs_start_transaction(root, 1);
11934         if (IS_ERR(trans))
11935                 return PTR_ERR(trans);
11936
11937         btrfs_init_path(&path);
11938         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11939         if (ret) {
11940                 if (ret > 0)
11941                         ret = 0;
11942                 goto out;
11943         }
11944         ret = btrfs_del_item(trans, root, &path);
11945 out:
11946         btrfs_commit_transaction(trans, root);
11947         btrfs_release_path(&path);
11948         return ret;
11949 }
11950
11951 static int zero_log_tree(struct btrfs_root *root)
11952 {
11953         struct btrfs_trans_handle *trans;
11954         int ret;
11955
11956         trans = btrfs_start_transaction(root, 1);
11957         if (IS_ERR(trans)) {
11958                 ret = PTR_ERR(trans);
11959                 return ret;
11960         }
11961         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11962         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11963         ret = btrfs_commit_transaction(trans, root);
11964         return ret;
11965 }
11966
11967 static int populate_csum(struct btrfs_trans_handle *trans,
11968                          struct btrfs_root *csum_root, char *buf, u64 start,
11969                          u64 len)
11970 {
11971         u64 offset = 0;
11972         u64 sectorsize;
11973         int ret = 0;
11974
11975         while (offset < len) {
11976                 sectorsize = csum_root->sectorsize;
11977                 ret = read_extent_data(csum_root, buf, start + offset,
11978                                        &sectorsize, 0);
11979                 if (ret)
11980                         break;
11981                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11982                                             start + offset, buf, sectorsize);
11983                 if (ret)
11984                         break;
11985                 offset += sectorsize;
11986         }
11987         return ret;
11988 }
11989
11990 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11991                                       struct btrfs_root *csum_root,
11992                                       struct btrfs_root *cur_root)
11993 {
11994         struct btrfs_path path;
11995         struct btrfs_key key;
11996         struct extent_buffer *node;
11997         struct btrfs_file_extent_item *fi;
11998         char *buf = NULL;
11999         u64 start = 0;
12000         u64 len = 0;
12001         int slot = 0;
12002         int ret = 0;
12003
12004         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12005         if (!buf)
12006                 return -ENOMEM;
12007
12008         btrfs_init_path(&path);
12009         key.objectid = 0;
12010         key.offset = 0;
12011         key.type = 0;
12012         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12013         if (ret < 0)
12014                 goto out;
12015         /* Iterate all regular file extents and fill its csum */
12016         while (1) {
12017                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12018
12019                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12020                         goto next;
12021                 node = path.nodes[0];
12022                 slot = path.slots[0];
12023                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12024                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12025                         goto next;
12026                 start = btrfs_file_extent_disk_bytenr(node, fi);
12027                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12028
12029                 ret = populate_csum(trans, csum_root, buf, start, len);
12030                 if (ret == -EEXIST)
12031                         ret = 0;
12032                 if (ret < 0)
12033                         goto out;
12034 next:
12035                 /*
12036                  * TODO: if next leaf is corrupted, jump to nearest next valid
12037                  * leaf.
12038                  */
12039                 ret = btrfs_next_item(cur_root, &path);
12040                 if (ret < 0)
12041                         goto out;
12042                 if (ret > 0) {
12043                         ret = 0;
12044                         goto out;
12045                 }
12046         }
12047
12048 out:
12049         btrfs_release_path(&path);
12050         free(buf);
12051         return ret;
12052 }
12053
12054 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12055                                   struct btrfs_root *csum_root)
12056 {
12057         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12058         struct btrfs_path path;
12059         struct btrfs_root *tree_root = fs_info->tree_root;
12060         struct btrfs_root *cur_root;
12061         struct extent_buffer *node;
12062         struct btrfs_key key;
12063         int slot = 0;
12064         int ret = 0;
12065
12066         btrfs_init_path(&path);
12067         key.objectid = BTRFS_FS_TREE_OBJECTID;
12068         key.offset = 0;
12069         key.type = BTRFS_ROOT_ITEM_KEY;
12070         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12071         if (ret < 0)
12072                 goto out;
12073         if (ret > 0) {
12074                 ret = -ENOENT;
12075                 goto out;
12076         }
12077
12078         while (1) {
12079                 node = path.nodes[0];
12080                 slot = path.slots[0];
12081                 btrfs_item_key_to_cpu(node, &key, slot);
12082                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12083                         goto out;
12084                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12085                         goto next;
12086                 if (!is_fstree(key.objectid))
12087                         goto next;
12088                 key.offset = (u64)-1;
12089
12090                 cur_root = btrfs_read_fs_root(fs_info, &key);
12091                 if (IS_ERR(cur_root) || !cur_root) {
12092                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12093                                 key.objectid);
12094                         goto out;
12095                 }
12096                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12097                                 cur_root);
12098                 if (ret < 0)
12099                         goto out;
12100 next:
12101                 ret = btrfs_next_item(tree_root, &path);
12102                 if (ret > 0) {
12103                         ret = 0;
12104                         goto out;
12105                 }
12106                 if (ret < 0)
12107                         goto out;
12108         }
12109
12110 out:
12111         btrfs_release_path(&path);
12112         return ret;
12113 }
12114
12115 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12116                                       struct btrfs_root *csum_root)
12117 {
12118         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12119         struct btrfs_path path;
12120         struct btrfs_extent_item *ei;
12121         struct extent_buffer *leaf;
12122         char *buf;
12123         struct btrfs_key key;
12124         int ret;
12125
12126         btrfs_init_path(&path);
12127         key.objectid = 0;
12128         key.type = BTRFS_EXTENT_ITEM_KEY;
12129         key.offset = 0;
12130         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12131         if (ret < 0) {
12132                 btrfs_release_path(&path);
12133                 return ret;
12134         }
12135
12136         buf = malloc(csum_root->sectorsize);
12137         if (!buf) {
12138                 btrfs_release_path(&path);
12139                 return -ENOMEM;
12140         }
12141
12142         while (1) {
12143                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12144                         ret = btrfs_next_leaf(extent_root, &path);
12145                         if (ret < 0)
12146                                 break;
12147                         if (ret) {
12148                                 ret = 0;
12149                                 break;
12150                         }
12151                 }
12152                 leaf = path.nodes[0];
12153
12154                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12155                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12156                         path.slots[0]++;
12157                         continue;
12158                 }
12159
12160                 ei = btrfs_item_ptr(leaf, path.slots[0],
12161                                     struct btrfs_extent_item);
12162                 if (!(btrfs_extent_flags(leaf, ei) &
12163                       BTRFS_EXTENT_FLAG_DATA)) {
12164                         path.slots[0]++;
12165                         continue;
12166                 }
12167
12168                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12169                                     key.offset);
12170                 if (ret)
12171                         break;
12172                 path.slots[0]++;
12173         }
12174
12175         btrfs_release_path(&path);
12176         free(buf);
12177         return ret;
12178 }
12179
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * There are two sources for the list of data extents.  Normally the extent
 * tree is scanned.  Extent tree init wipes out all the extent info, though,
 * so in that case the extent tree can't be relied upon: when @search_fs_tree
 * is non-zero the fs/subvol trees are walked instead.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12196
12197 static void free_roots_info_cache(void)
12198 {
12199         if (!roots_info_cache)
12200                 return;
12201
12202         while (!cache_tree_empty(roots_info_cache)) {
12203                 struct cache_extent *entry;
12204                 struct root_item_info *rii;
12205
12206                 entry = first_cache_extent(roots_info_cache);
12207                 if (!entry)
12208                         break;
12209                 remove_cache_extent(roots_info_cache, entry);
12210                 rii = container_of(entry, struct root_item_info, cache_extent);
12211                 free(rii);
12212         }
12213
12214         free(roots_info_cache);
12215         roots_info_cache = NULL;
12216 }
12217
12218 static int build_roots_info_cache(struct btrfs_fs_info *info)
12219 {
12220         int ret = 0;
12221         struct btrfs_key key;
12222         struct extent_buffer *leaf;
12223         struct btrfs_path path;
12224
12225         if (!roots_info_cache) {
12226                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12227                 if (!roots_info_cache)
12228                         return -ENOMEM;
12229                 cache_tree_init(roots_info_cache);
12230         }
12231
12232         btrfs_init_path(&path);
12233         key.objectid = 0;
12234         key.type = BTRFS_EXTENT_ITEM_KEY;
12235         key.offset = 0;
12236         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12237         if (ret < 0)
12238                 goto out;
12239         leaf = path.nodes[0];
12240
12241         while (1) {
12242                 struct btrfs_key found_key;
12243                 struct btrfs_extent_item *ei;
12244                 struct btrfs_extent_inline_ref *iref;
12245                 int slot = path.slots[0];
12246                 int type;
12247                 u64 flags;
12248                 u64 root_id;
12249                 u8 level;
12250                 struct cache_extent *entry;
12251                 struct root_item_info *rii;
12252
12253                 if (slot >= btrfs_header_nritems(leaf)) {
12254                         ret = btrfs_next_leaf(info->extent_root, &path);
12255                         if (ret < 0) {
12256                                 break;
12257                         } else if (ret) {
12258                                 ret = 0;
12259                                 break;
12260                         }
12261                         leaf = path.nodes[0];
12262                         slot = path.slots[0];
12263                 }
12264
12265                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12266
12267                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12268                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12269                         goto next;
12270
12271                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12272                 flags = btrfs_extent_flags(leaf, ei);
12273
12274                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12275                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12276                         goto next;
12277
12278                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12279                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12280                         level = found_key.offset;
12281                 } else {
12282                         struct btrfs_tree_block_info *binfo;
12283
12284                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12285                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12286                         level = btrfs_tree_block_level(leaf, binfo);
12287                 }
12288
12289                 /*
12290                  * For a root extent, it must be of the following type and the
12291                  * first (and only one) iref in the item.
12292                  */
12293                 type = btrfs_extent_inline_ref_type(leaf, iref);
12294                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12295                         goto next;
12296
12297                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12298                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12299                 if (!entry) {
12300                         rii = malloc(sizeof(struct root_item_info));
12301                         if (!rii) {
12302                                 ret = -ENOMEM;
12303                                 goto out;
12304                         }
12305                         rii->cache_extent.start = root_id;
12306                         rii->cache_extent.size = 1;
12307                         rii->level = (u8)-1;
12308                         entry = &rii->cache_extent;
12309                         ret = insert_cache_extent(roots_info_cache, entry);
12310                         ASSERT(ret == 0);
12311                 } else {
12312                         rii = container_of(entry, struct root_item_info,
12313                                            cache_extent);
12314                 }
12315
12316                 ASSERT(rii->cache_extent.start == root_id);
12317                 ASSERT(rii->cache_extent.size == 1);
12318
12319                 if (level > rii->level || rii->level == (u8)-1) {
12320                         rii->level = level;
12321                         rii->bytenr = found_key.objectid;
12322                         rii->gen = btrfs_extent_generation(leaf, ei);
12323                         rii->node_count = 1;
12324                 } else if (level == rii->level) {
12325                         rii->node_count++;
12326                 }
12327 next:
12328                 path.slots[0]++;
12329         }
12330
12331 out:
12332         btrfs_release_path(&path);
12333
12334         return ret;
12335 }
12336
12337 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12338                                   struct btrfs_path *path,
12339                                   const struct btrfs_key *root_key,
12340                                   const int read_only_mode)
12341 {
12342         const u64 root_id = root_key->objectid;
12343         struct cache_extent *entry;
12344         struct root_item_info *rii;
12345         struct btrfs_root_item ri;
12346         unsigned long offset;
12347
12348         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12349         if (!entry) {
12350                 fprintf(stderr,
12351                         "Error: could not find extent items for root %llu\n",
12352                         root_key->objectid);
12353                 return -ENOENT;
12354         }
12355
12356         rii = container_of(entry, struct root_item_info, cache_extent);
12357         ASSERT(rii->cache_extent.start == root_id);
12358         ASSERT(rii->cache_extent.size == 1);
12359
12360         if (rii->node_count != 1) {
12361                 fprintf(stderr,
12362                         "Error: could not find btree root extent for root %llu\n",
12363                         root_id);
12364                 return -ENOENT;
12365         }
12366
12367         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12368         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12369
12370         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12371             btrfs_root_level(&ri) != rii->level ||
12372             btrfs_root_generation(&ri) != rii->gen) {
12373
12374                 /*
12375                  * If we're in repair mode but our caller told us to not update
12376                  * the root item, i.e. just check if it needs to be updated, don't
12377                  * print this message, since the caller will call us again shortly
12378                  * for the same root item without read only mode (the caller will
12379                  * open a transaction first).
12380                  */
12381                 if (!(read_only_mode && repair))
12382                         fprintf(stderr,
12383                                 "%sroot item for root %llu,"
12384                                 " current bytenr %llu, current gen %llu, current level %u,"
12385                                 " new bytenr %llu, new gen %llu, new level %u\n",
12386                                 (read_only_mode ? "" : "fixing "),
12387                                 root_id,
12388                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12389                                 btrfs_root_level(&ri),
12390                                 rii->bytenr, rii->gen, rii->level);
12391
12392                 if (btrfs_root_generation(&ri) > rii->gen) {
12393                         fprintf(stderr,
12394                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12395                                 root_id, btrfs_root_generation(&ri), rii->gen);
12396                         return -EINVAL;
12397                 }
12398
12399                 if (!read_only_mode) {
12400                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12401                         btrfs_set_root_level(&ri, rii->level);
12402                         btrfs_set_root_generation(&ri, rii->gen);
12403                         write_extent_buffer(path->nodes[0], &ri,
12404                                             offset, sizeof(ri));
12405                 }
12406
12407                 return 1;
12408         }
12409
12410         return 0;
12411 }
12412
12413 /*
12414  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12415  * caused read-only snapshots to be corrupted if they were created at a moment
12416  * when the source subvolume/snapshot had orphan items. The issue was that the
12417  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12418  * node instead of the post orphan cleanup root node.
12419  * So this function, and its callees, just detects and fixes those cases. Even
12420  * though the regression was for read-only snapshots, this function applies to
12421  * any snapshot/subvolume root.
12422  * This must be run before any other repair code - not doing it so, makes other
12423  * repair code delete or modify backrefs in the extent tree for example, which
12424  * will result in an inconsistent fs after repairing the root items.
12425  */
12426 static int repair_root_items(struct btrfs_fs_info *info)
12427 {
12428         struct btrfs_path path;
12429         struct btrfs_key key;
12430         struct extent_buffer *leaf;
12431         struct btrfs_trans_handle *trans = NULL;
12432         int ret = 0;
12433         int bad_roots = 0;
12434         int need_trans = 0;
12435
12436         btrfs_init_path(&path);
12437
12438         ret = build_roots_info_cache(info);
12439         if (ret)
12440                 goto out;
12441
12442         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12443         key.type = BTRFS_ROOT_ITEM_KEY;
12444         key.offset = 0;
12445
12446 again:
12447         /*
12448          * Avoid opening and committing transactions if a leaf doesn't have
12449          * any root items that need to be fixed, so that we avoid rotating
12450          * backup roots unnecessarily.
12451          */
12452         if (need_trans) {
12453                 trans = btrfs_start_transaction(info->tree_root, 1);
12454                 if (IS_ERR(trans)) {
12455                         ret = PTR_ERR(trans);
12456                         goto out;
12457                 }
12458         }
12459
12460         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12461                                 0, trans ? 1 : 0);
12462         if (ret < 0)
12463                 goto out;
12464         leaf = path.nodes[0];
12465
12466         while (1) {
12467                 struct btrfs_key found_key;
12468
12469                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12470                         int no_more_keys = find_next_key(&path, &key);
12471
12472                         btrfs_release_path(&path);
12473                         if (trans) {
12474                                 ret = btrfs_commit_transaction(trans,
12475                                                                info->tree_root);
12476                                 trans = NULL;
12477                                 if (ret < 0)
12478                                         goto out;
12479                         }
12480                         need_trans = 0;
12481                         if (no_more_keys)
12482                                 break;
12483                         goto again;
12484                 }
12485
12486                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12487
12488                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12489                         goto next;
12490                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12491                         goto next;
12492
12493                 ret = maybe_repair_root_item(info, &path, &found_key,
12494                                              trans ? 0 : 1);
12495                 if (ret < 0)
12496                         goto out;
12497                 if (ret) {
12498                         if (!trans && repair) {
12499                                 need_trans = 1;
12500                                 key = found_key;
12501                                 btrfs_release_path(&path);
12502                                 goto again;
12503                         }
12504                         bad_roots++;
12505                 }
12506 next:
12507                 path.slots[0]++;
12508         }
12509         ret = 0;
12510 out:
12511         free_roots_info_cache();
12512         btrfs_release_path(&path);
12513         if (trans)
12514                 btrfs_commit_transaction(trans, info->tree_root);
12515         if (ret < 0)
12516                 return ret;
12517
12518         return bad_roots;
12519 }
12520
12521 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12522 {
12523         struct btrfs_trans_handle *trans;
12524         struct btrfs_block_group_cache *bg_cache;
12525         u64 current = 0;
12526         int ret = 0;
12527
12528         /* Clear all free space cache inodes and its extent data */
12529         while (1) {
12530                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12531                 if (!bg_cache)
12532                         break;
12533                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12534                 if (ret < 0)
12535                         return ret;
12536                 current = bg_cache->key.objectid + bg_cache->key.offset;
12537         }
12538
12539         /* Don't forget to set cache_generation to -1 */
12540         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12541         if (IS_ERR(trans)) {
12542                 error("failed to update super block cache generation");
12543                 return PTR_ERR(trans);
12544         }
12545         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12546         btrfs_commit_transaction(trans, fs_info->tree_root);
12547
12548         return ret;
12549 }
12550
/*
 * Help text for "btrfs check": a NULL-terminated array of strings that
 * cmd_check() passes to usage().  The first entry is the synopsis; the empty
 * string separates the descriptive text from the option list.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12581
12582 int cmd_check(int argc, char **argv)
12583 {
12584         struct cache_tree root_cache;
12585         struct btrfs_root *root;
12586         struct btrfs_fs_info *info;
12587         u64 bytenr = 0;
12588         u64 subvolid = 0;
12589         u64 tree_root_bytenr = 0;
12590         u64 chunk_root_bytenr = 0;
12591         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12592         int ret;
12593         int err = 0;
12594         u64 num;
12595         int init_csum_tree = 0;
12596         int readonly = 0;
12597         int clear_space_cache = 0;
12598         int qgroup_report = 0;
12599         int qgroups_repaired = 0;
12600         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12601
12602         while(1) {
12603                 int c;
12604                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12605                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12606                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12607                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12608                 static const struct option long_options[] = {
12609                         { "super", required_argument, NULL, 's' },
12610                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12611                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12612                         { "init-csum-tree", no_argument, NULL,
12613                                 GETOPT_VAL_INIT_CSUM },
12614                         { "init-extent-tree", no_argument, NULL,
12615                                 GETOPT_VAL_INIT_EXTENT },
12616                         { "check-data-csum", no_argument, NULL,
12617                                 GETOPT_VAL_CHECK_CSUM },
12618                         { "backup", no_argument, NULL, 'b' },
12619                         { "subvol-extents", required_argument, NULL, 'E' },
12620                         { "qgroup-report", no_argument, NULL, 'Q' },
12621                         { "tree-root", required_argument, NULL, 'r' },
12622                         { "chunk-root", required_argument, NULL,
12623                                 GETOPT_VAL_CHUNK_TREE },
12624                         { "progress", no_argument, NULL, 'p' },
12625                         { "mode", required_argument, NULL,
12626                                 GETOPT_VAL_MODE },
12627                         { "clear-space-cache", required_argument, NULL,
12628                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12629                         { NULL, 0, NULL, 0}
12630                 };
12631
12632                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12633                 if (c < 0)
12634                         break;
12635                 switch(c) {
12636                         case 'a': /* ignored */ break;
12637                         case 'b':
12638                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12639                                 break;
12640                         case 's':
12641                                 num = arg_strtou64(optarg);
12642                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12643                                         error(
12644                                         "super mirror should be less than %d",
12645                                                 BTRFS_SUPER_MIRROR_MAX);
12646                                         exit(1);
12647                                 }
12648                                 bytenr = btrfs_sb_offset(((int)num));
12649                                 printf("using SB copy %llu, bytenr %llu\n", num,
12650                                        (unsigned long long)bytenr);
12651                                 break;
12652                         case 'Q':
12653                                 qgroup_report = 1;
12654                                 break;
12655                         case 'E':
12656                                 subvolid = arg_strtou64(optarg);
12657                                 break;
12658                         case 'r':
12659                                 tree_root_bytenr = arg_strtou64(optarg);
12660                                 break;
12661                         case GETOPT_VAL_CHUNK_TREE:
12662                                 chunk_root_bytenr = arg_strtou64(optarg);
12663                                 break;
12664                         case 'p':
12665                                 ctx.progress_enabled = true;
12666                                 break;
12667                         case '?':
12668                         case 'h':
12669                                 usage(cmd_check_usage);
12670                         case GETOPT_VAL_REPAIR:
12671                                 printf("enabling repair mode\n");
12672                                 repair = 1;
12673                                 ctree_flags |= OPEN_CTREE_WRITES;
12674                                 break;
12675                         case GETOPT_VAL_READONLY:
12676                                 readonly = 1;
12677                                 break;
12678                         case GETOPT_VAL_INIT_CSUM:
12679                                 printf("Creating a new CRC tree\n");
12680                                 init_csum_tree = 1;
12681                                 repair = 1;
12682                                 ctree_flags |= OPEN_CTREE_WRITES;
12683                                 break;
12684                         case GETOPT_VAL_INIT_EXTENT:
12685                                 init_extent_tree = 1;
12686                                 ctree_flags |= (OPEN_CTREE_WRITES |
12687                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12688                                 repair = 1;
12689                                 break;
12690                         case GETOPT_VAL_CHECK_CSUM:
12691                                 check_data_csum = 1;
12692                                 break;
12693                         case GETOPT_VAL_MODE:
12694                                 check_mode = parse_check_mode(optarg);
12695                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12696                                         error("unknown mode: %s", optarg);
12697                                         exit(1);
12698                                 }
12699                                 break;
12700                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12701                                 if (strcmp(optarg, "v1") == 0) {
12702                                         clear_space_cache = 1;
12703                                 } else if (strcmp(optarg, "v2") == 0) {
12704                                         clear_space_cache = 2;
12705                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12706                                 } else {
12707                                         error(
12708                 "invalid argument to --clear-space-cache, must be v1 or v2");
12709                                         exit(1);
12710                                 }
12711                                 ctree_flags |= OPEN_CTREE_WRITES;
12712                                 break;
12713                 }
12714         }
12715
12716         if (check_argc_exact(argc - optind, 1))
12717                 usage(cmd_check_usage);
12718
12719         if (ctx.progress_enabled) {
12720                 ctx.tp = TASK_NOTHING;
12721                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12722         }
12723
12724         /* This check is the only reason for --readonly to exist */
12725         if (readonly && repair) {
12726                 error("repair options are not compatible with --readonly");
12727                 exit(1);
12728         }
12729
12730         /*
12731          * Not supported yet
12732          */
12733         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12734                 error("low memory mode doesn't support repair yet");
12735                 exit(1);
12736         }
12737
12738         radix_tree_init();
12739         cache_tree_init(&root_cache);
12740
12741         if((ret = check_mounted(argv[optind])) < 0) {
12742                 error("could not check mount status: %s", strerror(-ret));
12743                 err |= !!ret;
12744                 goto err_out;
12745         } else if(ret) {
12746                 error("%s is currently mounted, aborting", argv[optind]);
12747                 ret = -EBUSY;
12748                 err |= !!ret;
12749                 goto err_out;
12750         }
12751
12752         /* only allow partial opening under repair mode */
12753         if (repair)
12754                 ctree_flags |= OPEN_CTREE_PARTIAL;
12755
12756         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12757                                   chunk_root_bytenr, ctree_flags);
12758         if (!info) {
12759                 error("cannot open file system");
12760                 ret = -EIO;
12761                 err |= !!ret;
12762                 goto err_out;
12763         }
12764
12765         global_info = info;
12766         root = info->fs_root;
12767         if (clear_space_cache == 1) {
12768                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12769                         error(
12770                 "free space cache v2 detected, use --clear-space-cache v2");
12771                         ret = 1;
12772                         goto close_out;
12773                 }
12774                 printf("Clearing free space cache\n");
12775                 ret = clear_free_space_cache(info);
12776                 if (ret) {
12777                         error("failed to clear free space cache");
12778                         ret = 1;
12779                 } else {
12780                         printf("Free space cache cleared\n");
12781                 }
12782                 goto close_out;
12783         } else if (clear_space_cache == 2) {
12784                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12785                         printf("no free space cache v2 to clear\n");
12786                         ret = 0;
12787                         goto close_out;
12788                 }
12789                 printf("Clear free space cache v2\n");
12790                 ret = btrfs_clear_free_space_tree(info);
12791                 if (ret) {
12792                         error("failed to clear free space cache v2: %d", ret);
12793                         ret = 1;
12794                 } else {
12795                         printf("free space cache v2 cleared\n");
12796                 }
12797                 goto close_out;
12798         }
12799
12800         /*
12801          * repair mode will force us to commit transaction which
12802          * will make us fail to load log tree when mounting.
12803          */
12804         if (repair && btrfs_super_log_root(info->super_copy)) {
12805                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12806                 if (!ret) {
12807                         ret = 1;
12808                         err |= !!ret;
12809                         goto close_out;
12810                 }
12811                 ret = zero_log_tree(root);
12812                 err |= !!ret;
12813                 if (ret) {
12814                         error("failed to zero log tree: %d", ret);
12815                         goto close_out;
12816                 }
12817         }
12818
12819         uuid_unparse(info->super_copy->fsid, uuidbuf);
12820         if (qgroup_report) {
12821                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12822                        uuidbuf);
12823                 ret = qgroup_verify_all(info);
12824                 err |= !!ret;
12825                 if (ret == 0)
12826                         report_qgroups(1);
12827                 goto close_out;
12828         }
12829         if (subvolid) {
12830                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12831                        subvolid, argv[optind], uuidbuf);
12832                 ret = print_extent_state(info, subvolid);
12833                 err |= !!ret;
12834                 goto close_out;
12835         }
12836         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12837
12838         if (!extent_buffer_uptodate(info->tree_root->node) ||
12839             !extent_buffer_uptodate(info->dev_root->node) ||
12840             !extent_buffer_uptodate(info->chunk_root->node)) {
12841                 error("critical roots corrupted, unable to check the filesystem");
12842                 err |= !!ret;
12843                 ret = -EIO;
12844                 goto close_out;
12845         }
12846
12847         if (init_extent_tree || init_csum_tree) {
12848                 struct btrfs_trans_handle *trans;
12849
12850                 trans = btrfs_start_transaction(info->extent_root, 0);
12851                 if (IS_ERR(trans)) {
12852                         error("error starting transaction");
12853                         ret = PTR_ERR(trans);
12854                         err |= !!ret;
12855                         goto close_out;
12856                 }
12857
12858                 if (init_extent_tree) {
12859                         printf("Creating a new extent tree\n");
12860                         ret = reinit_extent_tree(trans, info);
12861                         err |= !!ret;
12862                         if (ret)
12863                                 goto close_out;
12864                 }
12865
12866                 if (init_csum_tree) {
12867                         printf("Reinitialize checksum tree\n");
12868                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12869                         if (ret) {
12870                                 error("checksum tree initialization failed: %d",
12871                                                 ret);
12872                                 ret = -EIO;
12873                                 err |= !!ret;
12874                                 goto close_out;
12875                         }
12876
12877                         ret = fill_csum_tree(trans, info->csum_root,
12878                                              init_extent_tree);
12879                         err |= !!ret;
12880                         if (ret) {
12881                                 error("checksum tree refilling failed: %d", ret);
12882                                 return -EIO;
12883                         }
12884                 }
12885                 /*
12886                  * Ok now we commit and run the normal fsck, which will add
12887                  * extent entries for all of the items it finds.
12888                  */
12889                 ret = btrfs_commit_transaction(trans, info->extent_root);
12890                 err |= !!ret;
12891                 if (ret)
12892                         goto close_out;
12893         }
12894         if (!extent_buffer_uptodate(info->extent_root->node)) {
12895                 error("critical: extent_root, unable to check the filesystem");
12896                 ret = -EIO;
12897                 err |= !!ret;
12898                 goto close_out;
12899         }
12900         if (!extent_buffer_uptodate(info->csum_root->node)) {
12901                 error("critical: csum_root, unable to check the filesystem");
12902                 ret = -EIO;
12903                 err |= !!ret;
12904                 goto close_out;
12905         }
12906
12907         if (!ctx.progress_enabled)
12908                 fprintf(stderr, "checking extents\n");
12909         if (check_mode == CHECK_MODE_LOWMEM)
12910                 ret = check_chunks_and_extents_v2(root);
12911         else
12912                 ret = check_chunks_and_extents(root);
12913         err |= !!ret;
12914         if (ret)
12915                 error(
12916                 "errors found in extent allocation tree or chunk allocation");
12917
12918         ret = repair_root_items(info);
12919         err |= !!ret;
12920         if (ret < 0)
12921                 goto close_out;
12922         if (repair) {
12923                 fprintf(stderr, "Fixed %d roots.\n", ret);
12924                 ret = 0;
12925         } else if (ret > 0) {
12926                 fprintf(stderr,
12927                        "Found %d roots with an outdated root item.\n",
12928                        ret);
12929                 fprintf(stderr,
12930                         "Please run a filesystem check with the option --repair to fix them.\n");
12931                 ret = 1;
12932                 err |= !!ret;
12933                 goto close_out;
12934         }
12935
12936         if (!ctx.progress_enabled) {
12937                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12938                         fprintf(stderr, "checking free space tree\n");
12939                 else
12940                         fprintf(stderr, "checking free space cache\n");
12941         }
12942         ret = check_space_cache(root);
12943         err |= !!ret;
12944         if (ret)
12945                 goto out;
12946
12947         /*
12948          * We used to have to have these hole extents in between our real
12949          * extents so if we don't have this flag set we need to make sure there
12950          * are no gaps in the file extents for inodes, otherwise we can just
12951          * ignore it when this happens.
12952          */
12953         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12954         if (!ctx.progress_enabled)
12955                 fprintf(stderr, "checking fs roots\n");
12956         if (check_mode == CHECK_MODE_LOWMEM)
12957                 ret = check_fs_roots_v2(root->fs_info);
12958         else
12959                 ret = check_fs_roots(root, &root_cache);
12960         err |= !!ret;
12961         if (ret)
12962                 goto out;
12963
12964         fprintf(stderr, "checking csums\n");
12965         ret = check_csums(root);
12966         err |= !!ret;
12967         if (ret)
12968                 goto out;
12969
12970         fprintf(stderr, "checking root refs\n");
12971         /* For low memory mode, check_fs_roots_v2 handles root refs */
12972         if (check_mode != CHECK_MODE_LOWMEM) {
12973                 ret = check_root_refs(root, &root_cache);
12974                 err |= !!ret;
12975                 if (ret)
12976                         goto out;
12977         }
12978
12979         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12980                 struct extent_buffer *eb;
12981
12982                 eb = list_first_entry(&root->fs_info->recow_ebs,
12983                                       struct extent_buffer, recow);
12984                 list_del_init(&eb->recow);
12985                 ret = recow_extent_buffer(root, eb);
12986                 err |= !!ret;
12987                 if (ret)
12988                         break;
12989         }
12990
12991         while (!list_empty(&delete_items)) {
12992                 struct bad_item *bad;
12993
12994                 bad = list_first_entry(&delete_items, struct bad_item, list);
12995                 list_del_init(&bad->list);
12996                 if (repair) {
12997                         ret = delete_bad_item(root, bad);
12998                         err |= !!ret;
12999                 }
13000                 free(bad);
13001         }
13002
13003         if (info->quota_enabled) {
13004                 fprintf(stderr, "checking quota groups\n");
13005                 ret = qgroup_verify_all(info);
13006                 err |= !!ret;
13007                 if (ret)
13008                         goto out;
13009                 report_qgroups(0);
13010                 ret = repair_qgroups(info, &qgroups_repaired);
13011                 err |= !!ret;
13012                 if (err)
13013                         goto out;
13014                 ret = 0;
13015         }
13016
13017         if (!list_empty(&root->fs_info->recow_ebs)) {
13018                 error("transid errors in file system");
13019                 ret = 1;
13020                 err |= !!ret;
13021         }
13022 out:
13023         if (found_old_backref) { /*
13024                  * there was a disk format change when mixed
13025                  * backref was in testing tree. The old format
13026                  * existed about one week.
13027                  */
13028                 printf("\n * Found old mixed backref format. "
13029                        "The old format is not supported! *"
13030                        "\n * Please mount the FS in readonly mode, "
13031                        "backup data and re-format the FS. *\n\n");
13032                 err |= 1;
13033         }
13034         printf("found %llu bytes used err is %d\n",
13035                (unsigned long long)bytes_used, ret);
13036         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13037         printf("total tree bytes: %llu\n",
13038                (unsigned long long)total_btree_bytes);
13039         printf("total fs tree bytes: %llu\n",
13040                (unsigned long long)total_fs_tree_bytes);
13041         printf("total extent tree bytes: %llu\n",
13042                (unsigned long long)total_extent_tree_bytes);
13043         printf("btree space waste bytes: %llu\n",
13044                (unsigned long long)btree_space_waste);
13045         printf("file data blocks allocated: %llu\n referenced %llu\n",
13046                 (unsigned long long)data_bytes_allocated,
13047                 (unsigned long long)data_bytes_referenced);
13048
13049         free_qgroup_counts();
13050         free_root_recs_tree(&root_cache);
13051 close_out:
13052         close_ctree(root);
13053 err_out:
13054         if (ctx.progress_enabled)
13055                 task_deinit(ctx.info);
13056
13057         return err;
13058 }