btrfs-progs: check: remove unused argument from delete_bogus_item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress task.  The values index the
 * task_position_string[] table in print_status_check().
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* must stay last: it has no status string */
52 };
53
/* State handed to print_status_check() through its void * argument. */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp; /* phase whose status string is printed */
57
58         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
59 };
60
/*
 * File-scope state of the check command.  Nothing in this part of the file
 * updates the counters and flags below.
 * NOTE(review): their roles are presumably what the names say; confirm
 * against the code that writes them.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 }; /* progress-task state, see struct task_ctx */
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementation to run; parse_check_mode() maps user strings to these. */
79 enum btrfs_check_mode {
80         CHECK_MODE_ORIGINAL,
81         CHECK_MODE_LOWMEM,
82         CHECK_MODE_UNKNOWN, /* returned by parse_check_mode() for unrecognized input */
83         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 };
85
/* Mode in effect; starts out as the default (original) mode. */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Common header of a back reference.  It is embedded as the 'node' member
 * of struct data_backref and struct tree_backref.
 */
88 struct extent_backref {
89         struct list_head list; /* linkage used by to_extent_backref() */
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/* Back reference record for data; wraps the common extent_backref header. */
102 struct data_backref {
103         struct extent_backref node; /* common header, see to_data_backref() */
104         union {
                /* NOTE(review): which member is valid presumably depends on node.full_backref; confirm */
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bit flags, one bit each from bit 1 to bit 17 (bit 0 is unused here). */
117 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but does not match */
120 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but does not match */
123 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
131 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but does not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, but with the undetermined members removed and
142  * linked through a plain list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to the inode record's list
145  * (inode_record::orphan_extents in this file). */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid; /* printed as "inode" by print_orphan_data_extents() */
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
/* Back reference record for a tree block; wraps the common extent_backref header. */
155 struct tree_backref {
156         struct extent_backref node; /* common header, see to_tree_backref() */
157         union {
                /* NOTE(review): which member is valid presumably depends on node.full_backref; confirm */
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
168 /* Explicit "not yet determined" value for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
170
/* Per-extent bookkeeping record; recovered from its 'list' member by to_extent_record(). */
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2; /* two bits wide so it can hold FLAG_UNSET */
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * One back reference of an inode, kept on inode_record::backrefs.
 * The name is stored inline after the structure; clone_inode_rec() copies
 * sizeof(struct) + namelen + 1 bytes per entry.
 */
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors; /* NOTE(review): presumably a REF_ERR_* bitmask; confirm */
211         u64 dir;
212         u64 index;
213         u16 namelen; /* length of name[], excluding the extra trailing byte */
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * List-linked record describing a root item.
 * NOTE(review): the fields appear to mirror the root item (bytenr, level,
 * drop progress); confirm against the code that fills them in.
 */
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Reference error bits; print_ref_error() prints one message per set bit. */
233 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
235 #define REF_ERR_NO_INODE_REF            (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
238 #define REF_ERR_DUP_INODE_REF           (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF             (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole in a file, kept in an rb-tree ordered by compare_hole().
 * compare_hole_range() treats a hole as the half-open range
 * [start, start + len).
 */
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
/*
 * Per-inode state.  Records are reference counted and may be shared:
 * get_inode_rec() clones a shared record before handing it out for
 * modification, and free_inode_rec() releases it when the last reference
 * goes away.
 */
253 struct inode_record {
254         struct list_head backrefs; /* list of struct inode_backref */
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors; /* I_ERR_* bitmask, decoded by print_inode_error() */
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link; /* preset to 1 for BTRFS_FREE_INO_OBJECTID in get_inode_rec() */
272         u64 found_size;
273         u64 extent_start; /* (u64)-1 in a freshly created record */
274         u64 extent_end;
275         struct rb_root holes; /* tree of struct file_extent_hole */
276         struct list_head orphan_extents; /* list of struct orphan_data_extent */
277
278         u32 refs; /* reference count, see get_inode_rec()/free_inode_rec() */
279 };
280
/* Bits of inode_record::errors; print_inode_error() prints one message per set bit. */
281 #define I_ERR_NO_INODE_ITEM             (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * One back reference of a root, kept on root_record::backrefs.
 * The name is stored inline after the structure.
 */
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors; /* NOTE(review): presumably a REF_ERR_* bitmask; confirm */
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/* Per-root state, indexable through the embedded cache_extent. */
317 struct root_record {
318         struct list_head backrefs; /* NOTE(review): presumably struct root_backref entries; confirm */
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
/*
 * Cache-tree entry that carries an opaque pointer.  get_inode_rec() keys it
 * by inode number (cache.start = ino, cache.size = 1) and stores the
 * struct inode_record in 'data'.
 */
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
/* Cache-tree entry holding root and inode caches; referenced from walk_control::nodes[]. */
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
/* Start and size of a block. NOTE(review): units presumably bytes; confirm with users. */
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
/* Tree-walk state with one shared_node slot per tree level. */
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
/* List entry identifying an item by its key and the id of its root. */
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
/* List entry describing an extent candidate (bytenr/bytes) with counters. */
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
/* Cache-tree entry with summary information about a root. */
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
374 /*
375  * Error bits for the low memory mode check.
376  *
377  * Currently no caller cares about them yet; they are used internally for
378  * error classification only.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart in a
 * combined error value.  Confirm whether this is intended before relying
 * on either bit.
379  */
380 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
381 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
382 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
383 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
384 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
385 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
386 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
387 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
388 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
389 #define CHUNK_TYPE_MISMATCH     (1 << 8)
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress task epilogue: terminate the status line with a newline and
 * flush stdout.  The argument is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the INODE_REF/INODE_EXTREF
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 found;
4703         unsigned int extent_type;
4704         unsigned int is_hole;
4705         int ret;
4706         int err = 0;
4707
4708         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4709
4710         extent_type = btrfs_file_extent_type(node, fi);
4711         /* Skip if file extent is inline */
4712         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4713                 struct btrfs_item *e = btrfs_item_nr(slot);
4714                 u32 item_inline_len;
4715
4716                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4717                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4718                 if (extent_num_bytes == 0 ||
4719                     extent_num_bytes != item_inline_len)
4720                         err |= FILE_EXTENT_ERROR;
4721                 *size += extent_num_bytes;
4722                 return err;
4723         }
4724
4725         /* Check extent type */
4726         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4727                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4728                 err |= FILE_EXTENT_ERROR;
4729                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4730                       root->objectid, fkey->objectid, fkey->offset);
4731                 return err;
4732         }
4733
4734         /* Check REG_EXTENT/PREALLOC_EXTENT */
4735         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4736         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4737         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4738         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4739
4740         /* Check EXTENT_DATA datasum */
4741         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4742         if (found > 0 && nodatasum) {
4743                 err |= ODD_CSUM_ITEM;
4744                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4745                       root->objectid, fkey->objectid, fkey->offset);
4746         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4747                    !is_hole &&
4748                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4749                 err |= CSUM_ITEM_MISSING;
4750                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4753                 err |= ODD_CSUM_ITEM;
4754                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4755                       root->objectid, fkey->objectid, fkey->offset);
4756         }
4757
4758         /* Check EXTENT_DATA hole */
4759         if (no_holes && is_hole) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         } else if (!no_holes && *end != fkey->offset) {
4764                 err |= FILE_EXTENT_ERROR;
4765                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4766                       root->objectid, fkey->objectid, fkey->offset);
4767         }
4768
4769         *end += extent_num_bytes;
4770         if (!is_hole)
4771                 *size += extent_num_bytes;
4772
4773         return err;
4774 }
4775
4776 /*
4777  * Check INODE_ITEM and related ITEMs (the same inode number)
4778  * 1. check link count
4779  * 2. check inode ref/extref
4780  * 3. check dir item/index
4781  *
4782  * @ext_ref:    the EXTENDED_IREF feature
4783  *
4784  * Return 0 if no error occurred.
4785  * Return >0 for error or hit the traversal is done(by error bitmap)
4786  */
4787 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4788                             unsigned int ext_ref)
4789 {
4790         struct extent_buffer *node;
4791         struct btrfs_inode_item *ii;
4792         struct btrfs_key key;
4793         u64 inode_id;
4794         u32 mode;
4795         u64 nlink;
4796         u64 nbytes;
4797         u64 isize;
4798         u64 size = 0;
4799         u64 refs = 0;
4800         u64 extent_end = 0;
4801         u64 extent_size = 0;
4802         unsigned int dir;
4803         unsigned int nodatasum;
4804         int slot;
4805         int ret;
4806         int err = 0;
4807
4808         node = path->nodes[0];
4809         slot = path->slots[0];
4810
4811         btrfs_item_key_to_cpu(node, &key, slot);
4812         inode_id = key.objectid;
4813
4814         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4815                 ret = btrfs_next_item(root, path);
4816                 if (ret > 0)
4817                         err |= LAST_ITEM;
4818                 return err;
4819         }
4820
4821         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4822         isize = btrfs_inode_size(node, ii);
4823         nbytes = btrfs_inode_nbytes(node, ii);
4824         mode = btrfs_inode_mode(node, ii);
4825         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4826         nlink = btrfs_inode_nlink(node, ii);
4827         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4828
4829         while (1) {
4830                 ret = btrfs_next_item(root, path);
4831                 if (ret < 0) {
4832                         /* out will fill 'err' rusing current statistics */
4833                         goto out;
4834                 } else if (ret > 0) {
4835                         err |= LAST_ITEM;
4836                         goto out;
4837                 }
4838
4839                 node = path->nodes[0];
4840                 slot = path->slots[0];
4841                 btrfs_item_key_to_cpu(node, &key, slot);
4842                 if (key.objectid != inode_id)
4843                         goto out;
4844
4845                 switch (key.type) {
4846                 case BTRFS_INODE_REF_KEY:
4847                         ret = check_inode_ref(root, &key, node, slot, &refs,
4848                                               mode);
4849                         err |= ret;
4850                         break;
4851                 case BTRFS_INODE_EXTREF_KEY:
4852                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4853                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4854                                         root->objectid, key.objectid,
4855                                         key.offset);
4856                         ret = check_inode_extref(root, &key, node, slot, &refs,
4857                                                  mode);
4858                         err |= ret;
4859                         break;
4860                 case BTRFS_DIR_ITEM_KEY:
4861                 case BTRFS_DIR_INDEX_KEY:
4862                         if (!dir) {
4863                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4864                                         root->objectid, inode_id,
4865                                         imode_to_type(mode), key.objectid,
4866                                         key.offset);
4867                         }
4868                         ret = check_dir_item(root, &key, node, slot, &size,
4869                                              ext_ref);
4870                         err |= ret;
4871                         break;
4872                 case BTRFS_EXTENT_DATA_KEY:
4873                         if (dir) {
4874                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4875                                         root->objectid, inode_id, key.objectid,
4876                                         key.offset);
4877                         }
4878                         ret = check_file_extent(root, &key, node, slot,
4879                                                 nodatasum, &extent_size,
4880                                                 &extent_end);
4881                         err |= ret;
4882                         break;
4883                 case BTRFS_XATTR_ITEM_KEY:
4884                         break;
4885                 default:
4886                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4887                               key.objectid, key.type, key.offset);
4888                 }
4889         }
4890
4891 out:
4892         /* verify INODE_ITEM nlink/isize/nbytes */
4893         if (dir) {
4894                 if (nlink != 1) {
4895                         err |= LINK_COUNT_ERROR;
4896                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4897                               root->objectid, inode_id, nlink);
4898                 }
4899
4900                 /*
4901                  * Just a warning, as dir inode nbytes is just an
4902                  * instructive value.
4903                  */
4904                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4905                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4906                                 root->objectid, inode_id, root->nodesize);
4907                 }
4908
4909                 if (isize != size) {
4910                         err |= ISIZE_ERROR;
4911                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4912                               root->objectid, inode_id, isize, size);
4913                 }
4914         } else {
4915                 if (nlink != refs) {
4916                         err |= LINK_COUNT_ERROR;
4917                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4918                               root->objectid, inode_id, nlink, refs);
4919                 } else if (!nlink) {
4920                         err |= ORPHAN_ITEM;
4921                 }
4922
4923                 if (!nbytes && !no_holes && extent_end < isize) {
4924                         err |= NBYTES_ERROR;
4925                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4926                               root->objectid, inode_id, isize);
4927                 }
4928
4929                 if (nbytes != extent_size) {
4930                         err |= NBYTES_ERROR;
4931                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4932                               root->objectid, inode_id, nbytes, extent_size);
4933                 }
4934         }
4935
4936         return err;
4937 }
4938
4939 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4940 {
4941         struct btrfs_path path;
4942         struct btrfs_key key;
4943         int err = 0;
4944         int ret;
4945
4946         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4947         key.type = BTRFS_INODE_ITEM_KEY;
4948         key.offset = 0;
4949
4950         /* For root being dropped, we don't need to check first inode */
4951         if (btrfs_root_refs(&root->root_item) == 0 &&
4952             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4953             key.objectid)
4954                 return 0;
4955
4956         btrfs_init_path(&path);
4957
4958         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4959         if (ret < 0)
4960                 goto out;
4961         if (ret > 0) {
4962                 ret = 0;
4963                 err |= INODE_ITEM_MISSING;
4964         }
4965
4966         err |= check_inode_item(root, &path, ext_ref);
4967         err &= ~LAST_ITEM;
4968         if (err && !ret)
4969                 ret = -EIO;
4970 out:
4971         btrfs_release_path(&path);
4972         return ret;
4973 }
4974
4975 /*
4976  * Iterate all item on the tree and call check_inode_item() to check.
4977  *
4978  * @root:       the root of the tree to be checked.
4979  * @ext_ref:    the EXTENDED_IREF feature
4980  *
4981  * Return 0 if no error found.
4982  * Return <0 for error.
4983  */
4984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4985 {
4986         struct btrfs_path path;
4987         struct node_refs nrefs;
4988         struct btrfs_root_item *root_item = &root->root_item;
4989         int ret, wret;
4990         int level;
4991
4992         /*
4993          * We need to manually check the first inode item(256)
4994          * As the following traversal function will only start from
4995          * the first inode item in the leaf, if inode item(256) is missing
4996          * we will just skip it forever.
4997          */
4998         ret = check_fs_first_inode(root, ext_ref);
4999         if (ret < 0)
5000                 return ret;
5001
5002         memset(&nrefs, 0, sizeof(nrefs));
5003         level = btrfs_header_level(root->node);
5004         btrfs_init_path(&path);
5005
5006         if (btrfs_root_refs(root_item) > 0 ||
5007             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5008                 path.nodes[level] = root->node;
5009                 path.slots[level] = 0;
5010                 extent_buffer_get(root->node);
5011         } else {
5012                 struct btrfs_key key;
5013
5014                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5015                 level = root_item->drop_level;
5016                 path.lowest_level = level;
5017                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5018                 if (ret < 0)
5019                         goto out;
5020                 ret = 0;
5021         }
5022
5023         while (1) {
5024                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5025                 if (wret < 0)
5026                         ret = wret;
5027                 if (wret != 0)
5028                         break;
5029
5030                 wret = walk_up_tree_v2(root, &path, &level);
5031                 if (wret < 0)
5032                         ret = wret;
5033                 if (wret != 0)
5034                         break;
5035         }
5036
5037 out:
5038         btrfs_release_path(&path);
5039         return ret;
5040 }
5041
5042 /*
5043  * Find the relative ref for root_ref and root_backref.
5044  *
5045  * @root:       the root of the root tree.
5046  * @ref_key:    the key of the root ref.
5047  *
5048  * Return 0 if no error occurred.
5049  */
5050 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5051                           struct extent_buffer *node, int slot)
5052 {
5053         struct btrfs_path path;
5054         struct btrfs_key key;
5055         struct btrfs_root_ref *ref;
5056         struct btrfs_root_ref *backref;
5057         char ref_name[BTRFS_NAME_LEN] = {0};
5058         char backref_name[BTRFS_NAME_LEN] = {0};
5059         u64 ref_dirid;
5060         u64 ref_seq;
5061         u32 ref_namelen;
5062         u64 backref_dirid;
5063         u64 backref_seq;
5064         u32 backref_namelen;
5065         u32 len;
5066         int ret;
5067         int err = 0;
5068
5069         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5070         ref_dirid = btrfs_root_ref_dirid(node, ref);
5071         ref_seq = btrfs_root_ref_sequence(node, ref);
5072         ref_namelen = btrfs_root_ref_name_len(node, ref);
5073
5074         if (ref_namelen <= BTRFS_NAME_LEN) {
5075                 len = ref_namelen;
5076         } else {
5077                 len = BTRFS_NAME_LEN;
5078                 warning("%s[%llu %llu] ref_name too long",
5079                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5080                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5081                         ref_key->offset);
5082         }
5083         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5084
5085         /* Find relative root_ref */
5086         key.objectid = ref_key->offset;
5087         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5088         key.offset = ref_key->objectid;
5089
5090         btrfs_init_path(&path);
5091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5092         if (ret) {
5093                 err |= ROOT_REF_MISSING;
5094                 error("%s[%llu %llu] couldn't find relative ref",
5095                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5096                       "ROOT_REF" : "ROOT_BACKREF",
5097                       ref_key->objectid, ref_key->offset);
5098                 goto out;
5099         }
5100
5101         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5102                                  struct btrfs_root_ref);
5103         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5104         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5105         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5106
5107         if (backref_namelen <= BTRFS_NAME_LEN) {
5108                 len = backref_namelen;
5109         } else {
5110                 len = BTRFS_NAME_LEN;
5111                 warning("%s[%llu %llu] ref_name too long",
5112                         key.type == BTRFS_ROOT_REF_KEY ?
5113                         "ROOT_REF" : "ROOT_BACKREF",
5114                         key.objectid, key.offset);
5115         }
5116         read_extent_buffer(path.nodes[0], backref_name,
5117                            (unsigned long)(backref + 1), len);
5118
5119         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5120             ref_namelen != backref_namelen ||
5121             strncmp(ref_name, backref_name, len)) {
5122                 err |= ROOT_REF_MISMATCH;
5123                 error("%s[%llu %llu] mismatch relative ref",
5124                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5125                       "ROOT_REF" : "ROOT_BACKREF",
5126                       ref_key->objectid, ref_key->offset);
5127         }
5128 out:
5129         btrfs_release_path(&path);
5130         return err;
5131 }
5132
5133 /*
5134  * Check all fs/file tree in low_memory mode.
5135  *
5136  * 1. for fs tree root item, call check_fs_root_v2()
5137  * 2. for fs tree root ref/backref, call check_root_ref()
5138  *
5139  * Return 0 if no error occurred.
5140  */
5141 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5142 {
5143         struct btrfs_root *tree_root = fs_info->tree_root;
5144         struct btrfs_root *cur_root = NULL;
5145         struct btrfs_path path;
5146         struct btrfs_key key;
5147         struct extent_buffer *node;
5148         unsigned int ext_ref;
5149         int slot;
5150         int ret;
5151         int err = 0;
5152
5153         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5154
5155         btrfs_init_path(&path);
5156         key.objectid = BTRFS_FS_TREE_OBJECTID;
5157         key.offset = 0;
5158         key.type = BTRFS_ROOT_ITEM_KEY;
5159
5160         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5161         if (ret < 0) {
5162                 err = ret;
5163                 goto out;
5164         } else if (ret > 0) {
5165                 err = -ENOENT;
5166                 goto out;
5167         }
5168
5169         while (1) {
5170                 node = path.nodes[0];
5171                 slot = path.slots[0];
5172                 btrfs_item_key_to_cpu(node, &key, slot);
5173                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5174                         goto out;
5175                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5176                     fs_root_objectid(key.objectid)) {
5177                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5178                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5179                                                                        &key);
5180                         } else {
5181                                 key.offset = (u64)-1;
5182                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5183                         }
5184
5185                         if (IS_ERR(cur_root)) {
5186                                 error("Fail to read fs/subvol tree: %lld",
5187                                       key.objectid);
5188                                 err = -EIO;
5189                                 goto next;
5190                         }
5191
5192                         ret = check_fs_root_v2(cur_root, ext_ref);
5193                         err |= ret;
5194
5195                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5196                                 btrfs_free_fs_root(cur_root);
5197                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5198                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5199                         ret = check_root_ref(tree_root, &key, node, slot);
5200                         err |= ret;
5201                 }
5202 next:
5203                 ret = btrfs_next_item(tree_root, &path);
5204                 if (ret > 0)
5205                         goto out;
5206                 if (ret < 0) {
5207                         err = ret;
5208                         goto out;
5209                 }
5210         }
5211
5212 out:
5213         btrfs_release_path(&path);
5214         return err;
5215 }
5216
5217 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5218 {
5219         struct list_head *cur = rec->backrefs.next;
5220         struct extent_backref *back;
5221         struct tree_backref *tback;
5222         struct data_backref *dback;
5223         u64 found = 0;
5224         int err = 0;
5225
5226         while(cur != &rec->backrefs) {
5227                 back = to_extent_backref(cur);
5228                 cur = cur->next;
5229                 if (!back->found_extent_tree) {
5230                         err = 1;
5231                         if (!print_errs)
5232                                 goto out;
5233                         if (back->is_data) {
5234                                 dback = to_data_backref(back);
5235                                 fprintf(stderr, "Backref %llu %s %llu"
5236                                         " owner %llu offset %llu num_refs %lu"
5237                                         " not found in extent tree\n",
5238                                         (unsigned long long)rec->start,
5239                                         back->full_backref ?
5240                                         "parent" : "root",
5241                                         back->full_backref ?
5242                                         (unsigned long long)dback->parent:
5243                                         (unsigned long long)dback->root,
5244                                         (unsigned long long)dback->owner,
5245                                         (unsigned long long)dback->offset,
5246                                         (unsigned long)dback->num_refs);
5247                         } else {
5248                                 tback = to_tree_backref(back);
5249                                 fprintf(stderr, "Backref %llu parent %llu"
5250                                         " root %llu not found in extent tree\n",
5251                                         (unsigned long long)rec->start,
5252                                         (unsigned long long)tback->parent,
5253                                         (unsigned long long)tback->root);
5254                         }
5255                 }
5256                 if (!back->is_data && !back->found_ref) {
5257                         err = 1;
5258                         if (!print_errs)
5259                                 goto out;
5260                         tback = to_tree_backref(back);
5261                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5262                                 (unsigned long long)rec->start,
5263                                 back->full_backref ? "parent" : "root",
5264                                 back->full_backref ?
5265                                 (unsigned long long)tback->parent :
5266                                 (unsigned long long)tback->root, back);
5267                 }
5268                 if (back->is_data) {
5269                         dback = to_data_backref(back);
5270                         if (dback->found_ref != dback->num_refs) {
5271                                 err = 1;
5272                                 if (!print_errs)
5273                                         goto out;
5274                                 fprintf(stderr, "Incorrect local backref count"
5275                                         " on %llu %s %llu owner %llu"
5276                                         " offset %llu found %u wanted %u back %p\n",
5277                                         (unsigned long long)rec->start,
5278                                         back->full_backref ?
5279                                         "parent" : "root",
5280                                         back->full_backref ?
5281                                         (unsigned long long)dback->parent:
5282                                         (unsigned long long)dback->root,
5283                                         (unsigned long long)dback->owner,
5284                                         (unsigned long long)dback->offset,
5285                                         dback->found_ref, dback->num_refs, back);
5286                         }
5287                         if (dback->disk_bytenr != rec->start) {
5288                                 err = 1;
5289                                 if (!print_errs)
5290                                         goto out;
5291                                 fprintf(stderr, "Backref disk bytenr does not"
5292                                         " match extent record, bytenr=%llu, "
5293                                         "ref bytenr=%llu\n",
5294                                         (unsigned long long)rec->start,
5295                                         (unsigned long long)dback->disk_bytenr);
5296                         }
5297
5298                         if (dback->bytes != rec->nr) {
5299                                 err = 1;
5300                                 if (!print_errs)
5301                                         goto out;
5302                                 fprintf(stderr, "Backref bytes do not match "
5303                                         "extent backref, bytenr=%llu, ref "
5304                                         "bytes=%llu, backref bytes=%llu\n",
5305                                         (unsigned long long)rec->start,
5306                                         (unsigned long long)rec->nr,
5307                                         (unsigned long long)dback->bytes);
5308                         }
5309                 }
5310                 if (!back->is_data) {
5311                         found += 1;
5312                 } else {
5313                         dback = to_data_backref(back);
5314                         found += dback->found_ref;
5315                 }
5316         }
5317         if (found != rec->refs) {
5318                 err = 1;
5319                 if (!print_errs)
5320                         goto out;
5321                 fprintf(stderr, "Incorrect global backref count "
5322                         "on %llu found %llu wanted %llu\n",
5323                         (unsigned long long)rec->start,
5324                         (unsigned long long)found,
5325                         (unsigned long long)rec->refs);
5326         }
5327 out:
5328         return err;
5329 }
5330
5331 static int free_all_extent_backrefs(struct extent_record *rec)
5332 {
5333         struct extent_backref *back;
5334         struct list_head *cur;
5335         while (!list_empty(&rec->backrefs)) {
5336                 cur = rec->backrefs.next;
5337                 back = to_extent_backref(cur);
5338                 list_del(cur);
5339                 free(back);
5340         }
5341         return 0;
5342 }
5343
5344 static void free_extent_record_cache(struct cache_tree *extent_cache)
5345 {
5346         struct cache_extent *cache;
5347         struct extent_record *rec;
5348
5349         while (1) {
5350                 cache = first_cache_extent(extent_cache);
5351                 if (!cache)
5352                         break;
5353                 rec = container_of(cache, struct extent_record, cache);
5354                 remove_cache_extent(extent_cache, cache);
5355                 free_all_extent_backrefs(rec);
5356                 free(rec);
5357         }
5358 }
5359
5360 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5361                                  struct extent_record *rec)
5362 {
5363         if (rec->content_checked && rec->owner_ref_checked &&
5364             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5365             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5366             !rec->bad_full_backref && !rec->crossing_stripes &&
5367             !rec->wrong_chunk_type) {
5368                 remove_cache_extent(extent_cache, &rec->cache);
5369                 free_all_extent_backrefs(rec);
5370                 list_del_init(&rec->list);
5371                 free(rec);
5372         }
5373         return 0;
5374 }
5375
5376 static int check_owner_ref(struct btrfs_root *root,
5377                             struct extent_record *rec,
5378                             struct extent_buffer *buf)
5379 {
5380         struct extent_backref *node;
5381         struct tree_backref *back;
5382         struct btrfs_root *ref_root;
5383         struct btrfs_key key;
5384         struct btrfs_path path;
5385         struct extent_buffer *parent;
5386         int level;
5387         int found = 0;
5388         int ret;
5389
5390         list_for_each_entry(node, &rec->backrefs, list) {
5391                 if (node->is_data)
5392                         continue;
5393                 if (!node->found_ref)
5394                         continue;
5395                 if (node->full_backref)
5396                         continue;
5397                 back = to_tree_backref(node);
5398                 if (btrfs_header_owner(buf) == back->root)
5399                         return 0;
5400         }
5401         BUG_ON(rec->is_root);
5402
5403         /* try to find the block by search corresponding fs tree */
5404         key.objectid = btrfs_header_owner(buf);
5405         key.type = BTRFS_ROOT_ITEM_KEY;
5406         key.offset = (u64)-1;
5407
5408         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5409         if (IS_ERR(ref_root))
5410                 return 1;
5411
5412         level = btrfs_header_level(buf);
5413         if (level == 0)
5414                 btrfs_item_key_to_cpu(buf, &key, 0);
5415         else
5416                 btrfs_node_key_to_cpu(buf, &key, 0);
5417
5418         btrfs_init_path(&path);
5419         path.lowest_level = level + 1;
5420         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5421         if (ret < 0)
5422                 return 0;
5423
5424         parent = path.nodes[level + 1];
5425         if (parent && buf->start == btrfs_node_blockptr(parent,
5426                                                         path.slots[level + 1]))
5427                 found = 1;
5428
5429         btrfs_release_path(&path);
5430         return found ? 0 : 1;
5431 }
5432
5433 static int is_extent_tree_record(struct extent_record *rec)
5434 {
5435         struct list_head *cur = rec->backrefs.next;
5436         struct extent_backref *node;
5437         struct tree_backref *back;
5438         int is_extent = 0;
5439
5440         while(cur != &rec->backrefs) {
5441                 node = to_extent_backref(cur);
5442                 cur = cur->next;
5443                 if (node->is_data)
5444                         return 0;
5445                 back = to_tree_backref(node);
5446                 if (node->full_backref)
5447                         return 0;
5448                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5449                         is_extent = 1;
5450         }
5451         return is_extent;
5452 }
5453
5454
5455 static int record_bad_block_io(struct btrfs_fs_info *info,
5456                                struct cache_tree *extent_cache,
5457                                u64 start, u64 len)
5458 {
5459         struct extent_record *rec;
5460         struct cache_extent *cache;
5461         struct btrfs_key key;
5462
5463         cache = lookup_cache_extent(extent_cache, start, len);
5464         if (!cache)
5465                 return 0;
5466
5467         rec = container_of(cache, struct extent_record, cache);
5468         if (!is_extent_tree_record(rec))
5469                 return 0;
5470
5471         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5472         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5473 }
5474
5475 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5476                        struct extent_buffer *buf, int slot)
5477 {
5478         if (btrfs_header_level(buf)) {
5479                 struct btrfs_key_ptr ptr1, ptr2;
5480
5481                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5482                                    sizeof(struct btrfs_key_ptr));
5483                 read_extent_buffer(buf, &ptr2,
5484                                    btrfs_node_key_ptr_offset(slot + 1),
5485                                    sizeof(struct btrfs_key_ptr));
5486                 write_extent_buffer(buf, &ptr1,
5487                                     btrfs_node_key_ptr_offset(slot + 1),
5488                                     sizeof(struct btrfs_key_ptr));
5489                 write_extent_buffer(buf, &ptr2,
5490                                     btrfs_node_key_ptr_offset(slot),
5491                                     sizeof(struct btrfs_key_ptr));
5492                 if (slot == 0) {
5493                         struct btrfs_disk_key key;
5494                         btrfs_node_key(buf, &key, 0);
5495                         btrfs_fixup_low_keys(root, path, &key,
5496                                              btrfs_header_level(buf) + 1);
5497                 }
5498         } else {
5499                 struct btrfs_item *item1, *item2;
5500                 struct btrfs_key k1, k2;
5501                 char *item1_data, *item2_data;
5502                 u32 item1_offset, item2_offset, item1_size, item2_size;
5503
5504                 item1 = btrfs_item_nr(slot);
5505                 item2 = btrfs_item_nr(slot + 1);
5506                 btrfs_item_key_to_cpu(buf, &k1, slot);
5507                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5508                 item1_offset = btrfs_item_offset(buf, item1);
5509                 item2_offset = btrfs_item_offset(buf, item2);
5510                 item1_size = btrfs_item_size(buf, item1);
5511                 item2_size = btrfs_item_size(buf, item2);
5512
5513                 item1_data = malloc(item1_size);
5514                 if (!item1_data)
5515                         return -ENOMEM;
5516                 item2_data = malloc(item2_size);
5517                 if (!item2_data) {
5518                         free(item1_data);
5519                         return -ENOMEM;
5520                 }
5521
5522                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5523                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5524
5525                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5526                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5527                 free(item1_data);
5528                 free(item2_data);
5529
5530                 btrfs_set_item_offset(buf, item1, item2_offset);
5531                 btrfs_set_item_offset(buf, item2, item1_offset);
5532                 btrfs_set_item_size(buf, item1, item2_size);
5533                 btrfs_set_item_size(buf, item2, item1_size);
5534
5535                 path->slots[0] = slot;
5536                 btrfs_set_item_key_unsafe(root, path, &k2);
5537                 path->slots[0] = slot + 1;
5538                 btrfs_set_item_key_unsafe(root, path, &k1);
5539         }
5540         return 0;
5541 }
5542
5543 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5544 {
5545         struct extent_buffer *buf;
5546         struct btrfs_key k1, k2;
5547         int i;
5548         int level = path->lowest_level;
5549         int ret = -EIO;
5550
5551         buf = path->nodes[level];
5552         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5553                 if (level) {
5554                         btrfs_node_key_to_cpu(buf, &k1, i);
5555                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5556                 } else {
5557                         btrfs_item_key_to_cpu(buf, &k1, i);
5558                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5559                 }
5560                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5561                         continue;
5562                 ret = swap_values(root, path, buf, i);
5563                 if (ret)
5564                         break;
5565                 btrfs_mark_buffer_dirty(buf);
5566                 i = 0;
5567         }
5568         return ret;
5569 }
5570
5571 static int delete_bogus_item(struct btrfs_root *root,
5572                              struct btrfs_path *path,
5573                              struct extent_buffer *buf, int slot)
5574 {
5575         struct btrfs_key key;
5576         int nritems = btrfs_header_nritems(buf);
5577
5578         btrfs_item_key_to_cpu(buf, &key, slot);
5579
5580         /* These are all the keys we can deal with missing. */
5581         if (key.type != BTRFS_DIR_INDEX_KEY &&
5582             key.type != BTRFS_EXTENT_ITEM_KEY &&
5583             key.type != BTRFS_METADATA_ITEM_KEY &&
5584             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5585             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5586                 return -1;
5587
5588         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5589                (unsigned long long)key.objectid, key.type,
5590                (unsigned long long)key.offset, slot, buf->start);
5591         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5592                               btrfs_item_nr_offset(slot + 1),
5593                               sizeof(struct btrfs_item) *
5594                               (nritems - slot - 1));
5595         btrfs_set_header_nritems(buf, nritems - 1);
5596         if (slot == 0) {
5597                 struct btrfs_disk_key disk_key;
5598
5599                 btrfs_item_key(buf, &disk_key, 0);
5600                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5601         }
5602         btrfs_mark_buffer_dirty(buf);
5603         return 0;
5604 }
5605
5606 static int fix_item_offset(struct btrfs_trans_handle *trans,
5607                            struct btrfs_root *root,
5608                            struct btrfs_path *path)
5609 {
5610         struct extent_buffer *buf;
5611         int i;
5612         int ret = 0;
5613
5614         /* We should only get this for leaves */
5615         BUG_ON(path->lowest_level);
5616         buf = path->nodes[0];
5617 again:
5618         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5619                 unsigned int shift = 0, offset;
5620
5621                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5622                     BTRFS_LEAF_DATA_SIZE(root)) {
5623                         if (btrfs_item_end_nr(buf, i) >
5624                             BTRFS_LEAF_DATA_SIZE(root)) {
5625                                 ret = delete_bogus_item(root, path, buf, i);
5626                                 if (!ret)
5627                                         goto again;
5628                                 fprintf(stderr, "item is off the end of the "
5629                                         "leaf, can't fix\n");
5630                                 ret = -EIO;
5631                                 break;
5632                         }
5633                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5634                                 btrfs_item_end_nr(buf, i);
5635                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5636                            btrfs_item_offset_nr(buf, i - 1)) {
5637                         if (btrfs_item_end_nr(buf, i) >
5638                             btrfs_item_offset_nr(buf, i - 1)) {
5639                                 ret = delete_bogus_item(root, path, buf, i);
5640                                 if (!ret)
5641                                         goto again;
5642                                 fprintf(stderr, "items overlap, can't fix\n");
5643                                 ret = -EIO;
5644                                 break;
5645                         }
5646                         shift = btrfs_item_offset_nr(buf, i - 1) -
5647                                 btrfs_item_end_nr(buf, i);
5648                 }
5649                 if (!shift)
5650                         continue;
5651
5652                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5653                        i, shift, (unsigned long long)buf->start);
5654                 offset = btrfs_item_offset_nr(buf, i);
5655                 memmove_extent_buffer(buf,
5656                                       btrfs_leaf_data(buf) + offset + shift,
5657                                       btrfs_leaf_data(buf) + offset,
5658                                       btrfs_item_size_nr(buf, i));
5659                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5660                                       offset + shift);
5661                 btrfs_mark_buffer_dirty(buf);
5662         }
5663
5664         /*
5665          * We may have moved things, in which case we want to exit so we don't
5666          * write those changes out.  Once we have proper abort functionality in
5667          * progs this can be changed to something nicer.
5668          */
5669         BUG_ON(ret);
5670         return ret;
5671 }
5672
5673 /*
5674  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5675  * then just return -EIO.
5676  */
5677 static int try_to_fix_bad_block(struct btrfs_root *root,
5678                                 struct extent_buffer *buf,
5679                                 enum btrfs_tree_block_status status)
5680 {
5681         struct btrfs_trans_handle *trans;
5682         struct ulist *roots;
5683         struct ulist_node *node;
5684         struct btrfs_root *search_root;
5685         struct btrfs_path path;
5686         struct ulist_iterator iter;
5687         struct btrfs_key root_key, key;
5688         int ret;
5689
5690         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5691             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5692                 return -EIO;
5693
5694         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5695         if (ret)
5696                 return -EIO;
5697
5698         btrfs_init_path(&path);
5699         ULIST_ITER_INIT(&iter);
5700         while ((node = ulist_next(roots, &iter))) {
5701                 root_key.objectid = node->val;
5702                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5703                 root_key.offset = (u64)-1;
5704
5705                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5706                 if (IS_ERR(root)) {
5707                         ret = -EIO;
5708                         break;
5709                 }
5710
5711
5712                 trans = btrfs_start_transaction(search_root, 0);
5713                 if (IS_ERR(trans)) {
5714                         ret = PTR_ERR(trans);
5715                         break;
5716                 }
5717
5718                 path.lowest_level = btrfs_header_level(buf);
5719                 path.skip_check_block = 1;
5720                 if (path.lowest_level)
5721                         btrfs_node_key_to_cpu(buf, &key, 0);
5722                 else
5723                         btrfs_item_key_to_cpu(buf, &key, 0);
5724                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5725                 if (ret) {
5726                         ret = -EIO;
5727                         btrfs_commit_transaction(trans, search_root);
5728                         break;
5729                 }
5730                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5731                         ret = fix_key_order(search_root, &path);
5732                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5733                         ret = fix_item_offset(trans, search_root, &path);
5734                 if (ret) {
5735                         btrfs_commit_transaction(trans, search_root);
5736                         break;
5737                 }
5738                 btrfs_release_path(&path);
5739                 btrfs_commit_transaction(trans, search_root);
5740         }
5741         ulist_free(roots);
5742         btrfs_release_path(&path);
5743         return ret;
5744 }
5745
5746 static int check_block(struct btrfs_root *root,
5747                        struct cache_tree *extent_cache,
5748                        struct extent_buffer *buf, u64 flags)
5749 {
5750         struct extent_record *rec;
5751         struct cache_extent *cache;
5752         struct btrfs_key key;
5753         enum btrfs_tree_block_status status;
5754         int ret = 0;
5755         int level;
5756
5757         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5758         if (!cache)
5759                 return 1;
5760         rec = container_of(cache, struct extent_record, cache);
5761         rec->generation = btrfs_header_generation(buf);
5762
5763         level = btrfs_header_level(buf);
5764         if (btrfs_header_nritems(buf) > 0) {
5765
5766                 if (level == 0)
5767                         btrfs_item_key_to_cpu(buf, &key, 0);
5768                 else
5769                         btrfs_node_key_to_cpu(buf, &key, 0);
5770
5771                 rec->info_objectid = key.objectid;
5772         }
5773         rec->info_level = level;
5774
5775         if (btrfs_is_leaf(buf))
5776                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5777         else
5778                 status = btrfs_check_node(root, &rec->parent_key, buf);
5779
5780         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5781                 if (repair)
5782                         status = try_to_fix_bad_block(root, buf, status);
5783                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5784                         ret = -EIO;
5785                         fprintf(stderr, "bad block %llu\n",
5786                                 (unsigned long long)buf->start);
5787                 } else {
5788                         /*
5789                          * Signal to callers we need to start the scan over
5790                          * again since we'll have cowed blocks.
5791                          */
5792                         ret = -EAGAIN;
5793                 }
5794         } else {
5795                 rec->content_checked = 1;
5796                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5797                         rec->owner_ref_checked = 1;
5798                 else {
5799                         ret = check_owner_ref(root, rec, buf);
5800                         if (!ret)
5801                                 rec->owner_ref_checked = 1;
5802                 }
5803         }
5804         if (!ret)
5805                 maybe_free_extent_rec(extent_cache, rec);
5806         return ret;
5807 }
5808
5809 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5810                                                 u64 parent, u64 root)
5811 {
5812         struct list_head *cur = rec->backrefs.next;
5813         struct extent_backref *node;
5814         struct tree_backref *back;
5815
5816         while(cur != &rec->backrefs) {
5817                 node = to_extent_backref(cur);
5818                 cur = cur->next;
5819                 if (node->is_data)
5820                         continue;
5821                 back = to_tree_backref(node);
5822                 if (parent > 0) {
5823                         if (!node->full_backref)
5824                                 continue;
5825                         if (parent == back->parent)
5826                                 return back;
5827                 } else {
5828                         if (node->full_backref)
5829                                 continue;
5830                         if (back->root == root)
5831                                 return back;
5832                 }
5833         }
5834         return NULL;
5835 }
5836
5837 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5838                                                 u64 parent, u64 root)
5839 {
5840         struct tree_backref *ref = malloc(sizeof(*ref));
5841
5842         if (!ref)
5843                 return NULL;
5844         memset(&ref->node, 0, sizeof(ref->node));
5845         if (parent > 0) {
5846                 ref->parent = parent;
5847                 ref->node.full_backref = 1;
5848         } else {
5849                 ref->root = root;
5850                 ref->node.full_backref = 0;
5851         }
5852         list_add_tail(&ref->node.list, &rec->backrefs);
5853
5854         return ref;
5855 }
5856
5857 static struct data_backref *find_data_backref(struct extent_record *rec,
5858                                                 u64 parent, u64 root,
5859                                                 u64 owner, u64 offset,
5860                                                 int found_ref,
5861                                                 u64 disk_bytenr, u64 bytes)
5862 {
5863         struct list_head *cur = rec->backrefs.next;
5864         struct extent_backref *node;
5865         struct data_backref *back;
5866
5867         while(cur != &rec->backrefs) {
5868                 node = to_extent_backref(cur);
5869                 cur = cur->next;
5870                 if (!node->is_data)
5871                         continue;
5872                 back = to_data_backref(node);
5873                 if (parent > 0) {
5874                         if (!node->full_backref)
5875                                 continue;
5876                         if (parent == back->parent)
5877                                 return back;
5878                 } else {
5879                         if (node->full_backref)
5880                                 continue;
5881                         if (back->root == root && back->owner == owner &&
5882                             back->offset == offset) {
5883                                 if (found_ref && node->found_ref &&
5884                                     (back->bytes != bytes ||
5885                                     back->disk_bytenr != disk_bytenr))
5886                                         continue;
5887                                 return back;
5888                         }
5889                 }
5890         }
5891         return NULL;
5892 }
5893
5894 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5895                                                 u64 parent, u64 root,
5896                                                 u64 owner, u64 offset,
5897                                                 u64 max_size)
5898 {
5899         struct data_backref *ref = malloc(sizeof(*ref));
5900
5901         if (!ref)
5902                 return NULL;
5903         memset(&ref->node, 0, sizeof(ref->node));
5904         ref->node.is_data = 1;
5905
5906         if (parent > 0) {
5907                 ref->parent = parent;
5908                 ref->owner = 0;
5909                 ref->offset = 0;
5910                 ref->node.full_backref = 1;
5911         } else {
5912                 ref->root = root;
5913                 ref->owner = owner;
5914                 ref->offset = offset;
5915                 ref->node.full_backref = 0;
5916         }
5917         ref->bytes = max_size;
5918         ref->found_ref = 0;
5919         ref->num_refs = 0;
5920         list_add_tail(&ref->node.list, &rec->backrefs);
5921         if (max_size > rec->max_size)
5922                 rec->max_size = max_size;
5923         return ref;
5924 }
5925
5926 /* Check if the type of extent matches with its chunk */
5927 static void check_extent_type(struct extent_record *rec)
5928 {
5929         struct btrfs_block_group_cache *bg_cache;
5930
5931         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5932         if (!bg_cache)
5933                 return;
5934
5935         /* data extent, check chunk directly*/
5936         if (!rec->metadata) {
5937                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5938                         rec->wrong_chunk_type = 1;
5939                 return;
5940         }
5941
5942         /* metadata extent, check the obvious case first */
5943         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5944                                  BTRFS_BLOCK_GROUP_METADATA))) {
5945                 rec->wrong_chunk_type = 1;
5946                 return;
5947         }
5948
5949         /*
5950          * Check SYSTEM extent, as it's also marked as metadata, we can only
5951          * make sure it's a SYSTEM extent by its backref
5952          */
5953         if (!list_empty(&rec->backrefs)) {
5954                 struct extent_backref *node;
5955                 struct tree_backref *tback;
5956                 u64 bg_type;
5957
5958                 node = to_extent_backref(rec->backrefs.next);
5959                 if (node->is_data) {
5960                         /* tree block shouldn't have data backref */
5961                         rec->wrong_chunk_type = 1;
5962                         return;
5963                 }
5964                 tback = container_of(node, struct tree_backref, node);
5965
5966                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5967                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5968                 else
5969                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5970                 if (!(bg_cache->flags & bg_type))
5971                         rec->wrong_chunk_type = 1;
5972         }
5973 }
5974
5975 /*
5976  * Allocate a new extent record, fill default values from @tmpl and insert int
5977  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5978  * the cache, otherwise it fails.
5979  */
5980 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5981                 struct extent_record *tmpl)
5982 {
5983         struct extent_record *rec;
5984         int ret = 0;
5985
5986         rec = malloc(sizeof(*rec));
5987         if (!rec)
5988                 return -ENOMEM;
5989         rec->start = tmpl->start;
5990         rec->max_size = tmpl->max_size;
5991         rec->nr = max(tmpl->nr, tmpl->max_size);
5992         rec->found_rec = tmpl->found_rec;
5993         rec->content_checked = tmpl->content_checked;
5994         rec->owner_ref_checked = tmpl->owner_ref_checked;
5995         rec->num_duplicates = 0;
5996         rec->metadata = tmpl->metadata;
5997         rec->flag_block_full_backref = FLAG_UNSET;
5998         rec->bad_full_backref = 0;
5999         rec->crossing_stripes = 0;
6000         rec->wrong_chunk_type = 0;
6001         rec->is_root = tmpl->is_root;
6002         rec->refs = tmpl->refs;
6003         rec->extent_item_refs = tmpl->extent_item_refs;
6004         rec->parent_generation = tmpl->parent_generation;
6005         INIT_LIST_HEAD(&rec->backrefs);
6006         INIT_LIST_HEAD(&rec->dups);
6007         INIT_LIST_HEAD(&rec->list);
6008         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6009         rec->cache.start = tmpl->start;
6010         rec->cache.size = tmpl->nr;
6011         ret = insert_cache_extent(extent_cache, &rec->cache);
6012         if (ret) {
6013                 free(rec);
6014                 return ret;
6015         }
6016         bytes_used += rec->nr;
6017
6018         if (tmpl->metadata)
6019                 rec->crossing_stripes = check_crossing_stripes(global_info,
6020                                 rec->start, global_info->tree_root->nodesize);
6021         check_extent_type(rec);
6022         return ret;
6023 }
6024
6025 /*
6026  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6027  * some are hints:
6028  * - refs              - if found, increase refs
6029  * - is_root           - if found, set
6030  * - content_checked   - if found, set
6031  * - owner_ref_checked - if found, set
6032  *
6033  * If not found, create a new one, initialize and insert.
6034  */
6035 static int add_extent_rec(struct cache_tree *extent_cache,
6036                 struct extent_record *tmpl)
6037 {
6038         struct extent_record *rec;
6039         struct cache_extent *cache;
6040         int ret = 0;
6041         int dup = 0;
6042
6043         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6044         if (cache) {
6045                 rec = container_of(cache, struct extent_record, cache);
6046                 if (tmpl->refs)
6047                         rec->refs++;
6048                 if (rec->nr == 1)
6049                         rec->nr = max(tmpl->nr, tmpl->max_size);
6050
6051                 /*
6052                  * We need to make sure to reset nr to whatever the extent
6053                  * record says was the real size, this way we can compare it to
6054                  * the backrefs.
6055                  */
6056                 if (tmpl->found_rec) {
6057                         if (tmpl->start != rec->start || rec->found_rec) {
6058                                 struct extent_record *tmp;
6059
6060                                 dup = 1;
6061                                 if (list_empty(&rec->list))
6062                                         list_add_tail(&rec->list,
6063                                                       &duplicate_extents);
6064
6065                                 /*
6066                                  * We have to do this song and dance in case we
6067                                  * find an extent record that falls inside of
6068                                  * our current extent record but does not have
6069                                  * the same objectid.
6070                                  */
6071                                 tmp = malloc(sizeof(*tmp));
6072                                 if (!tmp)
6073                                         return -ENOMEM;
6074                                 tmp->start = tmpl->start;
6075                                 tmp->max_size = tmpl->max_size;
6076                                 tmp->nr = tmpl->nr;
6077                                 tmp->found_rec = 1;
6078                                 tmp->metadata = tmpl->metadata;
6079                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6080                                 INIT_LIST_HEAD(&tmp->list);
6081                                 list_add_tail(&tmp->list, &rec->dups);
6082                                 rec->num_duplicates++;
6083                         } else {
6084                                 rec->nr = tmpl->nr;
6085                                 rec->found_rec = 1;
6086                         }
6087                 }
6088
6089                 if (tmpl->extent_item_refs && !dup) {
6090                         if (rec->extent_item_refs) {
6091                                 fprintf(stderr, "block %llu rec "
6092                                         "extent_item_refs %llu, passed %llu\n",
6093                                         (unsigned long long)tmpl->start,
6094                                         (unsigned long long)
6095                                                         rec->extent_item_refs,
6096                                         (unsigned long long)tmpl->extent_item_refs);
6097                         }
6098                         rec->extent_item_refs = tmpl->extent_item_refs;
6099                 }
6100                 if (tmpl->is_root)
6101                         rec->is_root = 1;
6102                 if (tmpl->content_checked)
6103                         rec->content_checked = 1;
6104                 if (tmpl->owner_ref_checked)
6105                         rec->owner_ref_checked = 1;
6106                 memcpy(&rec->parent_key, &tmpl->parent_key,
6107                                 sizeof(tmpl->parent_key));
6108                 if (tmpl->parent_generation)
6109                         rec->parent_generation = tmpl->parent_generation;
6110                 if (rec->max_size < tmpl->max_size)
6111                         rec->max_size = tmpl->max_size;
6112
6113                 /*
6114                  * A metadata extent can't cross stripe_len boundary, otherwise
6115                  * kernel scrub won't be able to handle it.
6116                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6117                  * it.
6118                  */
6119                 if (tmpl->metadata)
6120                         rec->crossing_stripes = check_crossing_stripes(
6121                                         global_info, rec->start,
6122                                         global_info->tree_root->nodesize);
6123                 check_extent_type(rec);
6124                 maybe_free_extent_rec(extent_cache, rec);
6125                 return ret;
6126         }
6127
6128         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6129
6130         return ret;
6131 }
6132
6133 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6134                             u64 parent, u64 root, int found_ref)
6135 {
6136         struct extent_record *rec;
6137         struct tree_backref *back;
6138         struct cache_extent *cache;
6139         int ret;
6140
6141         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6142         if (!cache) {
6143                 struct extent_record tmpl;
6144
6145                 memset(&tmpl, 0, sizeof(tmpl));
6146                 tmpl.start = bytenr;
6147                 tmpl.nr = 1;
6148                 tmpl.metadata = 1;
6149
6150                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6151                 if (ret)
6152                         return ret;
6153
6154                 /* really a bug in cache_extent implement now */
6155                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6156                 if (!cache)
6157                         return -ENOENT;
6158         }
6159
6160         rec = container_of(cache, struct extent_record, cache);
6161         if (rec->start != bytenr) {
6162                 /*
6163                  * Several cause, from unaligned bytenr to over lapping extents
6164                  */
6165                 return -EEXIST;
6166         }
6167
6168         back = find_tree_backref(rec, parent, root);
6169         if (!back) {
6170                 back = alloc_tree_backref(rec, parent, root);
6171                 if (!back)
6172                         return -ENOMEM;
6173         }
6174
6175         if (found_ref) {
6176                 if (back->node.found_ref) {
6177                         fprintf(stderr, "Extent back ref already exists "
6178                                 "for %llu parent %llu root %llu \n",
6179                                 (unsigned long long)bytenr,
6180                                 (unsigned long long)parent,
6181                                 (unsigned long long)root);
6182                 }
6183                 back->node.found_ref = 1;
6184         } else {
6185                 if (back->node.found_extent_tree) {
6186                         fprintf(stderr, "Extent back ref already exists "
6187                                 "for %llu parent %llu root %llu \n",
6188                                 (unsigned long long)bytenr,
6189                                 (unsigned long long)parent,
6190                                 (unsigned long long)root);
6191                 }
6192                 back->node.found_extent_tree = 1;
6193         }
6194         check_extent_type(rec);
6195         maybe_free_extent_rec(extent_cache, rec);
6196         return 0;
6197 }
6198
6199 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6200                             u64 parent, u64 root, u64 owner, u64 offset,
6201                             u32 num_refs, int found_ref, u64 max_size)
6202 {
6203         struct extent_record *rec;
6204         struct data_backref *back;
6205         struct cache_extent *cache;
6206         int ret;
6207
6208         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6209         if (!cache) {
6210                 struct extent_record tmpl;
6211
6212                 memset(&tmpl, 0, sizeof(tmpl));
6213                 tmpl.start = bytenr;
6214                 tmpl.nr = 1;
6215                 tmpl.max_size = max_size;
6216
6217                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6218                 if (ret)
6219                         return ret;
6220
6221                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6222                 if (!cache)
6223                         abort();
6224         }
6225
6226         rec = container_of(cache, struct extent_record, cache);
6227         if (rec->max_size < max_size)
6228                 rec->max_size = max_size;
6229
6230         /*
6231          * If found_ref is set then max_size is the real size and must match the
6232          * existing refs.  So if we have already found a ref then we need to
6233          * make sure that this ref matches the existing one, otherwise we need
6234          * to add a new backref so we can notice that the backrefs don't match
6235          * and we need to figure out who is telling the truth.  This is to
6236          * account for that awful fsync bug I introduced where we'd end up with
6237          * a btrfs_file_extent_item that would have its length include multiple
6238          * prealloc extents or point inside of a prealloc extent.
6239          */
6240         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6241                                  bytenr, max_size);
6242         if (!back) {
6243                 back = alloc_data_backref(rec, parent, root, owner, offset,
6244                                           max_size);
6245                 BUG_ON(!back);
6246         }
6247
6248         if (found_ref) {
6249                 BUG_ON(num_refs != 1);
6250                 if (back->node.found_ref)
6251                         BUG_ON(back->bytes != max_size);
6252                 back->node.found_ref = 1;
6253                 back->found_ref += 1;
6254                 back->bytes = max_size;
6255                 back->disk_bytenr = bytenr;
6256                 rec->refs += 1;
6257                 rec->content_checked = 1;
6258                 rec->owner_ref_checked = 1;
6259         } else {
6260                 if (back->node.found_extent_tree) {
6261                         fprintf(stderr, "Extent back ref already exists "
6262                                 "for %llu parent %llu root %llu "
6263                                 "owner %llu offset %llu num_refs %lu\n",
6264                                 (unsigned long long)bytenr,
6265                                 (unsigned long long)parent,
6266                                 (unsigned long long)root,
6267                                 (unsigned long long)owner,
6268                                 (unsigned long long)offset,
6269                                 (unsigned long)num_refs);
6270                 }
6271                 back->num_refs = num_refs;
6272                 back->node.found_extent_tree = 1;
6273         }
6274         maybe_free_extent_rec(extent_cache, rec);
6275         return 0;
6276 }
6277
6278 static int add_pending(struct cache_tree *pending,
6279                        struct cache_tree *seen, u64 bytenr, u32 size)
6280 {
6281         int ret;
6282         ret = add_cache_extent(seen, bytenr, size);
6283         if (ret)
6284                 return ret;
6285         add_cache_extent(pending, bytenr, size);
6286         return 0;
6287 }
6288
6289 static int pick_next_pending(struct cache_tree *pending,
6290                         struct cache_tree *reada,
6291                         struct cache_tree *nodes,
6292                         u64 last, struct block_info *bits, int bits_nr,
6293                         int *reada_bits)
6294 {
6295         unsigned long node_start = last;
6296         struct cache_extent *cache;
6297         int ret;
6298
6299         cache = search_cache_extent(reada, 0);
6300         if (cache) {
6301                 bits[0].start = cache->start;
6302                 bits[0].size = cache->size;
6303                 *reada_bits = 1;
6304                 return 1;
6305         }
6306         *reada_bits = 0;
6307         if (node_start > 32768)
6308                 node_start -= 32768;
6309
6310         cache = search_cache_extent(nodes, node_start);
6311         if (!cache)
6312                 cache = search_cache_extent(nodes, 0);
6313
6314         if (!cache) {
6315                  cache = search_cache_extent(pending, 0);
6316                  if (!cache)
6317                          return 0;
6318                  ret = 0;
6319                  do {
6320                          bits[ret].start = cache->start;
6321                          bits[ret].size = cache->size;
6322                          cache = next_cache_extent(cache);
6323                          ret++;
6324                  } while (cache && ret < bits_nr);
6325                  return ret;
6326         }
6327
6328         ret = 0;
6329         do {
6330                 bits[ret].start = cache->start;
6331                 bits[ret].size = cache->size;
6332                 cache = next_cache_extent(cache);
6333                 ret++;
6334         } while (cache && ret < bits_nr);
6335
6336         if (bits_nr - ret > 8) {
6337                 u64 lookup = bits[0].start + bits[0].size;
6338                 struct cache_extent *next;
6339                 next = search_cache_extent(pending, lookup);
6340                 while(next) {
6341                         if (next->start - lookup > 32768)
6342                                 break;
6343                         bits[ret].start = next->start;
6344                         bits[ret].size = next->size;
6345                         lookup = next->start + next->size;
6346                         ret++;
6347                         if (ret == bits_nr)
6348                                 break;
6349                         next = next_cache_extent(next);
6350                         if (!next)
6351                                 break;
6352                 }
6353         }
6354         return ret;
6355 }
6356
6357 static void free_chunk_record(struct cache_extent *cache)
6358 {
6359         struct chunk_record *rec;
6360
6361         rec = container_of(cache, struct chunk_record, cache);
6362         list_del_init(&rec->list);
6363         list_del_init(&rec->dextents);
6364         free(rec);
6365 }
6366
6367 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6368 {
6369         cache_tree_free_extents(chunk_cache, free_chunk_record);
6370 }
6371
6372 static void free_device_record(struct rb_node *node)
6373 {
6374         struct device_record *rec;
6375
6376         rec = container_of(node, struct device_record, node);
6377         free(rec);
6378 }
6379
6380 FREE_RB_BASED_TREE(device_cache, free_device_record);
6381
6382 int insert_block_group_record(struct block_group_tree *tree,
6383                               struct block_group_record *bg_rec)
6384 {
6385         int ret;
6386
6387         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6388         if (ret)
6389                 return ret;
6390
6391         list_add_tail(&bg_rec->list, &tree->block_groups);
6392         return 0;
6393 }
6394
6395 static void free_block_group_record(struct cache_extent *cache)
6396 {
6397         struct block_group_record *rec;
6398
6399         rec = container_of(cache, struct block_group_record, cache);
6400         list_del_init(&rec->list);
6401         free(rec);
6402 }
6403
6404 void free_block_group_tree(struct block_group_tree *tree)
6405 {
6406         cache_tree_free_extents(&tree->tree, free_block_group_record);
6407 }
6408
6409 int insert_device_extent_record(struct device_extent_tree *tree,
6410                                 struct device_extent_record *de_rec)
6411 {
6412         int ret;
6413
6414         /*
6415          * Device extent is a bit different from the other extents, because
6416          * the extents which belong to the different devices may have the
6417          * same start and size, so we need use the special extent cache
6418          * search/insert functions.
6419          */
6420         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6421         if (ret)
6422                 return ret;
6423
6424         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6425         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6426         return 0;
6427 }
6428
6429 static void free_device_extent_record(struct cache_extent *cache)
6430 {
6431         struct device_extent_record *rec;
6432
6433         rec = container_of(cache, struct device_extent_record, cache);
6434         if (!list_empty(&rec->chunk_list))
6435                 list_del_init(&rec->chunk_list);
6436         if (!list_empty(&rec->device_list))
6437                 list_del_init(&rec->device_list);
6438         free(rec);
6439 }
6440
6441 void free_device_extent_tree(struct device_extent_tree *tree)
6442 {
6443         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6444 }
6445
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item at @slot of @leaf.
 *
 * A referencing objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * item's reference count.  In both cases the key's objectid is the extent
 * bytenr and the key's offset is passed as the parent.
 *
 * Returns whatever the chosen add_*_backref() call returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
					0, 0, 0,
					btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6466
6467 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6468                                             struct btrfs_key *key,
6469                                             int slot)
6470 {
6471         struct btrfs_chunk *ptr;
6472         struct chunk_record *rec;
6473         int num_stripes, i;
6474
6475         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6476         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6477
6478         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6479         if (!rec) {
6480                 fprintf(stderr, "memory allocation failed\n");
6481                 exit(-1);
6482         }
6483
6484         INIT_LIST_HEAD(&rec->list);
6485         INIT_LIST_HEAD(&rec->dextents);
6486         rec->bg_rec = NULL;
6487
6488         rec->cache.start = key->offset;
6489         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6490
6491         rec->generation = btrfs_header_generation(leaf);
6492
6493         rec->objectid = key->objectid;
6494         rec->type = key->type;
6495         rec->offset = key->offset;
6496
6497         rec->length = rec->cache.size;
6498         rec->owner = btrfs_chunk_owner(leaf, ptr);
6499         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6500         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6501         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6502         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6503         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6504         rec->num_stripes = num_stripes;
6505         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6506
6507         for (i = 0; i < rec->num_stripes; ++i) {
6508                 rec->stripes[i].devid =
6509                         btrfs_stripe_devid_nr(leaf, ptr, i);
6510                 rec->stripes[i].offset =
6511                         btrfs_stripe_offset_nr(leaf, ptr, i);
6512                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6513                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6514                                 BTRFS_UUID_SIZE);
6515         }
6516
6517         return rec;
6518 }
6519
6520 static int process_chunk_item(struct cache_tree *chunk_cache,
6521                               struct btrfs_key *key, struct extent_buffer *eb,
6522                               int slot)
6523 {
6524         struct chunk_record *rec;
6525         struct btrfs_chunk *chunk;
6526         int ret = 0;
6527
6528         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6529         /*
6530          * Do extra check for this chunk item,
6531          *
6532          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6533          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6534          * and owner<->key_type check.
6535          */
6536         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6537                                       key->offset);
6538         if (ret < 0) {
6539                 error("chunk(%llu, %llu) is not valid, ignore it",
6540                       key->offset, btrfs_chunk_length(eb, chunk));
6541                 return 0;
6542         }
6543         rec = btrfs_new_chunk_record(eb, key, slot);
6544         ret = insert_cache_extent(chunk_cache, &rec->cache);
6545         if (ret) {
6546                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6547                         rec->offset, rec->length);
6548                 free(rec);
6549         }
6550
6551         return ret;
6552 }
6553
6554 static int process_device_item(struct rb_root *dev_cache,
6555                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6556 {
6557         struct btrfs_dev_item *ptr;
6558         struct device_record *rec;
6559         int ret = 0;
6560
6561         ptr = btrfs_item_ptr(eb,
6562                 slot, struct btrfs_dev_item);
6563
6564         rec = malloc(sizeof(*rec));
6565         if (!rec) {
6566                 fprintf(stderr, "memory allocation failed\n");
6567                 return -ENOMEM;
6568         }
6569
6570         rec->devid = key->offset;
6571         rec->generation = btrfs_header_generation(eb);
6572
6573         rec->objectid = key->objectid;
6574         rec->type = key->type;
6575         rec->offset = key->offset;
6576
6577         rec->devid = btrfs_device_id(eb, ptr);
6578         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6579         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6580
6581         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6582         if (ret) {
6583                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6584                 free(rec);
6585         }
6586
6587         return ret;
6588 }
6589
6590 struct block_group_record *
6591 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6592                              int slot)
6593 {
6594         struct btrfs_block_group_item *ptr;
6595         struct block_group_record *rec;
6596
6597         rec = calloc(1, sizeof(*rec));
6598         if (!rec) {
6599                 fprintf(stderr, "memory allocation failed\n");
6600                 exit(-1);
6601         }
6602
6603         rec->cache.start = key->objectid;
6604         rec->cache.size = key->offset;
6605
6606         rec->generation = btrfs_header_generation(leaf);
6607
6608         rec->objectid = key->objectid;
6609         rec->type = key->type;
6610         rec->offset = key->offset;
6611
6612         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6613         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6614
6615         INIT_LIST_HEAD(&rec->list);
6616
6617         return rec;
6618 }
6619
6620 static int process_block_group_item(struct block_group_tree *block_group_cache,
6621                                     struct btrfs_key *key,
6622                                     struct extent_buffer *eb, int slot)
6623 {
6624         struct block_group_record *rec;
6625         int ret = 0;
6626
6627         rec = btrfs_new_block_group_record(eb, key, slot);
6628         ret = insert_block_group_record(block_group_cache, rec);
6629         if (ret) {
6630                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6631                         rec->objectid, rec->offset);
6632                 free(rec);
6633         }
6634
6635         return ret;
6636 }
6637
6638 struct device_extent_record *
6639 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6640                                struct btrfs_key *key, int slot)
6641 {
6642         struct device_extent_record *rec;
6643         struct btrfs_dev_extent *ptr;
6644
6645         rec = calloc(1, sizeof(*rec));
6646         if (!rec) {
6647                 fprintf(stderr, "memory allocation failed\n");
6648                 exit(-1);
6649         }
6650
6651         rec->cache.objectid = key->objectid;
6652         rec->cache.start = key->offset;
6653
6654         rec->generation = btrfs_header_generation(leaf);
6655
6656         rec->objectid = key->objectid;
6657         rec->type = key->type;
6658         rec->offset = key->offset;
6659
6660         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6661         rec->chunk_objecteid =
6662                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6663         rec->chunk_offset =
6664                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6665         rec->length = btrfs_dev_extent_length(leaf, ptr);
6666         rec->cache.size = rec->length;
6667
6668         INIT_LIST_HEAD(&rec->chunk_list);
6669         INIT_LIST_HEAD(&rec->device_list);
6670
6671         return rec;
6672 }
6673
6674 static int
6675 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6676                            struct btrfs_key *key, struct extent_buffer *eb,
6677                            int slot)
6678 {
6679         struct device_extent_record *rec;
6680         int ret;
6681
6682         rec = btrfs_new_device_extent_record(eb, key, slot);
6683         ret = insert_device_extent_record(dev_extent_cache, rec);
6684         if (ret) {
6685                 fprintf(stderr,
6686                         "Device extent[%llu, %llu, %llu] existed.\n",
6687                         rec->objectid, rec->offset, rec->length);
6688                 free(rec);
6689         }
6690
6691         return ret;
6692 }
6693
6694 static int process_extent_item(struct btrfs_root *root,
6695                                struct cache_tree *extent_cache,
6696                                struct extent_buffer *eb, int slot)
6697 {
6698         struct btrfs_extent_item *ei;
6699         struct btrfs_extent_inline_ref *iref;
6700         struct btrfs_extent_data_ref *dref;
6701         struct btrfs_shared_data_ref *sref;
6702         struct btrfs_key key;
6703         struct extent_record tmpl;
6704         unsigned long end;
6705         unsigned long ptr;
6706         int ret;
6707         int type;
6708         u32 item_size = btrfs_item_size_nr(eb, slot);
6709         u64 refs = 0;
6710         u64 offset;
6711         u64 num_bytes;
6712         int metadata = 0;
6713
6714         btrfs_item_key_to_cpu(eb, &key, slot);
6715
6716         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6717                 metadata = 1;
6718                 num_bytes = root->nodesize;
6719         } else {
6720                 num_bytes = key.offset;
6721         }
6722
6723         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6724                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6725                       key.objectid, root->sectorsize);
6726                 return -EIO;
6727         }
6728         if (item_size < sizeof(*ei)) {
6729 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6730                 struct btrfs_extent_item_v0 *ei0;
6731                 BUG_ON(item_size != sizeof(*ei0));
6732                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6733                 refs = btrfs_extent_refs_v0(eb, ei0);
6734 #else
6735                 BUG();
6736 #endif
6737                 memset(&tmpl, 0, sizeof(tmpl));
6738                 tmpl.start = key.objectid;
6739                 tmpl.nr = num_bytes;
6740                 tmpl.extent_item_refs = refs;
6741                 tmpl.metadata = metadata;
6742                 tmpl.found_rec = 1;
6743                 tmpl.max_size = num_bytes;
6744
6745                 return add_extent_rec(extent_cache, &tmpl);
6746         }
6747
6748         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6749         refs = btrfs_extent_refs(eb, ei);
6750         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6751                 metadata = 1;
6752         else
6753                 metadata = 0;
6754         if (metadata && num_bytes != root->nodesize) {
6755                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6756                       num_bytes, root->nodesize);
6757                 return -EIO;
6758         }
6759         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6760                 error("ignore invalid data extent, length %llu is not aligned to %u",
6761                       num_bytes, root->sectorsize);
6762                 return -EIO;
6763         }
6764
6765         memset(&tmpl, 0, sizeof(tmpl));
6766         tmpl.start = key.objectid;
6767         tmpl.nr = num_bytes;
6768         tmpl.extent_item_refs = refs;
6769         tmpl.metadata = metadata;
6770         tmpl.found_rec = 1;
6771         tmpl.max_size = num_bytes;
6772         add_extent_rec(extent_cache, &tmpl);
6773
6774         ptr = (unsigned long)(ei + 1);
6775         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6776             key.type == BTRFS_EXTENT_ITEM_KEY)
6777                 ptr += sizeof(struct btrfs_tree_block_info);
6778
6779         end = (unsigned long)ei + item_size;
6780         while (ptr < end) {
6781                 iref = (struct btrfs_extent_inline_ref *)ptr;
6782                 type = btrfs_extent_inline_ref_type(eb, iref);
6783                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6784                 switch (type) {
6785                 case BTRFS_TREE_BLOCK_REF_KEY:
6786                         ret = add_tree_backref(extent_cache, key.objectid,
6787                                         0, offset, 0);
6788                         if (ret < 0)
6789                                 error("add_tree_backref failed: %s",
6790                                       strerror(-ret));
6791                         break;
6792                 case BTRFS_SHARED_BLOCK_REF_KEY:
6793                         ret = add_tree_backref(extent_cache, key.objectid,
6794                                         offset, 0, 0);
6795                         if (ret < 0)
6796                                 error("add_tree_backref failed: %s",
6797                                       strerror(-ret));
6798                         break;
6799                 case BTRFS_EXTENT_DATA_REF_KEY:
6800                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6801                         add_data_backref(extent_cache, key.objectid, 0,
6802                                         btrfs_extent_data_ref_root(eb, dref),
6803                                         btrfs_extent_data_ref_objectid(eb,
6804                                                                        dref),
6805                                         btrfs_extent_data_ref_offset(eb, dref),
6806                                         btrfs_extent_data_ref_count(eb, dref),
6807                                         0, num_bytes);
6808                         break;
6809                 case BTRFS_SHARED_DATA_REF_KEY:
6810                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6811                         add_data_backref(extent_cache, key.objectid, offset,
6812                                         0, 0, 0,
6813                                         btrfs_shared_data_ref_count(eb, sref),
6814                                         0, num_bytes);
6815                         break;
6816                 default:
6817                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6818                                 key.objectid, key.type, num_bytes);
6819                         goto out;
6820                 }
6821                 ptr += btrfs_extent_inline_ref_size(type);
6822         }
6823         WARN_ON(ptr > end);
6824 out:
6825         return 0;
6826 }
6827
6828 static int check_cache_range(struct btrfs_root *root,
6829                              struct btrfs_block_group_cache *cache,
6830                              u64 offset, u64 bytes)
6831 {
6832         struct btrfs_free_space *entry;
6833         u64 *logical;
6834         u64 bytenr;
6835         int stripe_len;
6836         int i, nr, ret;
6837
6838         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6839                 bytenr = btrfs_sb_offset(i);
6840                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6841                                        cache->key.objectid, bytenr, 0,
6842                                        &logical, &nr, &stripe_len);
6843                 if (ret)
6844                         return ret;
6845
6846                 while (nr--) {
6847                         if (logical[nr] + stripe_len <= offset)
6848                                 continue;
6849                         if (offset + bytes <= logical[nr])
6850                                 continue;
6851                         if (logical[nr] == offset) {
6852                                 if (stripe_len >= bytes) {
6853                                         free(logical);
6854                                         return 0;
6855                                 }
6856                                 bytes -= stripe_len;
6857                                 offset += stripe_len;
6858                         } else if (logical[nr] < offset) {
6859                                 if (logical[nr] + stripe_len >=
6860                                     offset + bytes) {
6861                                         free(logical);
6862                                         return 0;
6863                                 }
6864                                 bytes = (offset + bytes) -
6865                                         (logical[nr] + stripe_len);
6866                                 offset = logical[nr] + stripe_len;
6867                         } else {
6868                                 /*
6869                                  * Could be tricky, the super may land in the
6870                                  * middle of the area we're checking.  First
6871                                  * check the easiest case, it's at the end.
6872                                  */
6873                                 if (logical[nr] + stripe_len >=
6874                                     bytes + offset) {
6875                                         bytes = logical[nr] - offset;
6876                                         continue;
6877                                 }
6878
6879                                 /* Check the left side */
6880                                 ret = check_cache_range(root, cache,
6881                                                         offset,
6882                                                         logical[nr] - offset);
6883                                 if (ret) {
6884                                         free(logical);
6885                                         return ret;
6886                                 }
6887
6888                                 /* Now we continue with the right side */
6889                                 bytes = (offset + bytes) -
6890                                         (logical[nr] + stripe_len);
6891                                 offset = logical[nr] + stripe_len;
6892                         }
6893                 }
6894
6895                 free(logical);
6896         }
6897
6898         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6899         if (!entry) {
6900                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6901                         offset, offset+bytes);
6902                 return -EINVAL;
6903         }
6904
6905         if (entry->offset != offset) {
6906                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6907                         entry->offset);
6908                 return -EINVAL;
6909         }
6910
6911         if (entry->bytes != bytes) {
6912                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6913                         bytes, entry->bytes, offset);
6914                 return -EINVAL;
6915         }
6916
6917         unlink_free_space(cache->free_space_ctl, entry);
6918         free(entry);
6919         return 0;
6920 }
6921
6922 static int verify_space_cache(struct btrfs_root *root,
6923                               struct btrfs_block_group_cache *cache)
6924 {
6925         struct btrfs_path path;
6926         struct extent_buffer *leaf;
6927         struct btrfs_key key;
6928         u64 last;
6929         int ret = 0;
6930
6931         root = root->fs_info->extent_root;
6932
6933         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6934
6935         btrfs_init_path(&path);
6936         key.objectid = last;
6937         key.offset = 0;
6938         key.type = BTRFS_EXTENT_ITEM_KEY;
6939         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6940         if (ret < 0)
6941                 goto out;
6942         ret = 0;
6943         while (1) {
6944                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6945                         ret = btrfs_next_leaf(root, &path);
6946                         if (ret < 0)
6947                                 goto out;
6948                         if (ret > 0) {
6949                                 ret = 0;
6950                                 break;
6951                         }
6952                 }
6953                 leaf = path.nodes[0];
6954                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6955                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6956                         break;
6957                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6958                     key.type != BTRFS_METADATA_ITEM_KEY) {
6959                         path.slots[0]++;
6960                         continue;
6961                 }
6962
6963                 if (last == key.objectid) {
6964                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6965                                 last = key.objectid + key.offset;
6966                         else
6967                                 last = key.objectid + root->nodesize;
6968                         path.slots[0]++;
6969                         continue;
6970                 }
6971
6972                 ret = check_cache_range(root, cache, last,
6973                                         key.objectid - last);
6974                 if (ret)
6975                         break;
6976                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6977                         last = key.objectid + key.offset;
6978                 else
6979                         last = key.objectid + root->nodesize;
6980                 path.slots[0]++;
6981         }
6982
6983         if (last < cache->key.objectid + cache->key.offset)
6984                 ret = check_cache_range(root, cache, last,
6985                                         cache->key.objectid +
6986                                         cache->key.offset - last);
6987
6988 out:
6989         btrfs_release_path(&path);
6990
6991         if (!ret &&
6992             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6993                 fprintf(stderr, "There are still entries left in the space "
6994                         "cache\n");
6995                 ret = -EINVAL;
6996         }
6997
6998         return ret;
6999 }
7000
7001 static int check_space_cache(struct btrfs_root *root)
7002 {
7003         struct btrfs_block_group_cache *cache;
7004         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7005         int ret;
7006         int error = 0;
7007
7008         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7009             btrfs_super_generation(root->fs_info->super_copy) !=
7010             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7011                 printf("cache and super generation don't match, space cache "
7012                        "will be invalidated\n");
7013                 return 0;
7014         }
7015
7016         if (ctx.progress_enabled) {
7017                 ctx.tp = TASK_FREE_SPACE;
7018                 task_start(ctx.info);
7019         }
7020
7021         while (1) {
7022                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7023                 if (!cache)
7024                         break;
7025
7026                 start = cache->key.objectid + cache->key.offset;
7027                 if (!cache->free_space_ctl) {
7028                         if (btrfs_init_free_space_ctl(cache,
7029                                                       root->sectorsize)) {
7030                                 ret = -ENOMEM;
7031                                 break;
7032                         }
7033                 } else {
7034                         btrfs_remove_free_space_cache(cache);
7035                 }
7036
7037                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7038                         ret = exclude_super_stripes(root, cache);
7039                         if (ret) {
7040                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7041                                         strerror(-ret));
7042                                 error++;
7043                                 continue;
7044                         }
7045                         ret = load_free_space_tree(root->fs_info, cache);
7046                         free_excluded_extents(root, cache);
7047                         if (ret < 0) {
7048                                 fprintf(stderr, "could not load free space tree: %s\n",
7049                                         strerror(-ret));
7050                                 error++;
7051                                 continue;
7052                         }
7053                         error += ret;
7054                 } else {
7055                         ret = load_free_space_cache(root->fs_info, cache);
7056                         if (!ret)
7057                                 continue;
7058                 }
7059
7060                 ret = verify_space_cache(root, cache);
7061                 if (ret) {
7062                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7063                                 cache->key.objectid);
7064                         error++;
7065                 }
7066         }
7067
7068         task_stop(ctx.info);
7069
7070         return error ? -EINVAL : 0;
7071 }
7072
7073 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7074                         u64 num_bytes, unsigned long leaf_offset,
7075                         struct extent_buffer *eb) {
7076
7077         u64 offset = 0;
7078         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7079         char *data;
7080         unsigned long csum_offset;
7081         u32 csum;
7082         u32 csum_expected;
7083         u64 read_len;
7084         u64 data_checked = 0;
7085         u64 tmp;
7086         int ret = 0;
7087         int mirror;
7088         int num_copies;
7089
7090         if (num_bytes % root->sectorsize)
7091                 return -EINVAL;
7092
7093         data = malloc(num_bytes);
7094         if (!data)
7095                 return -ENOMEM;
7096
7097         while (offset < num_bytes) {
7098                 mirror = 0;
7099 again:
7100                 read_len = num_bytes - offset;
7101                 /* read as much space once a time */
7102                 ret = read_extent_data(root, data + offset,
7103                                 bytenr + offset, &read_len, mirror);
7104                 if (ret)
7105                         goto out;
7106                 data_checked = 0;
7107                 /* verify every 4k data's checksum */
7108                 while (data_checked < read_len) {
7109                         csum = ~(u32)0;
7110                         tmp = offset + data_checked;
7111
7112                         csum = btrfs_csum_data((char *)data + tmp,
7113                                                csum, root->sectorsize);
7114                         btrfs_csum_final(csum, (u8 *)&csum);
7115
7116                         csum_offset = leaf_offset +
7117                                  tmp / root->sectorsize * csum_size;
7118                         read_extent_buffer(eb, (char *)&csum_expected,
7119                                            csum_offset, csum_size);
7120                         /* try another mirror */
7121                         if (csum != csum_expected) {
7122                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7123                                                 mirror, bytenr + tmp,
7124                                                 csum, csum_expected);
7125                                 num_copies = btrfs_num_copies(
7126                                                 &root->fs_info->mapping_tree,
7127                                                 bytenr, num_bytes);
7128                                 if (mirror < num_copies - 1) {
7129                                         mirror += 1;
7130                                         goto again;
7131                                 }
7132                         }
7133                         data_checked += root->sectorsize;
7134                 }
7135                 offset += read_len;
7136         }
7137 out:
7138         free(data);
7139         return ret;
7140 }
7141
7142 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7143                                u64 num_bytes)
7144 {
7145         struct btrfs_path path;
7146         struct extent_buffer *leaf;
7147         struct btrfs_key key;
7148         int ret;
7149
7150         btrfs_init_path(&path);
7151         key.objectid = bytenr;
7152         key.type = BTRFS_EXTENT_ITEM_KEY;
7153         key.offset = (u64)-1;
7154
7155 again:
7156         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7157                                 0, 0);
7158         if (ret < 0) {
7159                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7160                 btrfs_release_path(&path);
7161                 return ret;
7162         } else if (ret) {
7163                 if (path.slots[0] > 0) {
7164                         path.slots[0]--;
7165                 } else {
7166                         ret = btrfs_prev_leaf(root, &path);
7167                         if (ret < 0) {
7168                                 goto out;
7169                         } else if (ret > 0) {
7170                                 ret = 0;
7171                                 goto out;
7172                         }
7173                 }
7174         }
7175
7176         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7177
7178         /*
7179          * Block group items come before extent items if they have the same
7180          * bytenr, so walk back one more just in case.  Dear future traveller,
7181          * first congrats on mastering time travel.  Now if it's not too much
7182          * trouble could you go back to 2006 and tell Chris to make the
7183          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7184          * EXTENT_ITEM_KEY please?
7185          */
7186         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7187                 if (path.slots[0] > 0) {
7188                         path.slots[0]--;
7189                 } else {
7190                         ret = btrfs_prev_leaf(root, &path);
7191                         if (ret < 0) {
7192                                 goto out;
7193                         } else if (ret > 0) {
7194                                 ret = 0;
7195                                 goto out;
7196                         }
7197                 }
7198                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7199         }
7200
7201         while (num_bytes) {
7202                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7203                         ret = btrfs_next_leaf(root, &path);
7204                         if (ret < 0) {
7205                                 fprintf(stderr, "Error going to next leaf "
7206                                         "%d\n", ret);
7207                                 btrfs_release_path(&path);
7208                                 return ret;
7209                         } else if (ret) {
7210                                 break;
7211                         }
7212                 }
7213                 leaf = path.nodes[0];
7214                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7215                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7216                         path.slots[0]++;
7217                         continue;
7218                 }
7219                 if (key.objectid + key.offset < bytenr) {
7220                         path.slots[0]++;
7221                         continue;
7222                 }
7223                 if (key.objectid > bytenr + num_bytes)
7224                         break;
7225
7226                 if (key.objectid == bytenr) {
7227                         if (key.offset >= num_bytes) {
7228                                 num_bytes = 0;
7229                                 break;
7230                         }
7231                         num_bytes -= key.offset;
7232                         bytenr += key.offset;
7233                 } else if (key.objectid < bytenr) {
7234                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7235                                 num_bytes = 0;
7236                                 break;
7237                         }
7238                         num_bytes = (bytenr + num_bytes) -
7239                                 (key.objectid + key.offset);
7240                         bytenr = key.objectid + key.offset;
7241                 } else {
7242                         if (key.objectid + key.offset < bytenr + num_bytes) {
7243                                 u64 new_start = key.objectid + key.offset;
7244                                 u64 new_bytes = bytenr + num_bytes - new_start;
7245
7246                                 /*
7247                                  * Weird case, the extent is in the middle of
7248                                  * our range, we'll have to search one side
7249                                  * and then the other.  Not sure if this happens
7250                                  * in real life, but no harm in coding it up
7251                                  * anyway just in case.
7252                                  */
7253                                 btrfs_release_path(&path);
7254                                 ret = check_extent_exists(root, new_start,
7255                                                           new_bytes);
7256                                 if (ret) {
7257                                         fprintf(stderr, "Right section didn't "
7258                                                 "have a record\n");
7259                                         break;
7260                                 }
7261                                 num_bytes = key.objectid - bytenr;
7262                                 goto again;
7263                         }
7264                         num_bytes = key.objectid - bytenr;
7265                 }
7266                 path.slots[0]++;
7267         }
7268         ret = 0;
7269
7270 out:
7271         if (num_bytes && !ret) {
7272                 fprintf(stderr, "There are no extents for csum range "
7273                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7274                 ret = 1;
7275         }
7276
7277         btrfs_release_path(&path);
7278         return ret;
7279 }
7280
7281 static int check_csums(struct btrfs_root *root)
7282 {
7283         struct btrfs_path path;
7284         struct extent_buffer *leaf;
7285         struct btrfs_key key;
7286         u64 offset = 0, num_bytes = 0;
7287         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7288         int errors = 0;
7289         int ret;
7290         u64 data_len;
7291         unsigned long leaf_offset;
7292
7293         root = root->fs_info->csum_root;
7294         if (!extent_buffer_uptodate(root->node)) {
7295                 fprintf(stderr, "No valid csum tree found\n");
7296                 return -ENOENT;
7297         }
7298
7299         btrfs_init_path(&path);
7300         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7301         key.type = BTRFS_EXTENT_CSUM_KEY;
7302         key.offset = 0;
7303         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7304         if (ret < 0) {
7305                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7306                 btrfs_release_path(&path);
7307                 return ret;
7308         }
7309
7310         if (ret > 0 && path.slots[0])
7311                 path.slots[0]--;
7312         ret = 0;
7313
7314         while (1) {
7315                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7316                         ret = btrfs_next_leaf(root, &path);
7317                         if (ret < 0) {
7318                                 fprintf(stderr, "Error going to next leaf "
7319                                         "%d\n", ret);
7320                                 break;
7321                         }
7322                         if (ret)
7323                                 break;
7324                 }
7325                 leaf = path.nodes[0];
7326
7327                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7328                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7329                         path.slots[0]++;
7330                         continue;
7331                 }
7332
7333                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7334                               csum_size) * root->sectorsize;
7335                 if (!check_data_csum)
7336                         goto skip_csum_check;
7337                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7338                 ret = check_extent_csums(root, key.offset, data_len,
7339                                          leaf_offset, leaf);
7340                 if (ret)
7341                         break;
7342 skip_csum_check:
7343                 if (!num_bytes) {
7344                         offset = key.offset;
7345                 } else if (key.offset != offset + num_bytes) {
7346                         ret = check_extent_exists(root, offset, num_bytes);
7347                         if (ret) {
7348                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7349                                         "there is no extent record\n",
7350                                         offset, offset+num_bytes);
7351                                 errors++;
7352                         }
7353                         offset = key.offset;
7354                         num_bytes = 0;
7355                 }
7356                 num_bytes += data_len;
7357                 path.slots[0]++;
7358         }
7359
7360         btrfs_release_path(&path);
7361         return errors;
7362 }
7363
7364 static int is_dropped_key(struct btrfs_key *key,
7365                           struct btrfs_key *drop_key) {
7366         if (key->objectid < drop_key->objectid)
7367                 return 1;
7368         else if (key->objectid == drop_key->objectid) {
7369                 if (key->type < drop_key->type)
7370                         return 1;
7371                 else if (key->type == drop_key->type) {
7372                         if (key->offset < drop_key->offset)
7373                                 return 1;
7374                 }
7375         }
7376         return 0;
7377 }
7378
7379 /*
7380  * Here are the rules for FULL_BACKREF.
7381  *
7382  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7383  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7384  *      FULL_BACKREF set.
7385  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7386  *    if it happened after the relocation occurred since we'll have dropped the
7387  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7388  *    have no real way to know for sure.
7389  *
7390  * We process the blocks one root at a time, and we start from the lowest root
7391  * objectid and go to the highest.  So we can just lookup the owner backref for
7392  * the record and if we don't find it then we know it doesn't exist and we have
7393  * a FULL BACKREF.
7394  *
7395  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7396  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7397  * be set or not and then we can check later once we've gathered all the refs.
7398  */
7399 static int calc_extent_flag(struct btrfs_root *root,
7400                            struct cache_tree *extent_cache,
7401                            struct extent_buffer *buf,
7402                            struct root_item_record *ri,
7403                            u64 *flags)
7404 {
7405         struct extent_record *rec;
7406         struct cache_extent *cache;
7407         struct tree_backref *tback;
7408         u64 owner = 0;
7409
7410         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7411         /* we have added this extent before */
7412         if (!cache)
7413                 return -ENOENT;
7414
7415         rec = container_of(cache, struct extent_record, cache);
7416
7417         /*
7418          * Except file/reloc tree, we can not have
7419          * FULL BACKREF MODE
7420          */
7421         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7422                 goto normal;
7423         /*
7424          * root node
7425          */
7426         if (buf->start == ri->bytenr)
7427                 goto normal;
7428
7429         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7430                 goto full_backref;
7431
7432         owner = btrfs_header_owner(buf);
7433         if (owner == ri->objectid)
7434                 goto normal;
7435
7436         tback = find_tree_backref(rec, 0, owner);
7437         if (!tback)
7438                 goto full_backref;
7439 normal:
7440         *flags = 0;
7441         if (rec->flag_block_full_backref != FLAG_UNSET &&
7442             rec->flag_block_full_backref != 0)
7443                 rec->bad_full_backref = 1;
7444         return 0;
7445 full_backref:
7446         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7447         if (rec->flag_block_full_backref != FLAG_UNSET &&
7448             rec->flag_block_full_backref != 1)
7449                 rec->bad_full_backref = 1;
7450         return 0;
7451 }
7452
7453 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7454 {
7455         fprintf(stderr, "Invalid key type(");
7456         print_key_type(stderr, 0, key_type);
7457         fprintf(stderr, ") found in root(");
7458         print_objectid(stderr, rootid, 0);
7459         fprintf(stderr, ")\n");
7460 }
7461
7462 /*
7463  * Check if the key is valid with its extent buffer.
7464  *
7465  * This is a early check in case invalid key exists in a extent buffer
7466  * This is not comprehensive yet, but should prevent wrong key/item passed
7467  * further
7468  */
7469 static int check_type_with_root(u64 rootid, u8 key_type)
7470 {
7471         switch (key_type) {
7472         /* Only valid in chunk tree */
7473         case BTRFS_DEV_ITEM_KEY:
7474         case BTRFS_CHUNK_ITEM_KEY:
7475                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7476                         goto err;
7477                 break;
7478         /* valid in csum and log tree */
7479         case BTRFS_CSUM_TREE_OBJECTID:
7480                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7481                       is_fstree(rootid)))
7482                         goto err;
7483                 break;
7484         case BTRFS_EXTENT_ITEM_KEY:
7485         case BTRFS_METADATA_ITEM_KEY:
7486         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7487                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7488                         goto err;
7489                 break;
7490         case BTRFS_ROOT_ITEM_KEY:
7491                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7492                         goto err;
7493                 break;
7494         case BTRFS_DEV_EXTENT_KEY:
7495                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7496                         goto err;
7497                 break;
7498         }
7499         return 0;
7500 err:
7501         report_mismatch_key_root(key_type, rootid);
7502         return -EINVAL;
7503 }
7504
7505 static int run_next_block(struct btrfs_root *root,
7506                           struct block_info *bits,
7507                           int bits_nr,
7508                           u64 *last,
7509                           struct cache_tree *pending,
7510                           struct cache_tree *seen,
7511                           struct cache_tree *reada,
7512                           struct cache_tree *nodes,
7513                           struct cache_tree *extent_cache,
7514                           struct cache_tree *chunk_cache,
7515                           struct rb_root *dev_cache,
7516                           struct block_group_tree *block_group_cache,
7517                           struct device_extent_tree *dev_extent_cache,
7518                           struct root_item_record *ri)
7519 {
7520         struct extent_buffer *buf;
7521         struct extent_record *rec = NULL;
7522         u64 bytenr;
7523         u32 size;
7524         u64 parent;
7525         u64 owner;
7526         u64 flags;
7527         u64 ptr;
7528         u64 gen = 0;
7529         int ret = 0;
7530         int i;
7531         int nritems;
7532         struct btrfs_key key;
7533         struct cache_extent *cache;
7534         int reada_bits;
7535
7536         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7537                                     bits_nr, &reada_bits);
7538         if (nritems == 0)
7539                 return 1;
7540
7541         if (!reada_bits) {
7542                 for(i = 0; i < nritems; i++) {
7543                         ret = add_cache_extent(reada, bits[i].start,
7544                                                bits[i].size);
7545                         if (ret == -EEXIST)
7546                                 continue;
7547
7548                         /* fixme, get the parent transid */
7549                         readahead_tree_block(root, bits[i].start,
7550                                              bits[i].size, 0);
7551                 }
7552         }
7553         *last = bits[0].start;
7554         bytenr = bits[0].start;
7555         size = bits[0].size;
7556
7557         cache = lookup_cache_extent(pending, bytenr, size);
7558         if (cache) {
7559                 remove_cache_extent(pending, cache);
7560                 free(cache);
7561         }
7562         cache = lookup_cache_extent(reada, bytenr, size);
7563         if (cache) {
7564                 remove_cache_extent(reada, cache);
7565                 free(cache);
7566         }
7567         cache = lookup_cache_extent(nodes, bytenr, size);
7568         if (cache) {
7569                 remove_cache_extent(nodes, cache);
7570                 free(cache);
7571         }
7572         cache = lookup_cache_extent(extent_cache, bytenr, size);
7573         if (cache) {
7574                 rec = container_of(cache, struct extent_record, cache);
7575                 gen = rec->parent_generation;
7576         }
7577
7578         /* fixme, get the real parent transid */
7579         buf = read_tree_block(root, bytenr, size, gen);
7580         if (!extent_buffer_uptodate(buf)) {
7581                 record_bad_block_io(root->fs_info,
7582                                     extent_cache, bytenr, size);
7583                 goto out;
7584         }
7585
7586         nritems = btrfs_header_nritems(buf);
7587
7588         flags = 0;
7589         if (!init_extent_tree) {
7590                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7591                                        btrfs_header_level(buf), 1, NULL,
7592                                        &flags);
7593                 if (ret < 0) {
7594                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7595                         if (ret < 0) {
7596                                 fprintf(stderr, "Couldn't calc extent flags\n");
7597                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7598                         }
7599                 }
7600         } else {
7601                 flags = 0;
7602                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7603                 if (ret < 0) {
7604                         fprintf(stderr, "Couldn't calc extent flags\n");
7605                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7606                 }
7607         }
7608
7609         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7610                 if (ri != NULL &&
7611                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7612                     ri->objectid == btrfs_header_owner(buf)) {
7613                         /*
7614                          * Ok we got to this block from it's original owner and
7615                          * we have FULL_BACKREF set.  Relocation can leave
7616                          * converted blocks over so this is altogether possible,
7617                          * however it's not possible if the generation > the
7618                          * last snapshot, so check for this case.
7619                          */
7620                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7621                             btrfs_header_generation(buf) > ri->last_snapshot) {
7622                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7623                                 rec->bad_full_backref = 1;
7624                         }
7625                 }
7626         } else {
7627                 if (ri != NULL &&
7628                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7629                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7630                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7631                         rec->bad_full_backref = 1;
7632                 }
7633         }
7634
7635         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7636                 rec->flag_block_full_backref = 1;
7637                 parent = bytenr;
7638                 owner = 0;
7639         } else {
7640                 rec->flag_block_full_backref = 0;
7641                 parent = 0;
7642                 owner = btrfs_header_owner(buf);
7643         }
7644
7645         ret = check_block(root, extent_cache, buf, flags);
7646         if (ret)
7647                 goto out;
7648
7649         if (btrfs_is_leaf(buf)) {
7650                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7651                 for (i = 0; i < nritems; i++) {
7652                         struct btrfs_file_extent_item *fi;
7653                         btrfs_item_key_to_cpu(buf, &key, i);
7654                         /*
7655                          * Check key type against the leaf owner.
7656                          * Could filter quite a lot of early error if
7657                          * owner is correct
7658                          */
7659                         if (check_type_with_root(btrfs_header_owner(buf),
7660                                                  key.type)) {
7661                                 fprintf(stderr, "ignoring invalid key\n");
7662                                 continue;
7663                         }
7664                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7665                                 process_extent_item(root, extent_cache, buf,
7666                                                     i);
7667                                 continue;
7668                         }
7669                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7670                                 process_extent_item(root, extent_cache, buf,
7671                                                     i);
7672                                 continue;
7673                         }
7674                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7675                                 total_csum_bytes +=
7676                                         btrfs_item_size_nr(buf, i);
7677                                 continue;
7678                         }
7679                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7680                                 process_chunk_item(chunk_cache, &key, buf, i);
7681                                 continue;
7682                         }
7683                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7684                                 process_device_item(dev_cache, &key, buf, i);
7685                                 continue;
7686                         }
7687                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7688                                 process_block_group_item(block_group_cache,
7689                                         &key, buf, i);
7690                                 continue;
7691                         }
7692                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7693                                 process_device_extent_item(dev_extent_cache,
7694                                         &key, buf, i);
7695                                 continue;
7696
7697                         }
7698                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7699 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7700                                 process_extent_ref_v0(extent_cache, buf, i);
7701 #else
7702                                 BUG();
7703 #endif
7704                                 continue;
7705                         }
7706
7707                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7708                                 ret = add_tree_backref(extent_cache,
7709                                                 key.objectid, 0, key.offset, 0);
7710                                 if (ret < 0)
7711                                         error("add_tree_backref failed: %s",
7712                                               strerror(-ret));
7713                                 continue;
7714                         }
7715                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7716                                 ret = add_tree_backref(extent_cache,
7717                                                 key.objectid, key.offset, 0, 0);
7718                                 if (ret < 0)
7719                                         error("add_tree_backref failed: %s",
7720                                               strerror(-ret));
7721                                 continue;
7722                         }
7723                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7724                                 struct btrfs_extent_data_ref *ref;
7725                                 ref = btrfs_item_ptr(buf, i,
7726                                                 struct btrfs_extent_data_ref);
7727                                 add_data_backref(extent_cache,
7728                                         key.objectid, 0,
7729                                         btrfs_extent_data_ref_root(buf, ref),
7730                                         btrfs_extent_data_ref_objectid(buf,
7731                                                                        ref),
7732                                         btrfs_extent_data_ref_offset(buf, ref),
7733                                         btrfs_extent_data_ref_count(buf, ref),
7734                                         0, root->sectorsize);
7735                                 continue;
7736                         }
7737                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7738                                 struct btrfs_shared_data_ref *ref;
7739                                 ref = btrfs_item_ptr(buf, i,
7740                                                 struct btrfs_shared_data_ref);
7741                                 add_data_backref(extent_cache,
7742                                         key.objectid, key.offset, 0, 0, 0,
7743                                         btrfs_shared_data_ref_count(buf, ref),
7744                                         0, root->sectorsize);
7745                                 continue;
7746                         }
7747                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7748                                 struct bad_item *bad;
7749
7750                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7751                                         continue;
7752                                 if (!owner)
7753                                         continue;
7754                                 bad = malloc(sizeof(struct bad_item));
7755                                 if (!bad)
7756                                         continue;
7757                                 INIT_LIST_HEAD(&bad->list);
7758                                 memcpy(&bad->key, &key,
7759                                        sizeof(struct btrfs_key));
7760                                 bad->root_id = owner;
7761                                 list_add_tail(&bad->list, &delete_items);
7762                                 continue;
7763                         }
7764                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7765                                 continue;
7766                         fi = btrfs_item_ptr(buf, i,
7767                                             struct btrfs_file_extent_item);
7768                         if (btrfs_file_extent_type(buf, fi) ==
7769                             BTRFS_FILE_EXTENT_INLINE)
7770                                 continue;
7771                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7772                                 continue;
7773
7774                         data_bytes_allocated +=
7775                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7776                         if (data_bytes_allocated < root->sectorsize) {
7777                                 abort();
7778                         }
7779                         data_bytes_referenced +=
7780                                 btrfs_file_extent_num_bytes(buf, fi);
7781                         add_data_backref(extent_cache,
7782                                 btrfs_file_extent_disk_bytenr(buf, fi),
7783                                 parent, owner, key.objectid, key.offset -
7784                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7785                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7786                 }
7787         } else {
7788                 int level;
7789                 struct btrfs_key first_key;
7790
7791                 first_key.objectid = 0;
7792
7793                 if (nritems > 0)
7794                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7795                 level = btrfs_header_level(buf);
7796                 for (i = 0; i < nritems; i++) {
7797                         struct extent_record tmpl;
7798
7799                         ptr = btrfs_node_blockptr(buf, i);
7800                         size = root->nodesize;
7801                         btrfs_node_key_to_cpu(buf, &key, i);
7802                         if (ri != NULL) {
7803                                 if ((level == ri->drop_level)
7804                                     && is_dropped_key(&key, &ri->drop_key)) {
7805                                         continue;
7806                                 }
7807                         }
7808
7809                         memset(&tmpl, 0, sizeof(tmpl));
7810                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7811                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7812                         tmpl.start = ptr;
7813                         tmpl.nr = size;
7814                         tmpl.refs = 1;
7815                         tmpl.metadata = 1;
7816                         tmpl.max_size = size;
7817                         ret = add_extent_rec(extent_cache, &tmpl);
7818                         if (ret < 0)
7819                                 goto out;
7820
7821                         ret = add_tree_backref(extent_cache, ptr, parent,
7822                                         owner, 1);
7823                         if (ret < 0) {
7824                                 error("add_tree_backref failed: %s",
7825                                       strerror(-ret));
7826                                 continue;
7827                         }
7828
7829                         if (level > 1) {
7830                                 add_pending(nodes, seen, ptr, size);
7831                         } else {
7832                                 add_pending(pending, seen, ptr, size);
7833                         }
7834                 }
7835                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7836                                       nritems) * sizeof(struct btrfs_key_ptr);
7837         }
7838         total_btree_bytes += buf->len;
7839         if (fs_root_objectid(btrfs_header_owner(buf)))
7840                 total_fs_tree_bytes += buf->len;
7841         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7842                 total_extent_tree_bytes += buf->len;
7843         if (!found_old_backref &&
7844             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7845             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7846             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7847                 found_old_backref = 1;
7848 out:
7849         free_extent_buffer(buf);
7850         return ret;
7851 }
7852
7853 static int add_root_to_pending(struct extent_buffer *buf,
7854                                struct cache_tree *extent_cache,
7855                                struct cache_tree *pending,
7856                                struct cache_tree *seen,
7857                                struct cache_tree *nodes,
7858                                u64 objectid)
7859 {
7860         struct extent_record tmpl;
7861         int ret;
7862
7863         if (btrfs_header_level(buf) > 0)
7864                 add_pending(nodes, seen, buf->start, buf->len);
7865         else
7866                 add_pending(pending, seen, buf->start, buf->len);
7867
7868         memset(&tmpl, 0, sizeof(tmpl));
7869         tmpl.start = buf->start;
7870         tmpl.nr = buf->len;
7871         tmpl.is_root = 1;
7872         tmpl.refs = 1;
7873         tmpl.metadata = 1;
7874         tmpl.max_size = buf->len;
7875         add_extent_rec(extent_cache, &tmpl);
7876
7877         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7878             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7879                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7880                                 0, 1);
7881         else
7882                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7883                                 1);
7884         return ret;
7885 }
7886
7887 /* as we fix the tree, we might be deleting blocks that
7888  * we're tracking for repair.  This hook makes sure we
7889  * remove any backrefs for blocks as we are fixing them.
7890  */
7891 static int free_extent_hook(struct btrfs_trans_handle *trans,
7892                             struct btrfs_root *root,
7893                             u64 bytenr, u64 num_bytes, u64 parent,
7894                             u64 root_objectid, u64 owner, u64 offset,
7895                             int refs_to_drop)
7896 {
7897         struct extent_record *rec;
7898         struct cache_extent *cache;
7899         int is_data;
7900         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7901
7902         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7903         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7904         if (!cache)
7905                 return 0;
7906
7907         rec = container_of(cache, struct extent_record, cache);
7908         if (is_data) {
7909                 struct data_backref *back;
7910                 back = find_data_backref(rec, parent, root_objectid, owner,
7911                                          offset, 1, bytenr, num_bytes);
7912                 if (!back)
7913                         goto out;
7914                 if (back->node.found_ref) {
7915                         back->found_ref -= refs_to_drop;
7916                         if (rec->refs)
7917                                 rec->refs -= refs_to_drop;
7918                 }
7919                 if (back->node.found_extent_tree) {
7920                         back->num_refs -= refs_to_drop;
7921                         if (rec->extent_item_refs)
7922                                 rec->extent_item_refs -= refs_to_drop;
7923                 }
7924                 if (back->found_ref == 0)
7925                         back->node.found_ref = 0;
7926                 if (back->num_refs == 0)
7927                         back->node.found_extent_tree = 0;
7928
7929                 if (!back->node.found_extent_tree && back->node.found_ref) {
7930                         list_del(&back->node.list);
7931                         free(back);
7932                 }
7933         } else {
7934                 struct tree_backref *back;
7935                 back = find_tree_backref(rec, parent, root_objectid);
7936                 if (!back)
7937                         goto out;
7938                 if (back->node.found_ref) {
7939                         if (rec->refs)
7940                                 rec->refs--;
7941                         back->node.found_ref = 0;
7942                 }
7943                 if (back->node.found_extent_tree) {
7944                         if (rec->extent_item_refs)
7945                                 rec->extent_item_refs--;
7946                         back->node.found_extent_tree = 0;
7947                 }
7948                 if (!back->node.found_extent_tree && back->node.found_ref) {
7949                         list_del(&back->node.list);
7950                         free(back);
7951                 }
7952         }
7953         maybe_free_extent_rec(extent_cache, rec);
7954 out:
7955         return 0;
7956 }
7957
7958 static int delete_extent_records(struct btrfs_trans_handle *trans,
7959                                  struct btrfs_root *root,
7960                                  struct btrfs_path *path,
7961                                  u64 bytenr)
7962 {
7963         struct btrfs_key key;
7964         struct btrfs_key found_key;
7965         struct extent_buffer *leaf;
7966         int ret;
7967         int slot;
7968
7969
7970         key.objectid = bytenr;
7971         key.type = (u8)-1;
7972         key.offset = (u64)-1;
7973
7974         while(1) {
7975                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7976                                         &key, path, 0, 1);
7977                 if (ret < 0)
7978                         break;
7979
7980                 if (ret > 0) {
7981                         ret = 0;
7982                         if (path->slots[0] == 0)
7983                                 break;
7984                         path->slots[0]--;
7985                 }
7986                 ret = 0;
7987
7988                 leaf = path->nodes[0];
7989                 slot = path->slots[0];
7990
7991                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7992                 if (found_key.objectid != bytenr)
7993                         break;
7994
7995                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7996                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7997                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7998                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7999                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8000                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8001                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8002                         btrfs_release_path(path);
8003                         if (found_key.type == 0) {
8004                                 if (found_key.offset == 0)
8005                                         break;
8006                                 key.offset = found_key.offset - 1;
8007                                 key.type = found_key.type;
8008                         }
8009                         key.type = found_key.type - 1;
8010                         key.offset = (u64)-1;
8011                         continue;
8012                 }
8013
8014                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8015                         found_key.objectid, found_key.type, found_key.offset);
8016
8017                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8018                 if (ret)
8019                         break;
8020                 btrfs_release_path(path);
8021
8022                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8023                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8024                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8025                                 found_key.offset : root->nodesize;
8026
8027                         ret = btrfs_update_block_group(trans, root, bytenr,
8028                                                        bytes, 0, 0);
8029                         if (ret)
8030                                 break;
8031                 }
8032         }
8033
8034         btrfs_release_path(path);
8035         return ret;
8036 }
8037
8038 /*
8039  * for a single backref, this will allocate a new extent
8040  * and add the backref to it.
8041  */
8042 static int record_extent(struct btrfs_trans_handle *trans,
8043                          struct btrfs_fs_info *info,
8044                          struct btrfs_path *path,
8045                          struct extent_record *rec,
8046                          struct extent_backref *back,
8047                          int allocated, u64 flags)
8048 {
8049         int ret = 0;
8050         struct btrfs_root *extent_root = info->extent_root;
8051         struct extent_buffer *leaf;
8052         struct btrfs_key ins_key;
8053         struct btrfs_extent_item *ei;
8054         struct data_backref *dback;
8055         struct btrfs_tree_block_info *bi;
8056
8057         if (!back->is_data)
8058                 rec->max_size = max_t(u64, rec->max_size,
8059                                     info->extent_root->nodesize);
8060
8061         if (!allocated) {
8062                 u32 item_size = sizeof(*ei);
8063
8064                 if (!back->is_data)
8065                         item_size += sizeof(*bi);
8066
8067                 ins_key.objectid = rec->start;
8068                 ins_key.offset = rec->max_size;
8069                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8070
8071                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8072                                         &ins_key, item_size);
8073                 if (ret)
8074                         goto fail;
8075
8076                 leaf = path->nodes[0];
8077                 ei = btrfs_item_ptr(leaf, path->slots[0],
8078                                     struct btrfs_extent_item);
8079
8080                 btrfs_set_extent_refs(leaf, ei, 0);
8081                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8082
8083                 if (back->is_data) {
8084                         btrfs_set_extent_flags(leaf, ei,
8085                                                BTRFS_EXTENT_FLAG_DATA);
8086                 } else {
8087                         struct btrfs_disk_key copy_key;;
8088
8089                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8090                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8091                                              sizeof(*bi));
8092
8093                         btrfs_set_disk_key_objectid(&copy_key,
8094                                                     rec->info_objectid);
8095                         btrfs_set_disk_key_type(&copy_key, 0);
8096                         btrfs_set_disk_key_offset(&copy_key, 0);
8097
8098                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8099                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8100
8101                         btrfs_set_extent_flags(leaf, ei,
8102                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8103                 }
8104
8105                 btrfs_mark_buffer_dirty(leaf);
8106                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8107                                                rec->max_size, 1, 0);
8108                 if (ret)
8109                         goto fail;
8110                 btrfs_release_path(path);
8111         }
8112
8113         if (back->is_data) {
8114                 u64 parent;
8115                 int i;
8116
8117                 dback = to_data_backref(back);
8118                 if (back->full_backref)
8119                         parent = dback->parent;
8120                 else
8121                         parent = 0;
8122
8123                 for (i = 0; i < dback->found_ref; i++) {
8124                         /* if parent != 0, we're doing a full backref
8125                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8126                          * just makes the backref allocator create a data
8127                          * backref
8128                          */
8129                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8130                                                    rec->start, rec->max_size,
8131                                                    parent,
8132                                                    dback->root,
8133                                                    parent ?
8134                                                    BTRFS_FIRST_FREE_OBJECTID :
8135                                                    dback->owner,
8136                                                    dback->offset);
8137                         if (ret)
8138                                 break;
8139                 }
8140                 fprintf(stderr, "adding new data backref"
8141                                 " on %llu %s %llu owner %llu"
8142                                 " offset %llu found %d\n",
8143                                 (unsigned long long)rec->start,
8144                                 back->full_backref ?
8145                                 "parent" : "root",
8146                                 back->full_backref ?
8147                                 (unsigned long long)parent :
8148                                 (unsigned long long)dback->root,
8149                                 (unsigned long long)dback->owner,
8150                                 (unsigned long long)dback->offset,
8151                                 dback->found_ref);
8152         } else {
8153                 u64 parent;
8154                 struct tree_backref *tback;
8155
8156                 tback = to_tree_backref(back);
8157                 if (back->full_backref)
8158                         parent = tback->parent;
8159                 else
8160                         parent = 0;
8161
8162                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8163                                            rec->start, rec->max_size,
8164                                            parent, tback->root, 0, 0);
8165                 fprintf(stderr, "adding new tree backref on "
8166                         "start %llu len %llu parent %llu root %llu\n",
8167                         rec->start, rec->max_size, parent, tback->root);
8168         }
8169 fail:
8170         btrfs_release_path(path);
8171         return ret;
8172 }
8173
8174 static struct extent_entry *find_entry(struct list_head *entries,
8175                                        u64 bytenr, u64 bytes)
8176 {
8177         struct extent_entry *entry = NULL;
8178
8179         list_for_each_entry(entry, entries, list) {
8180                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8181                         return entry;
8182         }
8183
8184         return NULL;
8185 }
8186
8187 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8188 {
8189         struct extent_entry *entry, *best = NULL, *prev = NULL;
8190
8191         list_for_each_entry(entry, entries, list) {
8192                 /*
8193                  * If there are as many broken entries as entries then we know
8194                  * not to trust this particular entry.
8195                  */
8196                 if (entry->broken == entry->count)
8197                         continue;
8198
8199                 /*
8200                  * Special case, when there are only two entries and 'best' is
8201                  * the first one
8202                  */
8203                 if (!prev) {
8204                         best = entry;
8205                         prev = entry;
8206                         continue;
8207                 }
8208
8209                 /*
8210                  * If our current entry == best then we can't be sure our best
8211                  * is really the best, so we need to keep searching.
8212                  */
8213                 if (best && best->count == entry->count) {
8214                         prev = entry;
8215                         best = NULL;
8216                         continue;
8217                 }
8218
8219                 /* Prev == entry, not good enough, have to keep searching */
8220                 if (!prev->broken && prev->count == entry->count)
8221                         continue;
8222
8223                 if (!best)
8224                         best = (prev->count > entry->count) ? prev : entry;
8225                 else if (best->count < entry->count)
8226                         best = entry;
8227                 prev = entry;
8228         }
8229
8230         return best;
8231 }
8232
8233 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8234                       struct data_backref *dback, struct extent_entry *entry)
8235 {
8236         struct btrfs_trans_handle *trans;
8237         struct btrfs_root *root;
8238         struct btrfs_file_extent_item *fi;
8239         struct extent_buffer *leaf;
8240         struct btrfs_key key;
8241         u64 bytenr, bytes;
8242         int ret, err;
8243
8244         key.objectid = dback->root;
8245         key.type = BTRFS_ROOT_ITEM_KEY;
8246         key.offset = (u64)-1;
8247         root = btrfs_read_fs_root(info, &key);
8248         if (IS_ERR(root)) {
8249                 fprintf(stderr, "Couldn't find root for our ref\n");
8250                 return -EINVAL;
8251         }
8252
8253         /*
8254          * The backref points to the original offset of the extent if it was
8255          * split, so we need to search down to the offset we have and then walk
8256          * forward until we find the backref we're looking for.
8257          */
8258         key.objectid = dback->owner;
8259         key.type = BTRFS_EXTENT_DATA_KEY;
8260         key.offset = dback->offset;
8261         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8262         if (ret < 0) {
8263                 fprintf(stderr, "Error looking up ref %d\n", ret);
8264                 return ret;
8265         }
8266
8267         while (1) {
8268                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8269                         ret = btrfs_next_leaf(root, path);
8270                         if (ret) {
8271                                 fprintf(stderr, "Couldn't find our ref, next\n");
8272                                 return -EINVAL;
8273                         }
8274                 }
8275                 leaf = path->nodes[0];
8276                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8277                 if (key.objectid != dback->owner ||
8278                     key.type != BTRFS_EXTENT_DATA_KEY) {
8279                         fprintf(stderr, "Couldn't find our ref, search\n");
8280                         return -EINVAL;
8281                 }
8282                 fi = btrfs_item_ptr(leaf, path->slots[0],
8283                                     struct btrfs_file_extent_item);
8284                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8285                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8286
8287                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8288                         break;
8289                 path->slots[0]++;
8290         }
8291
8292         btrfs_release_path(path);
8293
8294         trans = btrfs_start_transaction(root, 1);
8295         if (IS_ERR(trans))
8296                 return PTR_ERR(trans);
8297
8298         /*
8299          * Ok we have the key of the file extent we want to fix, now we can cow
8300          * down to the thing and fix it.
8301          */
8302         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8303         if (ret < 0) {
8304                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8305                         key.objectid, key.type, key.offset, ret);
8306                 goto out;
8307         }
8308         if (ret > 0) {
8309                 fprintf(stderr, "Well that's odd, we just found this key "
8310                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8311                         key.offset);
8312                 ret = -EINVAL;
8313                 goto out;
8314         }
8315         leaf = path->nodes[0];
8316         fi = btrfs_item_ptr(leaf, path->slots[0],
8317                             struct btrfs_file_extent_item);
8318
8319         if (btrfs_file_extent_compression(leaf, fi) &&
8320             dback->disk_bytenr != entry->bytenr) {
8321                 fprintf(stderr, "Ref doesn't match the record start and is "
8322                         "compressed, please take a btrfs-image of this file "
8323                         "system and send it to a btrfs developer so they can "
8324                         "complete this functionality for bytenr %Lu\n",
8325                         dback->disk_bytenr);
8326                 ret = -EINVAL;
8327                 goto out;
8328         }
8329
8330         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8331                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8332         } else if (dback->disk_bytenr > entry->bytenr) {
8333                 u64 off_diff, offset;
8334
8335                 off_diff = dback->disk_bytenr - entry->bytenr;
8336                 offset = btrfs_file_extent_offset(leaf, fi);
8337                 if (dback->disk_bytenr + offset +
8338                     btrfs_file_extent_num_bytes(leaf, fi) >
8339                     entry->bytenr + entry->bytes) {
8340                         fprintf(stderr, "Ref is past the entry end, please "
8341                                 "take a btrfs-image of this file system and "
8342                                 "send it to a btrfs developer, ref %Lu\n",
8343                                 dback->disk_bytenr);
8344                         ret = -EINVAL;
8345                         goto out;
8346                 }
8347                 offset += off_diff;
8348                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8349                 btrfs_set_file_extent_offset(leaf, fi, offset);
8350         } else if (dback->disk_bytenr < entry->bytenr) {
8351                 u64 offset;
8352
8353                 offset = btrfs_file_extent_offset(leaf, fi);
8354                 if (dback->disk_bytenr + offset < entry->bytenr) {
8355                         fprintf(stderr, "Ref is before the entry start, please"
8356                                 " take a btrfs-image of this file system and "
8357                                 "send it to a btrfs developer, ref %Lu\n",
8358                                 dback->disk_bytenr);
8359                         ret = -EINVAL;
8360                         goto out;
8361                 }
8362
8363                 offset += dback->disk_bytenr;
8364                 offset -= entry->bytenr;
8365                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8366                 btrfs_set_file_extent_offset(leaf, fi, offset);
8367         }
8368
8369         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8370
8371         /*
8372          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8373          * only do this if we aren't using compression, otherwise it's a
8374          * trickier case.
8375          */
8376         if (!btrfs_file_extent_compression(leaf, fi))
8377                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8378         else
8379                 printf("ram bytes may be wrong?\n");
8380         btrfs_mark_buffer_dirty(leaf);
8381 out:
8382         err = btrfs_commit_transaction(trans, root);
8383         btrfs_release_path(path);
8384         return ret ? ret : err;
8385 }
8386
8387 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8388                            struct extent_record *rec)
8389 {
8390         struct extent_backref *back;
8391         struct data_backref *dback;
8392         struct extent_entry *entry, *best = NULL;
8393         LIST_HEAD(entries);
8394         int nr_entries = 0;
8395         int broken_entries = 0;
8396         int ret = 0;
8397         short mismatch = 0;
8398
8399         /*
8400          * Metadata is easy and the backrefs should always agree on bytenr and
8401          * size, if not we've got bigger issues.
8402          */
8403         if (rec->metadata)
8404                 return 0;
8405
8406         list_for_each_entry(back, &rec->backrefs, list) {
8407                 if (back->full_backref || !back->is_data)
8408                         continue;
8409
8410                 dback = to_data_backref(back);
8411
8412                 /*
8413                  * We only pay attention to backrefs that we found a real
8414                  * backref for.
8415                  */
8416                 if (dback->found_ref == 0)
8417                         continue;
8418
8419                 /*
8420                  * For now we only catch when the bytes don't match, not the
8421                  * bytenr.  We can easily do this at the same time, but I want
8422                  * to have a fs image to test on before we just add repair
8423                  * functionality willy-nilly so we know we won't screw up the
8424                  * repair.
8425                  */
8426
8427                 entry = find_entry(&entries, dback->disk_bytenr,
8428                                    dback->bytes);
8429                 if (!entry) {
8430                         entry = malloc(sizeof(struct extent_entry));
8431                         if (!entry) {
8432                                 ret = -ENOMEM;
8433                                 goto out;
8434                         }
8435                         memset(entry, 0, sizeof(*entry));
8436                         entry->bytenr = dback->disk_bytenr;
8437                         entry->bytes = dback->bytes;
8438                         list_add_tail(&entry->list, &entries);
8439                         nr_entries++;
8440                 }
8441
8442                 /*
8443                  * If we only have on entry we may think the entries agree when
8444                  * in reality they don't so we have to do some extra checking.
8445                  */
8446                 if (dback->disk_bytenr != rec->start ||
8447                     dback->bytes != rec->nr || back->broken)
8448                         mismatch = 1;
8449
8450                 if (back->broken) {
8451                         entry->broken++;
8452                         broken_entries++;
8453                 }
8454
8455                 entry->count++;
8456         }
8457
8458         /* Yay all the backrefs agree, carry on good sir */
8459         if (nr_entries <= 1 && !mismatch)
8460                 goto out;
8461
8462         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8463                 "%Lu\n", rec->start);
8464
8465         /*
8466          * First we want to see if the backrefs can agree amongst themselves who
8467          * is right, so figure out which one of the entries has the highest
8468          * count.
8469          */
8470         best = find_most_right_entry(&entries);
8471
8472         /*
8473          * Ok so we may have an even split between what the backrefs think, so
8474          * this is where we use the extent ref to see what it thinks.
8475          */
8476         if (!best) {
8477                 entry = find_entry(&entries, rec->start, rec->nr);
8478                 if (!entry && (!broken_entries || !rec->found_rec)) {
8479                         fprintf(stderr, "Backrefs don't agree with each other "
8480                                 "and extent record doesn't agree with anybody,"
8481                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8482                                 rec->start, rec->nr);
8483                         ret = -EINVAL;
8484                         goto out;
8485                 } else if (!entry) {
8486                         /*
8487                          * Ok our backrefs were broken, we'll assume this is the
8488                          * correct value and add an entry for this range.
8489                          */
8490                         entry = malloc(sizeof(struct extent_entry));
8491                         if (!entry) {
8492                                 ret = -ENOMEM;
8493                                 goto out;
8494                         }
8495                         memset(entry, 0, sizeof(*entry));
8496                         entry->bytenr = rec->start;
8497                         entry->bytes = rec->nr;
8498                         list_add_tail(&entry->list, &entries);
8499                         nr_entries++;
8500                 }
8501                 entry->count++;
8502                 best = find_most_right_entry(&entries);
8503                 if (!best) {
8504                         fprintf(stderr, "Backrefs and extent record evenly "
8505                                 "split on who is right, this is going to "
8506                                 "require user input to fix bytenr %Lu bytes "
8507                                 "%Lu\n", rec->start, rec->nr);
8508                         ret = -EINVAL;
8509                         goto out;
8510                 }
8511         }
8512
8513         /*
8514          * I don't think this can happen currently as we'll abort() if we catch
8515          * this case higher up, but in case somebody removes that we still can't
8516          * deal with it properly here yet, so just bail out of that's the case.
8517          */
8518         if (best->bytenr != rec->start) {
8519                 fprintf(stderr, "Extent start and backref starts don't match, "
8520                         "please use btrfs-image on this file system and send "
8521                         "it to a btrfs developer so they can make fsck fix "
8522                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8523                         rec->start, rec->nr);
8524                 ret = -EINVAL;
8525                 goto out;
8526         }
8527
8528         /*
8529          * Ok great we all agreed on an extent record, let's go find the real
8530          * references and fix up the ones that don't match.
8531          */
8532         list_for_each_entry(back, &rec->backrefs, list) {
8533                 if (back->full_backref || !back->is_data)
8534                         continue;
8535
8536                 dback = to_data_backref(back);
8537
8538                 /*
8539                  * Still ignoring backrefs that don't have a real ref attached
8540                  * to them.
8541                  */
8542                 if (dback->found_ref == 0)
8543                         continue;
8544
8545                 if (dback->bytes == best->bytes &&
8546                     dback->disk_bytenr == best->bytenr)
8547                         continue;
8548
8549                 ret = repair_ref(info, path, dback, best);
8550                 if (ret)
8551                         goto out;
8552         }
8553
8554         /*
8555          * Ok we messed with the actual refs, which means we need to drop our
8556          * entire cache and go back and rescan.  I know this is a huge pain and
8557          * adds a lot of extra work, but it's the only way to be safe.  Once all
8558          * the backrefs agree we may not need to do anything to the extent
8559          * record itself.
8560          */
8561         ret = -EAGAIN;
8562 out:
8563         while (!list_empty(&entries)) {
8564                 entry = list_entry(entries.next, struct extent_entry, list);
8565                 list_del_init(&entry->list);
8566                 free(entry);
8567         }
8568         return ret;
8569 }
8570
8571 static int process_duplicates(struct btrfs_root *root,
8572                               struct cache_tree *extent_cache,
8573                               struct extent_record *rec)
8574 {
8575         struct extent_record *good, *tmp;
8576         struct cache_extent *cache;
8577         int ret;
8578
8579         /*
8580          * If we found a extent record for this extent then return, or if we
8581          * have more than one duplicate we are likely going to need to delete
8582          * something.
8583          */
8584         if (rec->found_rec || rec->num_duplicates > 1)
8585                 return 0;
8586
8587         /* Shouldn't happen but just in case */
8588         BUG_ON(!rec->num_duplicates);
8589
8590         /*
8591          * So this happens if we end up with a backref that doesn't match the
8592          * actual extent entry.  So either the backref is bad or the extent
8593          * entry is bad.  Either way we want to have the extent_record actually
8594          * reflect what we found in the extent_tree, so we need to take the
8595          * duplicate out and use that as the extent_record since the only way we
8596          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8597          */
8598         remove_cache_extent(extent_cache, &rec->cache);
8599
8600         good = to_extent_record(rec->dups.next);
8601         list_del_init(&good->list);
8602         INIT_LIST_HEAD(&good->backrefs);
8603         INIT_LIST_HEAD(&good->dups);
8604         good->cache.start = good->start;
8605         good->cache.size = good->nr;
8606         good->content_checked = 0;
8607         good->owner_ref_checked = 0;
8608         good->num_duplicates = 0;
8609         good->refs = rec->refs;
8610         list_splice_init(&rec->backrefs, &good->backrefs);
8611         while (1) {
8612                 cache = lookup_cache_extent(extent_cache, good->start,
8613                                             good->nr);
8614                 if (!cache)
8615                         break;
8616                 tmp = container_of(cache, struct extent_record, cache);
8617
8618                 /*
8619                  * If we find another overlapping extent and it's found_rec is
8620                  * set then it's a duplicate and we need to try and delete
8621                  * something.
8622                  */
8623                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8624                         if (list_empty(&good->list))
8625                                 list_add_tail(&good->list,
8626                                               &duplicate_extents);
8627                         good->num_duplicates += tmp->num_duplicates + 1;
8628                         list_splice_init(&tmp->dups, &good->dups);
8629                         list_del_init(&tmp->list);
8630                         list_add_tail(&tmp->list, &good->dups);
8631                         remove_cache_extent(extent_cache, &tmp->cache);
8632                         continue;
8633                 }
8634
8635                 /*
8636                  * Ok we have another non extent item backed extent rec, so lets
8637                  * just add it to this extent and carry on like we did above.
8638                  */
8639                 good->refs += tmp->refs;
8640                 list_splice_init(&tmp->backrefs, &good->backrefs);
8641                 remove_cache_extent(extent_cache, &tmp->cache);
8642                 free(tmp);
8643         }
8644         ret = insert_cache_extent(extent_cache, &good->cache);
8645         BUG_ON(ret);
8646         free(rec);
8647         return good->num_duplicates ? 0 : 1;
8648 }
8649
8650 static int delete_duplicate_records(struct btrfs_root *root,
8651                                     struct extent_record *rec)
8652 {
8653         struct btrfs_trans_handle *trans;
8654         LIST_HEAD(delete_list);
8655         struct btrfs_path path;
8656         struct extent_record *tmp, *good, *n;
8657         int nr_del = 0;
8658         int ret = 0, err;
8659         struct btrfs_key key;
8660
8661         btrfs_init_path(&path);
8662
8663         good = rec;
8664         /* Find the record that covers all of the duplicates. */
8665         list_for_each_entry(tmp, &rec->dups, list) {
8666                 if (good->start < tmp->start)
8667                         continue;
8668                 if (good->nr > tmp->nr)
8669                         continue;
8670
8671                 if (tmp->start + tmp->nr < good->start + good->nr) {
8672                         fprintf(stderr, "Ok we have overlapping extents that "
8673                                 "aren't completely covered by each other, this "
8674                                 "is going to require more careful thought.  "
8675                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8676                                 tmp->start, tmp->nr, good->start, good->nr);
8677                         abort();
8678                 }
8679                 good = tmp;
8680         }
8681
8682         if (good != rec)
8683                 list_add_tail(&rec->list, &delete_list);
8684
8685         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8686                 if (tmp == good)
8687                         continue;
8688                 list_move_tail(&tmp->list, &delete_list);
8689         }
8690
8691         root = root->fs_info->extent_root;
8692         trans = btrfs_start_transaction(root, 1);
8693         if (IS_ERR(trans)) {
8694                 ret = PTR_ERR(trans);
8695                 goto out;
8696         }
8697
8698         list_for_each_entry(tmp, &delete_list, list) {
8699                 if (tmp->found_rec == 0)
8700                         continue;
8701                 key.objectid = tmp->start;
8702                 key.type = BTRFS_EXTENT_ITEM_KEY;
8703                 key.offset = tmp->nr;
8704
8705                 /* Shouldn't happen but just in case */
8706                 if (tmp->metadata) {
8707                         fprintf(stderr, "Well this shouldn't happen, extent "
8708                                 "record overlaps but is metadata? "
8709                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8710                         abort();
8711                 }
8712
8713                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8714                 if (ret) {
8715                         if (ret > 0)
8716                                 ret = -EINVAL;
8717                         break;
8718                 }
8719                 ret = btrfs_del_item(trans, root, &path);
8720                 if (ret)
8721                         break;
8722                 btrfs_release_path(&path);
8723                 nr_del++;
8724         }
8725         err = btrfs_commit_transaction(trans, root);
8726         if (err && !ret)
8727                 ret = err;
8728 out:
8729         while (!list_empty(&delete_list)) {
8730                 tmp = to_extent_record(delete_list.next);
8731                 list_del_init(&tmp->list);
8732                 if (tmp == rec)
8733                         continue;
8734                 free(tmp);
8735         }
8736
8737         while (!list_empty(&rec->dups)) {
8738                 tmp = to_extent_record(rec->dups.next);
8739                 list_del_init(&tmp->list);
8740                 free(tmp);
8741         }
8742
8743         btrfs_release_path(&path);
8744
8745         if (!ret && !nr_del)
8746                 rec->num_duplicates = 0;
8747
8748         return ret ? ret : nr_del;
8749 }
8750
8751 static int find_possible_backrefs(struct btrfs_fs_info *info,
8752                                   struct btrfs_path *path,
8753                                   struct cache_tree *extent_cache,
8754                                   struct extent_record *rec)
8755 {
8756         struct btrfs_root *root;
8757         struct extent_backref *back;
8758         struct data_backref *dback;
8759         struct cache_extent *cache;
8760         struct btrfs_file_extent_item *fi;
8761         struct btrfs_key key;
8762         u64 bytenr, bytes;
8763         int ret;
8764
8765         list_for_each_entry(back, &rec->backrefs, list) {
8766                 /* Don't care about full backrefs (poor unloved backrefs) */
8767                 if (back->full_backref || !back->is_data)
8768                         continue;
8769
8770                 dback = to_data_backref(back);
8771
8772                 /* We found this one, we don't need to do a lookup */
8773                 if (dback->found_ref)
8774                         continue;
8775
8776                 key.objectid = dback->root;
8777                 key.type = BTRFS_ROOT_ITEM_KEY;
8778                 key.offset = (u64)-1;
8779
8780                 root = btrfs_read_fs_root(info, &key);
8781
8782                 /* No root, definitely a bad ref, skip */
8783                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8784                         continue;
8785                 /* Other err, exit */
8786                 if (IS_ERR(root))
8787                         return PTR_ERR(root);
8788
8789                 key.objectid = dback->owner;
8790                 key.type = BTRFS_EXTENT_DATA_KEY;
8791                 key.offset = dback->offset;
8792                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8793                 if (ret) {
8794                         btrfs_release_path(path);
8795                         if (ret < 0)
8796                                 return ret;
8797                         /* Didn't find it, we can carry on */
8798                         ret = 0;
8799                         continue;
8800                 }
8801
8802                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8803                                     struct btrfs_file_extent_item);
8804                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8805                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8806                 btrfs_release_path(path);
8807                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8808                 if (cache) {
8809                         struct extent_record *tmp;
8810                         tmp = container_of(cache, struct extent_record, cache);
8811
8812                         /*
8813                          * If we found an extent record for the bytenr for this
8814                          * particular backref then we can't add it to our
8815                          * current extent record.  We only want to add backrefs
8816                          * that don't have a corresponding extent item in the
8817                          * extent tree since they likely belong to this record
8818                          * and we need to fix it if it doesn't match bytenrs.
8819                          */
8820                         if  (tmp->found_rec)
8821                                 continue;
8822                 }
8823
8824                 dback->found_ref += 1;
8825                 dback->disk_bytenr = bytenr;
8826                 dback->bytes = bytes;
8827
8828                 /*
8829                  * Set this so the verify backref code knows not to trust the
8830                  * values in this backref.
8831                  */
8832                 back->broken = 1;
8833         }
8834
8835         return 0;
8836 }
8837
8838 /*
8839  * Record orphan data ref into corresponding root.
8840  *
8841  * Return 0 if the extent item contains data ref and recorded.
8842  * Return 1 if the extent item contains no useful data ref
8843  *   On that case, it may contains only shared_dataref or metadata backref
8844  *   or the file extent exists(this should be handled by the extent bytenr
8845  *   recovery routine)
8846  * Return <0 if something goes wrong.
8847  */
8848 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8849                                       struct extent_record *rec)
8850 {
8851         struct btrfs_key key;
8852         struct btrfs_root *dest_root;
8853         struct extent_backref *back;
8854         struct data_backref *dback;
8855         struct orphan_data_extent *orphan;
8856         struct btrfs_path path;
8857         int recorded_data_ref = 0;
8858         int ret = 0;
8859
8860         if (rec->metadata)
8861                 return 1;
8862         btrfs_init_path(&path);
8863         list_for_each_entry(back, &rec->backrefs, list) {
8864                 if (back->full_backref || !back->is_data ||
8865                     !back->found_extent_tree)
8866                         continue;
8867                 dback = to_data_backref(back);
8868                 if (dback->found_ref)
8869                         continue;
8870                 key.objectid = dback->root;
8871                 key.type = BTRFS_ROOT_ITEM_KEY;
8872                 key.offset = (u64)-1;
8873
8874                 dest_root = btrfs_read_fs_root(fs_info, &key);
8875
8876                 /* For non-exist root we just skip it */
8877                 if (IS_ERR(dest_root) || !dest_root)
8878                         continue;
8879
8880                 key.objectid = dback->owner;
8881                 key.type = BTRFS_EXTENT_DATA_KEY;
8882                 key.offset = dback->offset;
8883
8884                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8885                 btrfs_release_path(&path);
8886                 /*
8887                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8888                  * we need to record it for inode/file extent rebuild.
8889                  * For ret > 0, we record it only for file extent rebuild.
8890                  * For ret == 0, the file extent exists but only bytenr
8891                  * mismatch, let the original bytenr fix routine to handle,
8892                  * don't record it.
8893                  */
8894                 if (ret == 0)
8895                         continue;
8896                 ret = 0;
8897                 orphan = malloc(sizeof(*orphan));
8898                 if (!orphan) {
8899                         ret = -ENOMEM;
8900                         goto out;
8901                 }
8902                 INIT_LIST_HEAD(&orphan->list);
8903                 orphan->root = dback->root;
8904                 orphan->objectid = dback->owner;
8905                 orphan->offset = dback->offset;
8906                 orphan->disk_bytenr = rec->cache.start;
8907                 orphan->disk_len = rec->cache.size;
8908                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8909                 recorded_data_ref = 1;
8910         }
8911 out:
8912         btrfs_release_path(&path);
8913         if (!ret)
8914                 return !recorded_data_ref;
8915         else
8916                 return ret;
8917 }
8918
8919 /*
8920  * when an incorrect extent item is found, this will delete
8921  * all of the existing entries for it and recreate them
8922  * based on what the tree scan found.
8923  */
8924 static int fixup_extent_refs(struct btrfs_fs_info *info,
8925                              struct cache_tree *extent_cache,
8926                              struct extent_record *rec)
8927 {
8928         struct btrfs_trans_handle *trans = NULL;
8929         int ret;
8930         struct btrfs_path path;
8931         struct list_head *cur = rec->backrefs.next;
8932         struct cache_extent *cache;
8933         struct extent_backref *back;
8934         int allocated = 0;
8935         u64 flags = 0;
8936
8937         if (rec->flag_block_full_backref)
8938                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8939
8940         btrfs_init_path(&path);
8941         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8942                 /*
8943                  * Sometimes the backrefs themselves are so broken they don't
8944                  * get attached to any meaningful rec, so first go back and
8945                  * check any of our backrefs that we couldn't find and throw
8946                  * them into the list if we find the backref so that
8947                  * verify_backrefs can figure out what to do.
8948                  */
8949                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8950                 if (ret < 0)
8951                         goto out;
8952         }
8953
8954         /* step one, make sure all of the backrefs agree */
8955         ret = verify_backrefs(info, &path, rec);
8956         if (ret < 0)
8957                 goto out;
8958
8959         trans = btrfs_start_transaction(info->extent_root, 1);
8960         if (IS_ERR(trans)) {
8961                 ret = PTR_ERR(trans);
8962                 goto out;
8963         }
8964
8965         /* step two, delete all the existing records */
8966         ret = delete_extent_records(trans, info->extent_root, &path,
8967                                     rec->start);
8968
8969         if (ret < 0)
8970                 goto out;
8971
8972         /* was this block corrupt?  If so, don't add references to it */
8973         cache = lookup_cache_extent(info->corrupt_blocks,
8974                                     rec->start, rec->max_size);
8975         if (cache) {
8976                 ret = 0;
8977                 goto out;
8978         }
8979
8980         /* step three, recreate all the refs we did find */
8981         while(cur != &rec->backrefs) {
8982                 back = to_extent_backref(cur);
8983                 cur = cur->next;
8984
8985                 /*
8986                  * if we didn't find any references, don't create a
8987                  * new extent record
8988                  */
8989                 if (!back->found_ref)
8990                         continue;
8991
8992                 rec->bad_full_backref = 0;
8993                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8994                 allocated = 1;
8995
8996                 if (ret)
8997                         goto out;
8998         }
8999 out:
9000         if (trans) {
9001                 int err = btrfs_commit_transaction(trans, info->extent_root);
9002                 if (!ret)
9003                         ret = err;
9004         }
9005
9006         if (!ret)
9007                 fprintf(stderr, "Repaired extent references for %llu\n",
9008                                 (unsigned long long)rec->start);
9009
9010         btrfs_release_path(&path);
9011         return ret;
9012 }
9013
9014 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9015                               struct extent_record *rec)
9016 {
9017         struct btrfs_trans_handle *trans;
9018         struct btrfs_root *root = fs_info->extent_root;
9019         struct btrfs_path path;
9020         struct btrfs_extent_item *ei;
9021         struct btrfs_key key;
9022         u64 flags;
9023         int ret = 0;
9024
9025         key.objectid = rec->start;
9026         if (rec->metadata) {
9027                 key.type = BTRFS_METADATA_ITEM_KEY;
9028                 key.offset = rec->info_level;
9029         } else {
9030                 key.type = BTRFS_EXTENT_ITEM_KEY;
9031                 key.offset = rec->max_size;
9032         }
9033
9034         trans = btrfs_start_transaction(root, 0);
9035         if (IS_ERR(trans))
9036                 return PTR_ERR(trans);
9037
9038         btrfs_init_path(&path);
9039         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9040         if (ret < 0) {
9041                 btrfs_release_path(&path);
9042                 btrfs_commit_transaction(trans, root);
9043                 return ret;
9044         } else if (ret) {
9045                 fprintf(stderr, "Didn't find extent for %llu\n",
9046                         (unsigned long long)rec->start);
9047                 btrfs_release_path(&path);
9048                 btrfs_commit_transaction(trans, root);
9049                 return -ENOENT;
9050         }
9051
9052         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9053                             struct btrfs_extent_item);
9054         flags = btrfs_extent_flags(path.nodes[0], ei);
9055         if (rec->flag_block_full_backref) {
9056                 fprintf(stderr, "setting full backref on %llu\n",
9057                         (unsigned long long)key.objectid);
9058                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9059         } else {
9060                 fprintf(stderr, "clearing full backref on %llu\n",
9061                         (unsigned long long)key.objectid);
9062                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9063         }
9064         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9065         btrfs_mark_buffer_dirty(path.nodes[0]);
9066         btrfs_release_path(&path);
9067         ret = btrfs_commit_transaction(trans, root);
9068         if (!ret)
9069                 fprintf(stderr, "Repaired extent flags for %llu\n",
9070                                 (unsigned long long)rec->start);
9071
9072         return ret;
9073 }
9074
9075 /* right now we only prune from the extent allocation tree */
9076 static int prune_one_block(struct btrfs_trans_handle *trans,
9077                            struct btrfs_fs_info *info,
9078                            struct btrfs_corrupt_block *corrupt)
9079 {
9080         int ret;
9081         struct btrfs_path path;
9082         struct extent_buffer *eb;
9083         u64 found;
9084         int slot;
9085         int nritems;
9086         int level = corrupt->level + 1;
9087
9088         btrfs_init_path(&path);
9089 again:
9090         /* we want to stop at the parent to our busted block */
9091         path.lowest_level = level;
9092
9093         ret = btrfs_search_slot(trans, info->extent_root,
9094                                 &corrupt->key, &path, -1, 1);
9095
9096         if (ret < 0)
9097                 goto out;
9098
9099         eb = path.nodes[level];
9100         if (!eb) {
9101                 ret = -ENOENT;
9102                 goto out;
9103         }
9104
9105         /*
9106          * hopefully the search gave us the block we want to prune,
9107          * lets try that first
9108          */
9109         slot = path.slots[level];
9110         found =  btrfs_node_blockptr(eb, slot);
9111         if (found == corrupt->cache.start)
9112                 goto del_ptr;
9113
9114         nritems = btrfs_header_nritems(eb);
9115
9116         /* the search failed, lets scan this node and hope we find it */
9117         for (slot = 0; slot < nritems; slot++) {
9118                 found =  btrfs_node_blockptr(eb, slot);
9119                 if (found == corrupt->cache.start)
9120                         goto del_ptr;
9121         }
9122         /*
9123          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9124          * to this block
9125          */
9126         if (eb == info->extent_root->node) {
9127                 ret = -ENOENT;
9128                 goto out;
9129         } else {
9130                 level++;
9131                 btrfs_release_path(&path);
9132                 goto again;
9133         }
9134
9135 del_ptr:
9136         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9137         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9138
9139 out:
9140         btrfs_release_path(&path);
9141         return ret;
9142 }
9143
9144 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9145 {
9146         struct btrfs_trans_handle *trans = NULL;
9147         struct cache_extent *cache;
9148         struct btrfs_corrupt_block *corrupt;
9149
9150         while (1) {
9151                 cache = search_cache_extent(info->corrupt_blocks, 0);
9152                 if (!cache)
9153                         break;
9154                 if (!trans) {
9155                         trans = btrfs_start_transaction(info->extent_root, 1);
9156                         if (IS_ERR(trans))
9157                                 return PTR_ERR(trans);
9158                 }
9159                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9160                 prune_one_block(trans, info, corrupt);
9161                 remove_cache_extent(info->corrupt_blocks, cache);
9162         }
9163         if (trans)
9164                 return btrfs_commit_transaction(trans, info->extent_root);
9165         return 0;
9166 }
9167
9168 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9169 {
9170         struct btrfs_block_group_cache *cache;
9171         u64 start, end;
9172         int ret;
9173
9174         while (1) {
9175                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9176                                             &start, &end, EXTENT_DIRTY);
9177                 if (ret)
9178                         break;
9179                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9180         }
9181
9182         start = 0;
9183         while (1) {
9184                 cache = btrfs_lookup_first_block_group(fs_info, start);
9185                 if (!cache)
9186                         break;
9187                 if (cache->cached)
9188                         cache->cached = 0;
9189                 start = cache->key.objectid + cache->key.offset;
9190         }
9191 }
9192
9193 static int check_extent_refs(struct btrfs_root *root,
9194                              struct cache_tree *extent_cache)
9195 {
9196         struct extent_record *rec;
9197         struct cache_extent *cache;
9198         int ret = 0;
9199         int had_dups = 0;
9200
9201         if (repair) {
9202                 /*
9203                  * if we're doing a repair, we have to make sure
9204                  * we don't allocate from the problem extents.
9205                  * In the worst case, this will be all the
9206                  * extents in the FS
9207                  */
9208                 cache = search_cache_extent(extent_cache, 0);
9209                 while(cache) {
9210                         rec = container_of(cache, struct extent_record, cache);
9211                         set_extent_dirty(root->fs_info->excluded_extents,
9212                                          rec->start,
9213                                          rec->start + rec->max_size - 1);
9214                         cache = next_cache_extent(cache);
9215                 }
9216
9217                 /* pin down all the corrupted blocks too */
9218                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9219                 while(cache) {
9220                         set_extent_dirty(root->fs_info->excluded_extents,
9221                                          cache->start,
9222                                          cache->start + cache->size - 1);
9223                         cache = next_cache_extent(cache);
9224                 }
9225                 prune_corrupt_blocks(root->fs_info);
9226                 reset_cached_block_groups(root->fs_info);
9227         }
9228
9229         reset_cached_block_groups(root->fs_info);
9230
9231         /*
9232          * We need to delete any duplicate entries we find first otherwise we
9233          * could mess up the extent tree when we have backrefs that actually
9234          * belong to a different extent item and not the weird duplicate one.
9235          */
9236         while (repair && !list_empty(&duplicate_extents)) {
9237                 rec = to_extent_record(duplicate_extents.next);
9238                 list_del_init(&rec->list);
9239
9240                 /* Sometimes we can find a backref before we find an actual
9241                  * extent, so we need to process it a little bit to see if there
9242                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9243                  * if this is a backref screwup.  If we need to delete stuff
9244                  * process_duplicates() will return 0, otherwise it will return
9245                  * 1 and we
9246                  */
9247                 if (process_duplicates(root, extent_cache, rec))
9248                         continue;
9249                 ret = delete_duplicate_records(root, rec);
9250                 if (ret < 0)
9251                         return ret;
9252                 /*
9253                  * delete_duplicate_records will return the number of entries
9254                  * deleted, so if it's greater than 0 then we know we actually
9255                  * did something and we need to remove.
9256                  */
9257                 if (ret)
9258                         had_dups = 1;
9259         }
9260
9261         if (had_dups)
9262                 return -EAGAIN;
9263
9264         while(1) {
9265                 int cur_err = 0;
9266                 int fix = 0;
9267
9268                 cache = search_cache_extent(extent_cache, 0);
9269                 if (!cache)
9270                         break;
9271                 rec = container_of(cache, struct extent_record, cache);
9272                 if (rec->num_duplicates) {
9273                         fprintf(stderr, "extent item %llu has multiple extent "
9274                                 "items\n", (unsigned long long)rec->start);
9275                         cur_err = 1;
9276                 }
9277
9278                 if (rec->refs != rec->extent_item_refs) {
9279                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9280                                 (unsigned long long)rec->start,
9281                                 (unsigned long long)rec->nr);
9282                         fprintf(stderr, "extent item %llu, found %llu\n",
9283                                 (unsigned long long)rec->extent_item_refs,
9284                                 (unsigned long long)rec->refs);
9285                         ret = record_orphan_data_extents(root->fs_info, rec);
9286                         if (ret < 0)
9287                                 goto repair_abort;
9288                         fix = ret;
9289                         cur_err = 1;
9290                 }
9291                 if (all_backpointers_checked(rec, 1)) {
9292                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9293                                 (unsigned long long)rec->start,
9294                                 (unsigned long long)rec->nr);
9295                         fix = 1;
9296                         cur_err = 1;
9297                 }
9298                 if (!rec->owner_ref_checked) {
9299                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9300                                 (unsigned long long)rec->start,
9301                                 (unsigned long long)rec->nr);
9302                         fix = 1;
9303                         cur_err = 1;
9304                 }
9305
9306                 if (repair && fix) {
9307                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9308                         if (ret)
9309                                 goto repair_abort;
9310                 }
9311
9312
9313                 if (rec->bad_full_backref) {
9314                         fprintf(stderr, "bad full backref, on [%llu]\n",
9315                                 (unsigned long long)rec->start);
9316                         if (repair) {
9317                                 ret = fixup_extent_flags(root->fs_info, rec);
9318                                 if (ret)
9319                                         goto repair_abort;
9320                                 fix = 1;
9321                         }
9322                         cur_err = 1;
9323                 }
9324                 /*
9325                  * Although it's not a extent ref's problem, we reuse this
9326                  * routine for error reporting.
9327                  * No repair function yet.
9328                  */
9329                 if (rec->crossing_stripes) {
9330                         fprintf(stderr,
9331                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9332                                 rec->start, rec->start + rec->max_size);
9333                         cur_err = 1;
9334                 }
9335
9336                 if (rec->wrong_chunk_type) {
9337                         fprintf(stderr,
9338                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9339                                 rec->start, rec->start + rec->max_size);
9340                         cur_err = 1;
9341                 }
9342
9343                 remove_cache_extent(extent_cache, cache);
9344                 free_all_extent_backrefs(rec);
9345                 if (!init_extent_tree && repair && (!cur_err || fix))
9346                         clear_extent_dirty(root->fs_info->excluded_extents,
9347                                            rec->start,
9348                                            rec->start + rec->max_size - 1);
9349                 free(rec);
9350         }
9351 repair_abort:
9352         if (repair) {
9353                 if (ret && ret != -EAGAIN) {
9354                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9355                         exit(1);
9356                 } else if (!ret) {
9357                         struct btrfs_trans_handle *trans;
9358
9359                         root = root->fs_info->extent_root;
9360                         trans = btrfs_start_transaction(root, 1);
9361                         if (IS_ERR(trans)) {
9362                                 ret = PTR_ERR(trans);
9363                                 goto repair_abort;
9364                         }
9365
9366                         btrfs_fix_block_accounting(trans, root);
9367                         ret = btrfs_commit_transaction(trans, root);
9368                         if (ret)
9369                                 goto repair_abort;
9370                 }
9371                 return ret;
9372         }
9373         return 0;
9374 }
9375
9376 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9377 {
9378         u64 stripe_size;
9379
9380         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9381                 stripe_size = length;
9382                 stripe_size /= num_stripes;
9383         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9384                 stripe_size = length * 2;
9385                 stripe_size /= num_stripes;
9386         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9387                 stripe_size = length;
9388                 stripe_size /= (num_stripes - 1);
9389         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9390                 stripe_size = length;
9391                 stripe_size /= (num_stripes - 2);
9392         } else {
9393                 stripe_size = length;
9394         }
9395         return stripe_size;
9396 }
9397
9398 /*
9399  * Check the chunk with its block group/dev list ref:
9400  * Return 0 if all refs seems valid.
9401  * Return 1 if part of refs seems valid, need later check for rebuild ref
9402  * like missing block group and needs to search extent tree to rebuild them.
9403  * Return -1 if essential refs are missing and unable to rebuild.
9404  */
9405 static int check_chunk_refs(struct chunk_record *chunk_rec,
9406                             struct block_group_tree *block_group_cache,
9407                             struct device_extent_tree *dev_extent_cache,
9408                             int silent)
9409 {
9410         struct cache_extent *block_group_item;
9411         struct block_group_record *block_group_rec;
9412         struct cache_extent *dev_extent_item;
9413         struct device_extent_record *dev_extent_rec;
9414         u64 devid;
9415         u64 offset;
9416         u64 length;
9417         int metadump_v2 = 0;
9418         int i;
9419         int ret = 0;
9420
9421         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9422                                                chunk_rec->offset,
9423                                                chunk_rec->length);
9424         if (block_group_item) {
9425                 block_group_rec = container_of(block_group_item,
9426                                                struct block_group_record,
9427                                                cache);
9428                 if (chunk_rec->length != block_group_rec->offset ||
9429                     chunk_rec->offset != block_group_rec->objectid ||
9430                     (!metadump_v2 &&
9431                      chunk_rec->type_flags != block_group_rec->flags)) {
9432                         if (!silent)
9433                                 fprintf(stderr,
9434                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9435                                         chunk_rec->objectid,
9436                                         chunk_rec->type,
9437                                         chunk_rec->offset,
9438                                         chunk_rec->length,
9439                                         chunk_rec->offset,
9440                                         chunk_rec->type_flags,
9441                                         block_group_rec->objectid,
9442                                         block_group_rec->type,
9443                                         block_group_rec->offset,
9444                                         block_group_rec->offset,
9445                                         block_group_rec->objectid,
9446                                         block_group_rec->flags);
9447                         ret = -1;
9448                 } else {
9449                         list_del_init(&block_group_rec->list);
9450                         chunk_rec->bg_rec = block_group_rec;
9451                 }
9452         } else {
9453                 if (!silent)
9454                         fprintf(stderr,
9455                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9456                                 chunk_rec->objectid,
9457                                 chunk_rec->type,
9458                                 chunk_rec->offset,
9459                                 chunk_rec->length,
9460                                 chunk_rec->offset,
9461                                 chunk_rec->type_flags);
9462                 ret = 1;
9463         }
9464
9465         if (metadump_v2)
9466                 return ret;
9467
9468         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9469                                     chunk_rec->num_stripes);
9470         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9471                 devid = chunk_rec->stripes[i].devid;
9472                 offset = chunk_rec->stripes[i].offset;
9473                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9474                                                        devid, offset, length);
9475                 if (dev_extent_item) {
9476                         dev_extent_rec = container_of(dev_extent_item,
9477                                                 struct device_extent_record,
9478                                                 cache);
9479                         if (dev_extent_rec->objectid != devid ||
9480                             dev_extent_rec->offset != offset ||
9481                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9482                             dev_extent_rec->length != length) {
9483                                 if (!silent)
9484                                         fprintf(stderr,
9485                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9486                                                 chunk_rec->objectid,
9487                                                 chunk_rec->type,
9488                                                 chunk_rec->offset,
9489                                                 chunk_rec->stripes[i].devid,
9490                                                 chunk_rec->stripes[i].offset,
9491                                                 dev_extent_rec->objectid,
9492                                                 dev_extent_rec->offset,
9493                                                 dev_extent_rec->length);
9494                                 ret = -1;
9495                         } else {
9496                                 list_move(&dev_extent_rec->chunk_list,
9497                                           &chunk_rec->dextents);
9498                         }
9499                 } else {
9500                         if (!silent)
9501                                 fprintf(stderr,
9502                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9503                                         chunk_rec->objectid,
9504                                         chunk_rec->type,
9505                                         chunk_rec->offset,
9506                                         chunk_rec->stripes[i].devid,
9507                                         chunk_rec->stripes[i].offset);
9508                         ret = -1;
9509                 }
9510         }
9511         return ret;
9512 }
9513
9514 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9515 int check_chunks(struct cache_tree *chunk_cache,
9516                  struct block_group_tree *block_group_cache,
9517                  struct device_extent_tree *dev_extent_cache,
9518                  struct list_head *good, struct list_head *bad,
9519                  struct list_head *rebuild, int silent)
9520 {
9521         struct cache_extent *chunk_item;
9522         struct chunk_record *chunk_rec;
9523         struct block_group_record *bg_rec;
9524         struct device_extent_record *dext_rec;
9525         int err;
9526         int ret = 0;
9527
9528         chunk_item = first_cache_extent(chunk_cache);
9529         while (chunk_item) {
9530                 chunk_rec = container_of(chunk_item, struct chunk_record,
9531                                          cache);
9532                 err = check_chunk_refs(chunk_rec, block_group_cache,
9533                                        dev_extent_cache, silent);
9534                 if (err < 0)
9535                         ret = err;
9536                 if (err == 0 && good)
9537                         list_add_tail(&chunk_rec->list, good);
9538                 if (err > 0 && rebuild)
9539                         list_add_tail(&chunk_rec->list, rebuild);
9540                 if (err < 0 && bad)
9541                         list_add_tail(&chunk_rec->list, bad);
9542                 chunk_item = next_cache_extent(chunk_item);
9543         }
9544
9545         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9546                 if (!silent)
9547                         fprintf(stderr,
9548                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9549                                 bg_rec->objectid,
9550                                 bg_rec->offset,
9551                                 bg_rec->flags);
9552                 if (!ret)
9553                         ret = 1;
9554         }
9555
9556         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9557                             chunk_list) {
9558                 if (!silent)
9559                         fprintf(stderr,
9560                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9561                                 dext_rec->objectid,
9562                                 dext_rec->offset,
9563                                 dext_rec->length);
9564                 if (!ret)
9565                         ret = 1;
9566         }
9567         return ret;
9568 }
9569
9570
9571 static int check_device_used(struct device_record *dev_rec,
9572                              struct device_extent_tree *dext_cache)
9573 {
9574         struct cache_extent *cache;
9575         struct device_extent_record *dev_extent_rec;
9576         u64 total_byte = 0;
9577
9578         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9579         while (cache) {
9580                 dev_extent_rec = container_of(cache,
9581                                               struct device_extent_record,
9582                                               cache);
9583                 if (dev_extent_rec->objectid != dev_rec->devid)
9584                         break;
9585
9586                 list_del_init(&dev_extent_rec->device_list);
9587                 total_byte += dev_extent_rec->length;
9588                 cache = next_cache_extent(cache);
9589         }
9590
9591         if (total_byte != dev_rec->byte_used) {
9592                 fprintf(stderr,
9593                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9594                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9595                         dev_rec->type, dev_rec->offset);
9596                 return -1;
9597         } else {
9598                 return 0;
9599         }
9600 }
9601
9602 /* check btrfs_dev_item -> btrfs_dev_extent */
9603 static int check_devices(struct rb_root *dev_cache,
9604                          struct device_extent_tree *dev_extent_cache)
9605 {
9606         struct rb_node *dev_node;
9607         struct device_record *dev_rec;
9608         struct device_extent_record *dext_rec;
9609         int err;
9610         int ret = 0;
9611
9612         dev_node = rb_first(dev_cache);
9613         while (dev_node) {
9614                 dev_rec = container_of(dev_node, struct device_record, node);
9615                 err = check_device_used(dev_rec, dev_extent_cache);
9616                 if (err)
9617                         ret = err;
9618
9619                 dev_node = rb_next(dev_node);
9620         }
9621         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9622                             device_list) {
9623                 fprintf(stderr,
9624                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9625                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9626                 if (!ret)
9627                         ret = 1;
9628         }
9629         return ret;
9630 }
9631
9632 static int add_root_item_to_list(struct list_head *head,
9633                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9634                                   u8 level, u8 drop_level,
9635                                   int level_size, struct btrfs_key *drop_key)
9636 {
9637
9638         struct root_item_record *ri_rec;
9639         ri_rec = malloc(sizeof(*ri_rec));
9640         if (!ri_rec)
9641                 return -ENOMEM;
9642         ri_rec->bytenr = bytenr;
9643         ri_rec->objectid = objectid;
9644         ri_rec->level = level;
9645         ri_rec->level_size = level_size;
9646         ri_rec->drop_level = drop_level;
9647         ri_rec->last_snapshot = last_snapshot;
9648         if (drop_key)
9649                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9650         list_add_tail(&ri_rec->list, head);
9651
9652         return 0;
9653 }
9654
9655 static void free_root_item_list(struct list_head *list)
9656 {
9657         struct root_item_record *ri_rec;
9658
9659         while (!list_empty(list)) {
9660                 ri_rec = list_first_entry(list, struct root_item_record,
9661                                           list);
9662                 list_del_init(&ri_rec->list);
9663                 free(ri_rec);
9664         }
9665 }
9666
9667 static int deal_root_from_list(struct list_head *list,
9668                                struct btrfs_root *root,
9669                                struct block_info *bits,
9670                                int bits_nr,
9671                                struct cache_tree *pending,
9672                                struct cache_tree *seen,
9673                                struct cache_tree *reada,
9674                                struct cache_tree *nodes,
9675                                struct cache_tree *extent_cache,
9676                                struct cache_tree *chunk_cache,
9677                                struct rb_root *dev_cache,
9678                                struct block_group_tree *block_group_cache,
9679                                struct device_extent_tree *dev_extent_cache)
9680 {
9681         int ret = 0;
9682         u64 last;
9683
9684         while (!list_empty(list)) {
9685                 struct root_item_record *rec;
9686                 struct extent_buffer *buf;
9687                 rec = list_entry(list->next,
9688                                  struct root_item_record, list);
9689                 last = 0;
9690                 buf = read_tree_block(root->fs_info->tree_root,
9691                                       rec->bytenr, rec->level_size, 0);
9692                 if (!extent_buffer_uptodate(buf)) {
9693                         free_extent_buffer(buf);
9694                         ret = -EIO;
9695                         break;
9696                 }
9697                 ret = add_root_to_pending(buf, extent_cache, pending,
9698                                     seen, nodes, rec->objectid);
9699                 if (ret < 0)
9700                         break;
9701                 /*
9702                  * To rebuild extent tree, we need deal with snapshot
9703                  * one by one, otherwise we deal with node firstly which
9704                  * can maximize readahead.
9705                  */
9706                 while (1) {
9707                         ret = run_next_block(root, bits, bits_nr, &last,
9708                                              pending, seen, reada, nodes,
9709                                              extent_cache, chunk_cache,
9710                                              dev_cache, block_group_cache,
9711                                              dev_extent_cache, rec);
9712                         if (ret != 0)
9713                                 break;
9714                 }
9715                 free_extent_buffer(buf);
9716                 list_del(&rec->list);
9717                 free(rec);
9718                 if (ret < 0)
9719                         break;
9720         }
9721         while (ret >= 0) {
9722                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9723                                      reada, nodes, extent_cache, chunk_cache,
9724                                      dev_cache, block_group_cache,
9725                                      dev_extent_cache, NULL);
9726                 if (ret != 0) {
9727                         if (ret > 0)
9728                                 ret = 0;
9729                         break;
9730                 }
9731         }
9732         return ret;
9733 }
9734
9735 static int check_chunks_and_extents(struct btrfs_root *root)
9736 {
9737         struct rb_root dev_cache;
9738         struct cache_tree chunk_cache;
9739         struct block_group_tree block_group_cache;
9740         struct device_extent_tree dev_extent_cache;
9741         struct cache_tree extent_cache;
9742         struct cache_tree seen;
9743         struct cache_tree pending;
9744         struct cache_tree reada;
9745         struct cache_tree nodes;
9746         struct extent_io_tree excluded_extents;
9747         struct cache_tree corrupt_blocks;
9748         struct btrfs_path path;
9749         struct btrfs_key key;
9750         struct btrfs_key found_key;
9751         int ret, err = 0;
9752         struct block_info *bits;
9753         int bits_nr;
9754         struct extent_buffer *leaf;
9755         int slot;
9756         struct btrfs_root_item ri;
9757         struct list_head dropping_trees;
9758         struct list_head normal_trees;
9759         struct btrfs_root *root1;
9760         u64 objectid;
9761         u32 level_size;
9762         u8 level;
9763
9764         dev_cache = RB_ROOT;
9765         cache_tree_init(&chunk_cache);
9766         block_group_tree_init(&block_group_cache);
9767         device_extent_tree_init(&dev_extent_cache);
9768
9769         cache_tree_init(&extent_cache);
9770         cache_tree_init(&seen);
9771         cache_tree_init(&pending);
9772         cache_tree_init(&nodes);
9773         cache_tree_init(&reada);
9774         cache_tree_init(&corrupt_blocks);
9775         extent_io_tree_init(&excluded_extents);
9776         INIT_LIST_HEAD(&dropping_trees);
9777         INIT_LIST_HEAD(&normal_trees);
9778
9779         if (repair) {
9780                 root->fs_info->excluded_extents = &excluded_extents;
9781                 root->fs_info->fsck_extent_cache = &extent_cache;
9782                 root->fs_info->free_extent_hook = free_extent_hook;
9783                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9784         }
9785
9786         bits_nr = 1024;
9787         bits = malloc(bits_nr * sizeof(struct block_info));
9788         if (!bits) {
9789                 perror("malloc");
9790                 exit(1);
9791         }
9792
9793         if (ctx.progress_enabled) {
9794                 ctx.tp = TASK_EXTENTS;
9795                 task_start(ctx.info);
9796         }
9797
9798 again:
9799         root1 = root->fs_info->tree_root;
9800         level = btrfs_header_level(root1->node);
9801         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9802                                     root1->node->start, 0, level, 0,
9803                                     root1->nodesize, NULL);
9804         if (ret < 0)
9805                 goto out;
9806         root1 = root->fs_info->chunk_root;
9807         level = btrfs_header_level(root1->node);
9808         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9809                                     root1->node->start, 0, level, 0,
9810                                     root1->nodesize, NULL);
9811         if (ret < 0)
9812                 goto out;
9813         btrfs_init_path(&path);
9814         key.offset = 0;
9815         key.objectid = 0;
9816         key.type = BTRFS_ROOT_ITEM_KEY;
9817         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9818                                         &key, &path, 0, 0);
9819         if (ret < 0)
9820                 goto out;
9821         while(1) {
9822                 leaf = path.nodes[0];
9823                 slot = path.slots[0];
9824                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9825                         ret = btrfs_next_leaf(root, &path);
9826                         if (ret != 0)
9827                                 break;
9828                         leaf = path.nodes[0];
9829                         slot = path.slots[0];
9830                 }
9831                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9832                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9833                         unsigned long offset;
9834                         u64 last_snapshot;
9835
9836                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9837                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9838                         last_snapshot = btrfs_root_last_snapshot(&ri);
9839                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9840                                 level = btrfs_root_level(&ri);
9841                                 level_size = root->nodesize;
9842                                 ret = add_root_item_to_list(&normal_trees,
9843                                                 found_key.objectid,
9844                                                 btrfs_root_bytenr(&ri),
9845                                                 last_snapshot, level,
9846                                                 0, level_size, NULL);
9847                                 if (ret < 0)
9848                                         goto out;
9849                         } else {
9850                                 level = btrfs_root_level(&ri);
9851                                 level_size = root->nodesize;
9852                                 objectid = found_key.objectid;
9853                                 btrfs_disk_key_to_cpu(&found_key,
9854                                                       &ri.drop_progress);
9855                                 ret = add_root_item_to_list(&dropping_trees,
9856                                                 objectid,
9857                                                 btrfs_root_bytenr(&ri),
9858                                                 last_snapshot, level,
9859                                                 ri.drop_level,
9860                                                 level_size, &found_key);
9861                                 if (ret < 0)
9862                                         goto out;
9863                         }
9864                 }
9865                 path.slots[0]++;
9866         }
9867         btrfs_release_path(&path);
9868
9869         /*
9870          * check_block can return -EAGAIN if it fixes something, please keep
9871          * this in mind when dealing with return values from these functions, if
9872          * we get -EAGAIN we want to fall through and restart the loop.
9873          */
9874         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9875                                   &seen, &reada, &nodes, &extent_cache,
9876                                   &chunk_cache, &dev_cache, &block_group_cache,
9877                                   &dev_extent_cache);
9878         if (ret < 0) {
9879                 if (ret == -EAGAIN)
9880                         goto loop;
9881                 goto out;
9882         }
9883         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9884                                   &pending, &seen, &reada, &nodes,
9885                                   &extent_cache, &chunk_cache, &dev_cache,
9886                                   &block_group_cache, &dev_extent_cache);
9887         if (ret < 0) {
9888                 if (ret == -EAGAIN)
9889                         goto loop;
9890                 goto out;
9891         }
9892
9893         ret = check_chunks(&chunk_cache, &block_group_cache,
9894                            &dev_extent_cache, NULL, NULL, NULL, 0);
9895         if (ret) {
9896                 if (ret == -EAGAIN)
9897                         goto loop;
9898                 err = ret;
9899         }
9900
9901         ret = check_extent_refs(root, &extent_cache);
9902         if (ret < 0) {
9903                 if (ret == -EAGAIN)
9904                         goto loop;
9905                 goto out;
9906         }
9907
9908         ret = check_devices(&dev_cache, &dev_extent_cache);
9909         if (ret && err)
9910                 ret = err;
9911
9912 out:
9913         task_stop(ctx.info);
9914         if (repair) {
9915                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9916                 extent_io_tree_cleanup(&excluded_extents);
9917                 root->fs_info->fsck_extent_cache = NULL;
9918                 root->fs_info->free_extent_hook = NULL;
9919                 root->fs_info->corrupt_blocks = NULL;
9920                 root->fs_info->excluded_extents = NULL;
9921         }
9922         free(bits);
9923         free_chunk_cache_tree(&chunk_cache);
9924         free_device_cache_tree(&dev_cache);
9925         free_block_group_tree(&block_group_cache);
9926         free_device_extent_tree(&dev_extent_cache);
9927         free_extent_cache_tree(&seen);
9928         free_extent_cache_tree(&pending);
9929         free_extent_cache_tree(&reada);
9930         free_extent_cache_tree(&nodes);
9931         return ret;
9932 loop:
9933         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9934         free_extent_cache_tree(&seen);
9935         free_extent_cache_tree(&pending);
9936         free_extent_cache_tree(&reada);
9937         free_extent_cache_tree(&nodes);
9938         free_chunk_cache_tree(&chunk_cache);
9939         free_block_group_tree(&block_group_cache);
9940         free_device_cache_tree(&dev_cache);
9941         free_device_extent_tree(&dev_extent_cache);
9942         free_extent_record_cache(&extent_cache);
9943         free_root_item_list(&normal_trees);
9944         free_root_item_list(&dropping_trees);
9945         extent_io_tree_cleanup(&excluded_extents);
9946         goto again;
9947 }
9948
9949 /*
9950  * Check backrefs of a tree block given by @bytenr or @eb.
9951  *
9952  * @root:       the root containing the @bytenr or @eb
9953  * @eb:         tree block extent buffer, can be NULL
9954  * @bytenr:     bytenr of the tree block to search
9955  * @level:      tree level of the tree block
9956  * @owner:      owner of the tree block
9957  *
9958  * Return >0 for any error found and output error message
9959  * Return 0 for no error found
9960  */
9961 static int check_tree_block_ref(struct btrfs_root *root,
9962                                 struct extent_buffer *eb, u64 bytenr,
9963                                 int level, u64 owner)
9964 {
9965         struct btrfs_key key;
9966         struct btrfs_root *extent_root = root->fs_info->extent_root;
9967         struct btrfs_path path;
9968         struct btrfs_extent_item *ei;
9969         struct btrfs_extent_inline_ref *iref;
9970         struct extent_buffer *leaf;
9971         unsigned long end;
9972         unsigned long ptr;
9973         int slot;
9974         int skinny_level;
9975         int type;
9976         u32 nodesize = root->nodesize;
9977         u32 item_size;
9978         u64 offset;
9979         int tree_reloc_root = 0;
9980         int found_ref = 0;
9981         int err = 0;
9982         int ret;
9983
9984         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
9985             btrfs_header_bytenr(root->node) == bytenr)
9986                 tree_reloc_root = 1;
9987
9988         btrfs_init_path(&path);
9989         key.objectid = bytenr;
9990         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
9991                 key.type = BTRFS_METADATA_ITEM_KEY;
9992         else
9993                 key.type = BTRFS_EXTENT_ITEM_KEY;
9994         key.offset = (u64)-1;
9995
9996         /* Search for the backref in extent tree */
9997         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9998         if (ret < 0) {
9999                 err |= BACKREF_MISSING;
10000                 goto out;
10001         }
10002         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10003         if (ret) {
10004                 err |= BACKREF_MISSING;
10005                 goto out;
10006         }
10007
10008         leaf = path.nodes[0];
10009         slot = path.slots[0];
10010         btrfs_item_key_to_cpu(leaf, &key, slot);
10011
10012         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10013
10014         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10015                 skinny_level = (int)key.offset;
10016                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10017         } else {
10018                 struct btrfs_tree_block_info *info;
10019
10020                 info = (struct btrfs_tree_block_info *)(ei + 1);
10021                 skinny_level = btrfs_tree_block_level(leaf, info);
10022                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10023         }
10024
10025         if (eb) {
10026                 u64 header_gen;
10027                 u64 extent_gen;
10028
10029                 if (!(btrfs_extent_flags(leaf, ei) &
10030                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10031                         error(
10032                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10033                                 key.objectid, nodesize,
10034                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10035                         err = BACKREF_MISMATCH;
10036                 }
10037                 header_gen = btrfs_header_generation(eb);
10038                 extent_gen = btrfs_extent_generation(leaf, ei);
10039                 if (header_gen != extent_gen) {
10040                         error(
10041         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10042                                 key.objectid, nodesize, header_gen,
10043                                 extent_gen);
10044                         err = BACKREF_MISMATCH;
10045                 }
10046                 if (level != skinny_level) {
10047                         error(
10048                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10049                                 key.objectid, nodesize, level, skinny_level);
10050                         err = BACKREF_MISMATCH;
10051                 }
10052                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10053                         error(
10054                         "extent[%llu %u] is referred by other roots than %llu",
10055                                 key.objectid, nodesize, root->objectid);
10056                         err = BACKREF_MISMATCH;
10057                 }
10058         }
10059
10060         /*
10061          * Iterate the extent/metadata item to find the exact backref
10062          */
10063         item_size = btrfs_item_size_nr(leaf, slot);
10064         ptr = (unsigned long)iref;
10065         end = (unsigned long)ei + item_size;
10066         while (ptr < end) {
10067                 iref = (struct btrfs_extent_inline_ref *)ptr;
10068                 type = btrfs_extent_inline_ref_type(leaf, iref);
10069                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10070
10071                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10072                         (offset == root->objectid || offset == owner)) {
10073                         found_ref = 1;
10074                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10075                         /*
10076                          * Backref of tree reloc root points to itself, no need
10077                          * to check backref any more.
10078                          */
10079                         if (tree_reloc_root)
10080                                 found_ref = 1;
10081                         else
10082                         /* Check if the backref points to valid referencer */
10083                                 found_ref = !check_tree_block_ref(root, NULL,
10084                                                 offset, level + 1, owner);
10085                 }
10086
10087                 if (found_ref)
10088                         break;
10089                 ptr += btrfs_extent_inline_ref_size(type);
10090         }
10091
10092         /*
10093          * Inlined extent item doesn't have what we need, check
10094          * TREE_BLOCK_REF_KEY
10095          */
10096         if (!found_ref) {
10097                 btrfs_release_path(&path);
10098                 key.objectid = bytenr;
10099                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10100                 key.offset = root->objectid;
10101
10102                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10103                 if (!ret)
10104                         found_ref = 1;
10105         }
10106         if (!found_ref)
10107                 err |= BACKREF_MISSING;
10108 out:
10109         btrfs_release_path(&path);
10110         if (eb && (err & BACKREF_MISSING))
10111                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10112                         bytenr, nodesize, owner, level);
10113         return err;
10114 }
10115
10116 /*
10117  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10118  *
10119  * Return >0 any error found and output error message
10120  * Return 0 for no error found
10121  */
10122 static int check_extent_data_item(struct btrfs_root *root,
10123                                   struct extent_buffer *eb, int slot)
10124 {
10125         struct btrfs_file_extent_item *fi;
10126         struct btrfs_path path;
10127         struct btrfs_root *extent_root = root->fs_info->extent_root;
10128         struct btrfs_key fi_key;
10129         struct btrfs_key dbref_key;
10130         struct extent_buffer *leaf;
10131         struct btrfs_extent_item *ei;
10132         struct btrfs_extent_inline_ref *iref;
10133         struct btrfs_extent_data_ref *dref;
10134         u64 owner;
10135         u64 disk_bytenr;
10136         u64 disk_num_bytes;
10137         u64 extent_num_bytes;
10138         u64 extent_flags;
10139         u32 item_size;
10140         unsigned long end;
10141         unsigned long ptr;
10142         int type;
10143         u64 ref_root;
10144         int found_dbackref = 0;
10145         int err = 0;
10146         int ret;
10147
10148         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10149         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10150
10151         /* Nothing to check for hole and inline data extents */
10152         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10153             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10154                 return 0;
10155
10156         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10157         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10158         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10159
10160         /* Check unaligned disk_num_bytes and num_bytes */
10161         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10162                 error(
10163 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10164                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10165                         root->sectorsize);
10166                 err |= BYTES_UNALIGNED;
10167         } else {
10168                 data_bytes_allocated += disk_num_bytes;
10169         }
10170         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10171                 error(
10172 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10173                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10174                         root->sectorsize);
10175                 err |= BYTES_UNALIGNED;
10176         } else {
10177                 data_bytes_referenced += extent_num_bytes;
10178         }
10179         owner = btrfs_header_owner(eb);
10180
10181         /* Check the extent item of the file extent in extent tree */
10182         btrfs_init_path(&path);
10183         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10184         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10185         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10186
10187         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10188         if (ret) {
10189                 err |= BACKREF_MISSING;
10190                 goto error;
10191         }
10192
10193         leaf = path.nodes[0];
10194         slot = path.slots[0];
10195         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10196
10197         extent_flags = btrfs_extent_flags(leaf, ei);
10198
10199         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10200                 error(
10201                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10202                     disk_bytenr, disk_num_bytes,
10203                     BTRFS_EXTENT_FLAG_DATA);
10204                 err |= BACKREF_MISMATCH;
10205         }
10206
10207         /* Check data backref inside that extent item */
10208         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10209         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10210         ptr = (unsigned long)iref;
10211         end = (unsigned long)ei + item_size;
10212         while (ptr < end) {
10213                 iref = (struct btrfs_extent_inline_ref *)ptr;
10214                 type = btrfs_extent_inline_ref_type(leaf, iref);
10215                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10216
10217                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10218                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10219                         if (ref_root == owner || ref_root == root->objectid)
10220                                 found_dbackref = 1;
10221                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10222                         found_dbackref = !check_tree_block_ref(root, NULL,
10223                                 btrfs_extent_inline_ref_offset(leaf, iref),
10224                                 0, owner);
10225                 }
10226
10227                 if (found_dbackref)
10228                         break;
10229                 ptr += btrfs_extent_inline_ref_size(type);
10230         }
10231
10232         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10233         if (!found_dbackref) {
10234                 btrfs_release_path(&path);
10235
10236                 btrfs_init_path(&path);
10237                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10238                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10239                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10240                                 fi_key.objectid, fi_key.offset);
10241
10242                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10243                                         &dbref_key, &path, 0, 0);
10244                 if (!ret)
10245                         found_dbackref = 1;
10246         }
10247
10248         if (!found_dbackref)
10249                 err |= BACKREF_MISSING;
10250 error:
10251         btrfs_release_path(&path);
10252         if (err & BACKREF_MISSING) {
10253                 error("data extent[%llu %llu] backref lost",
10254                       disk_bytenr, disk_num_bytes);
10255         }
10256         return err;
10257 }
10258
10259 /*
10260  * Get real tree block level for the case like shared block
10261  * Return >= 0 as tree level
10262  * Return <0 for error
10263  */
10264 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10265 {
10266         struct extent_buffer *eb;
10267         struct btrfs_path path;
10268         struct btrfs_key key;
10269         struct btrfs_extent_item *ei;
10270         u64 flags;
10271         u64 transid;
10272         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10273         u8 backref_level;
10274         u8 header_level;
10275         int ret;
10276
10277         /* Search extent tree for extent generation and level */
10278         key.objectid = bytenr;
10279         key.type = BTRFS_METADATA_ITEM_KEY;
10280         key.offset = (u64)-1;
10281
10282         btrfs_init_path(&path);
10283         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10284         if (ret < 0)
10285                 goto release_out;
10286         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10287         if (ret < 0)
10288                 goto release_out;
10289         if (ret > 0) {
10290                 ret = -ENOENT;
10291                 goto release_out;
10292         }
10293
10294         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10295         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10296                             struct btrfs_extent_item);
10297         flags = btrfs_extent_flags(path.nodes[0], ei);
10298         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10299                 ret = -ENOENT;
10300                 goto release_out;
10301         }
10302
10303         /* Get transid for later read_tree_block() check */
10304         transid = btrfs_extent_generation(path.nodes[0], ei);
10305
10306         /* Get backref level as one source */
10307         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10308                 backref_level = key.offset;
10309         } else {
10310                 struct btrfs_tree_block_info *info;
10311
10312                 info = (struct btrfs_tree_block_info *)(ei + 1);
10313                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10314         }
10315         btrfs_release_path(&path);
10316
10317         /* Get level from tree block as an alternative source */
10318         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10319         if (!extent_buffer_uptodate(eb)) {
10320                 free_extent_buffer(eb);
10321                 return -EIO;
10322         }
10323         header_level = btrfs_header_level(eb);
10324         free_extent_buffer(eb);
10325
10326         if (header_level != backref_level)
10327                 return -EIO;
10328         return header_level;
10329
10330 release_out:
10331         btrfs_release_path(&path);
10332         return ret;
10333 }
10334
10335 /*
10336  * Check if a tree block backref is valid (points to a valid tree block)
10337  * if level == -1, level will be resolved
10338  * Return >0 for any error found and print error message
10339  */
10340 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10341                                     u64 bytenr, int level)
10342 {
10343         struct btrfs_root *root;
10344         struct btrfs_key key;
10345         struct btrfs_path path;
10346         struct extent_buffer *eb;
10347         struct extent_buffer *node;
10348         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10349         int err = 0;
10350         int ret;
10351
10352         /* Query level for level == -1 special case */
10353         if (level == -1)
10354                 level = query_tree_block_level(fs_info, bytenr);
10355         if (level < 0) {
10356                 err |= REFERENCER_MISSING;
10357                 goto out;
10358         }
10359
10360         key.objectid = root_id;
10361         key.type = BTRFS_ROOT_ITEM_KEY;
10362         key.offset = (u64)-1;
10363
10364         root = btrfs_read_fs_root(fs_info, &key);
10365         if (IS_ERR(root)) {
10366                 err |= REFERENCER_MISSING;
10367                 goto out;
10368         }
10369
10370         /* Read out the tree block to get item/node key */
10371         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10372         if (!extent_buffer_uptodate(eb)) {
10373                 err |= REFERENCER_MISSING;
10374                 free_extent_buffer(eb);
10375                 goto out;
10376         }
10377
10378         /* Empty tree, no need to check key */
10379         if (!btrfs_header_nritems(eb) && !level) {
10380                 free_extent_buffer(eb);
10381                 goto out;
10382         }
10383
10384         if (level)
10385                 btrfs_node_key_to_cpu(eb, &key, 0);
10386         else
10387                 btrfs_item_key_to_cpu(eb, &key, 0);
10388
10389         free_extent_buffer(eb);
10390
10391         btrfs_init_path(&path);
10392         path.lowest_level = level;
10393         /* Search with the first key, to ensure we can reach it */
10394         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10395         if (ret < 0) {
10396                 err |= REFERENCER_MISSING;
10397                 goto release_out;
10398         }
10399
10400         node = path.nodes[level];
10401         if (btrfs_header_bytenr(node) != bytenr) {
10402                 error(
10403         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10404                         bytenr, nodesize, bytenr,
10405                         btrfs_header_bytenr(node));
10406                 err |= REFERENCER_MISMATCH;
10407         }
10408         if (btrfs_header_level(node) != level) {
10409                 error(
10410         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10411                         bytenr, nodesize, level,
10412                         btrfs_header_level(node));
10413                 err |= REFERENCER_MISMATCH;
10414         }
10415
10416 release_out:
10417         btrfs_release_path(&path);
10418 out:
10419         if (err & REFERENCER_MISSING) {
10420                 if (level < 0)
10421                         error("extent [%llu %d] lost referencer (owner: %llu)",
10422                                 bytenr, nodesize, root_id);
10423                 else
10424                         error(
10425                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10426                                 bytenr, nodesize, root_id, level);
10427         }
10428
10429         return err;
10430 }
10431
10432 /*
10433  * Check if tree block @eb is tree reloc root.
10434  * Return 0 if it's not or any problem happens
10435  * Return 1 if it's a tree reloc root
10436  */
10437 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10438                                  struct extent_buffer *eb)
10439 {
10440         struct btrfs_root *tree_reloc_root;
10441         struct btrfs_key key;
10442         u64 bytenr = btrfs_header_bytenr(eb);
10443         u64 owner = btrfs_header_owner(eb);
10444         int ret = 0;
10445
10446         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10447         key.offset = owner;
10448         key.type = BTRFS_ROOT_ITEM_KEY;
10449
10450         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10451         if (IS_ERR(tree_reloc_root))
10452                 return 0;
10453
10454         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10455                 ret = 1;
10456         btrfs_free_fs_root(tree_reloc_root);
10457         return ret;
10458 }
10459
10460 /*
10461  * Check referencer for shared block backref
10462  * If level == -1, this function will resolve the level.
10463  */
10464 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10465                                      u64 parent, u64 bytenr, int level)
10466 {
10467         struct extent_buffer *eb;
10468         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10469         u32 nr;
10470         int found_parent = 0;
10471         int i;
10472
10473         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10474         if (!extent_buffer_uptodate(eb))
10475                 goto out;
10476
10477         if (level == -1)
10478                 level = query_tree_block_level(fs_info, bytenr);
10479         if (level < 0)
10480                 goto out;
10481
10482         /* It's possible it's a tree reloc root */
10483         if (parent == bytenr) {
10484                 if (is_tree_reloc_root(fs_info, eb))
10485                         found_parent = 1;
10486                 goto out;
10487         }
10488
10489         if (level + 1 != btrfs_header_level(eb))
10490                 goto out;
10491
10492         nr = btrfs_header_nritems(eb);
10493         for (i = 0; i < nr; i++) {
10494                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10495                         found_parent = 1;
10496                         break;
10497                 }
10498         }
10499 out:
10500         free_extent_buffer(eb);
10501         if (!found_parent) {
10502                 error(
10503         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10504                         bytenr, nodesize, parent, level);
10505                 return REFERENCER_MISSING;
10506         }
10507         return 0;
10508 }
10509
10510 /*
10511  * Check referencer for normal (inlined) data ref
10512  * If len == 0, it will be resolved by searching in extent tree
10513  */
10514 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10515                                      u64 root_id, u64 objectid, u64 offset,
10516                                      u64 bytenr, u64 len, u32 count)
10517 {
10518         struct btrfs_root *root;
10519         struct btrfs_root *extent_root = fs_info->extent_root;
10520         struct btrfs_key key;
10521         struct btrfs_path path;
10522         struct extent_buffer *leaf;
10523         struct btrfs_file_extent_item *fi;
10524         u32 found_count = 0;
10525         int slot;
10526         int ret = 0;
10527
10528         if (!len) {
10529                 key.objectid = bytenr;
10530                 key.type = BTRFS_EXTENT_ITEM_KEY;
10531                 key.offset = (u64)-1;
10532
10533                 btrfs_init_path(&path);
10534                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10535                 if (ret < 0)
10536                         goto out;
10537                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10538                 if (ret)
10539                         goto out;
10540                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10541                 if (key.objectid != bytenr ||
10542                     key.type != BTRFS_EXTENT_ITEM_KEY)
10543                         goto out;
10544                 len = key.offset;
10545                 btrfs_release_path(&path);
10546         }
10547         key.objectid = root_id;
10548         key.type = BTRFS_ROOT_ITEM_KEY;
10549         key.offset = (u64)-1;
10550         btrfs_init_path(&path);
10551
10552         root = btrfs_read_fs_root(fs_info, &key);
10553         if (IS_ERR(root))
10554                 goto out;
10555
10556         key.objectid = objectid;
10557         key.type = BTRFS_EXTENT_DATA_KEY;
10558         /*
10559          * It can be nasty as data backref offset is
10560          * file offset - file extent offset, which is smaller or
10561          * equal to original backref offset.  The only special case is
10562          * overflow.  So we need to special check and do further search.
10563          */
10564         key.offset = offset & (1ULL << 63) ? 0 : offset;
10565
10566         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10567         if (ret < 0)
10568                 goto out;
10569
10570         /*
10571          * Search afterwards to get correct one
10572          * NOTE: As we must do a comprehensive check on the data backref to
10573          * make sure the dref count also matches, we must iterate all file
10574          * extents for that inode.
10575          */
10576         while (1) {
10577                 leaf = path.nodes[0];
10578                 slot = path.slots[0];
10579
10580                 btrfs_item_key_to_cpu(leaf, &key, slot);
10581                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10582                         break;
10583                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10584                 /*
10585                  * Except normal disk bytenr and disk num bytes, we still
10586                  * need to do extra check on dbackref offset as
10587                  * dbackref offset = file_offset - file_extent_offset
10588                  */
10589                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10590                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10591                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10592                     offset)
10593                         found_count++;
10594
10595                 ret = btrfs_next_item(root, &path);
10596                 if (ret)
10597                         break;
10598         }
10599 out:
10600         btrfs_release_path(&path);
10601         if (found_count != count) {
10602                 error(
10603 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10604                         bytenr, len, root_id, objectid, offset, count, found_count);
10605                 return REFERENCER_MISSING;
10606         }
10607         return 0;
10608 }
10609
10610 /*
10611  * Check if the referencer of a shared data backref exists
10612  */
10613 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10614                                      u64 parent, u64 bytenr)
10615 {
10616         struct extent_buffer *eb;
10617         struct btrfs_key key;
10618         struct btrfs_file_extent_item *fi;
10619         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10620         u32 nr;
10621         int found_parent = 0;
10622         int i;
10623
10624         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10625         if (!extent_buffer_uptodate(eb))
10626                 goto out;
10627
10628         nr = btrfs_header_nritems(eb);
10629         for (i = 0; i < nr; i++) {
10630                 btrfs_item_key_to_cpu(eb, &key, i);
10631                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10632                         continue;
10633
10634                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10635                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10636                         continue;
10637
10638                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10639                         found_parent = 1;
10640                         break;
10641                 }
10642         }
10643
10644 out:
10645         free_extent_buffer(eb);
10646         if (!found_parent) {
10647                 error("shared extent %llu referencer lost (parent: %llu)",
10648                         bytenr, parent);
10649                 return REFERENCER_MISSING;
10650         }
10651         return 0;
10652 }
10653
10654 /*
10655  * This function will check a given extent item, including its backref and
10656  * itself (like crossing stripe boundary and type)
10657  *
10658  * Since we don't use extent_record anymore, introduce new error bit
10659  */
10660 static int check_extent_item(struct btrfs_fs_info *fs_info,
10661                              struct extent_buffer *eb, int slot)
10662 {
10663         struct btrfs_extent_item *ei;
10664         struct btrfs_extent_inline_ref *iref;
10665         struct btrfs_extent_data_ref *dref;
10666         unsigned long end;
10667         unsigned long ptr;
10668         int type;
10669         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10670         u32 item_size = btrfs_item_size_nr(eb, slot);
10671         u64 flags;
10672         u64 offset;
10673         int metadata = 0;
10674         int level;
10675         struct btrfs_key key;
10676         int ret;
10677         int err = 0;
10678
10679         btrfs_item_key_to_cpu(eb, &key, slot);
10680         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10681                 bytes_used += key.offset;
10682         else
10683                 bytes_used += nodesize;
10684
10685         if (item_size < sizeof(*ei)) {
10686                 /*
10687                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10688                  * old thing when on disk format is still un-determined.
10689                  * No need to care about it anymore
10690                  */
10691                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10692                 return -ENOTTY;
10693         }
10694
10695         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10696         flags = btrfs_extent_flags(eb, ei);
10697
10698         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10699                 metadata = 1;
10700         if (metadata && check_crossing_stripes(global_info, key.objectid,
10701                                                eb->len)) {
10702                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10703                       key.objectid, key.objectid + nodesize);
10704                 err |= CROSSING_STRIPE_BOUNDARY;
10705         }
10706
10707         ptr = (unsigned long)(ei + 1);
10708
10709         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10710                 /* Old EXTENT_ITEM metadata */
10711                 struct btrfs_tree_block_info *info;
10712
10713                 info = (struct btrfs_tree_block_info *)ptr;
10714                 level = btrfs_tree_block_level(eb, info);
10715                 ptr += sizeof(struct btrfs_tree_block_info);
10716         } else {
10717                 /* New METADATA_ITEM */
10718                 level = key.offset;
10719         }
10720         end = (unsigned long)ei + item_size;
10721
10722         if (ptr >= end) {
10723                 err |= ITEM_SIZE_MISMATCH;
10724                 goto out;
10725         }
10726
10727         /* Now check every backref in this extent item */
10728 next:
10729         iref = (struct btrfs_extent_inline_ref *)ptr;
10730         type = btrfs_extent_inline_ref_type(eb, iref);
10731         offset = btrfs_extent_inline_ref_offset(eb, iref);
10732         switch (type) {
10733         case BTRFS_TREE_BLOCK_REF_KEY:
10734                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10735                                                level);
10736                 err |= ret;
10737                 break;
10738         case BTRFS_SHARED_BLOCK_REF_KEY:
10739                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10740                                                  level);
10741                 err |= ret;
10742                 break;
10743         case BTRFS_EXTENT_DATA_REF_KEY:
10744                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10745                 ret = check_extent_data_backref(fs_info,
10746                                 btrfs_extent_data_ref_root(eb, dref),
10747                                 btrfs_extent_data_ref_objectid(eb, dref),
10748                                 btrfs_extent_data_ref_offset(eb, dref),
10749                                 key.objectid, key.offset,
10750                                 btrfs_extent_data_ref_count(eb, dref));
10751                 err |= ret;
10752                 break;
10753         case BTRFS_SHARED_DATA_REF_KEY:
10754                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10755                 err |= ret;
10756                 break;
10757         default:
10758                 error("extent[%llu %d %llu] has unknown ref type: %d",
10759                         key.objectid, key.type, key.offset, type);
10760                 err |= UNKNOWN_TYPE;
10761                 goto out;
10762         }
10763
10764         ptr += btrfs_extent_inline_ref_size(type);
10765         if (ptr < end)
10766                 goto next;
10767
10768 out:
10769         return err;
10770 }
10771
10772 /*
10773  * Check if a dev extent item is referred correctly by its chunk
10774  */
10775 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10776                                  struct extent_buffer *eb, int slot)
10777 {
10778         struct btrfs_root *chunk_root = fs_info->chunk_root;
10779         struct btrfs_dev_extent *ptr;
10780         struct btrfs_path path;
10781         struct btrfs_key chunk_key;
10782         struct btrfs_key devext_key;
10783         struct btrfs_chunk *chunk;
10784         struct extent_buffer *l;
10785         int num_stripes;
10786         u64 length;
10787         int i;
10788         int found_chunk = 0;
10789         int ret;
10790
10791         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10792         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10793         length = btrfs_dev_extent_length(eb, ptr);
10794
10795         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10796         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10797         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10798
10799         btrfs_init_path(&path);
10800         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10801         if (ret)
10802                 goto out;
10803
10804         l = path.nodes[0];
10805         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10806         if (btrfs_chunk_length(l, chunk) != length)
10807                 goto out;
10808
10809         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10810         for (i = 0; i < num_stripes; i++) {
10811                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10812                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10813
10814                 if (devid == devext_key.objectid &&
10815                     offset == devext_key.offset) {
10816                         found_chunk = 1;
10817                         break;
10818                 }
10819         }
10820 out:
10821         btrfs_release_path(&path);
10822         if (!found_chunk) {
10823                 error(
10824                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10825                         devext_key.objectid, devext_key.offset, length);
10826                 return REFERENCER_MISSING;
10827         }
10828         return 0;
10829 }
10830
10831 /*
10832  * Check if the used space is correct with the dev item
10833  */
10834 static int check_dev_item(struct btrfs_fs_info *fs_info,
10835                           struct extent_buffer *eb, int slot)
10836 {
10837         struct btrfs_root *dev_root = fs_info->dev_root;
10838         struct btrfs_dev_item *dev_item;
10839         struct btrfs_path path;
10840         struct btrfs_key key;
10841         struct btrfs_dev_extent *ptr;
10842         u64 dev_id;
10843         u64 used;
10844         u64 total = 0;
10845         int ret;
10846
10847         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10848         dev_id = btrfs_device_id(eb, dev_item);
10849         used = btrfs_device_bytes_used(eb, dev_item);
10850
10851         key.objectid = dev_id;
10852         key.type = BTRFS_DEV_EXTENT_KEY;
10853         key.offset = 0;
10854
10855         btrfs_init_path(&path);
10856         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10857         if (ret < 0) {
10858                 btrfs_item_key_to_cpu(eb, &key, slot);
10859                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10860                         key.objectid, key.type, key.offset);
10861                 btrfs_release_path(&path);
10862                 return REFERENCER_MISSING;
10863         }
10864
10865         /* Iterate dev_extents to calculate the used space of a device */
10866         while (1) {
10867                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10868
10869                 if (key.objectid > dev_id)
10870                         break;
10871                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10872                         goto next;
10873
10874                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10875                                      struct btrfs_dev_extent);
10876                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10877 next:
10878                 ret = btrfs_next_item(dev_root, &path);
10879                 if (ret)
10880                         break;
10881         }
10882         btrfs_release_path(&path);
10883
10884         if (used != total) {
10885                 btrfs_item_key_to_cpu(eb, &key, slot);
10886                 error(
10887 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10888                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10889                         BTRFS_DEV_EXTENT_KEY, dev_id);
10890                 return ACCOUNTING_MISMATCH;
10891         }
10892         return 0;
10893 }
10894
10895 /*
10896  * Check a block group item with its referener (chunk) and its used space
10897  * with extent/metadata item
10898  */
10899 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10900                                   struct extent_buffer *eb, int slot)
10901 {
10902         struct btrfs_root *extent_root = fs_info->extent_root;
10903         struct btrfs_root *chunk_root = fs_info->chunk_root;
10904         struct btrfs_block_group_item *bi;
10905         struct btrfs_block_group_item bg_item;
10906         struct btrfs_path path;
10907         struct btrfs_key bg_key;
10908         struct btrfs_key chunk_key;
10909         struct btrfs_key extent_key;
10910         struct btrfs_chunk *chunk;
10911         struct extent_buffer *leaf;
10912         struct btrfs_extent_item *ei;
10913         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10914         u64 flags;
10915         u64 bg_flags;
10916         u64 used;
10917         u64 total = 0;
10918         int ret;
10919         int err = 0;
10920
10921         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10922         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10923         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10924         used = btrfs_block_group_used(&bg_item);
10925         bg_flags = btrfs_block_group_flags(&bg_item);
10926
10927         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10928         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10929         chunk_key.offset = bg_key.objectid;
10930
10931         btrfs_init_path(&path);
10932         /* Search for the referencer chunk */
10933         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10934         if (ret) {
10935                 error(
10936                 "block group[%llu %llu] did not find the related chunk item",
10937                         bg_key.objectid, bg_key.offset);
10938                 err |= REFERENCER_MISSING;
10939         } else {
10940                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10941                                         struct btrfs_chunk);
10942                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10943                                                 bg_key.offset) {
10944                         error(
10945         "block group[%llu %llu] related chunk item length does not match",
10946                                 bg_key.objectid, bg_key.offset);
10947                         err |= REFERENCER_MISMATCH;
10948                 }
10949         }
10950         btrfs_release_path(&path);
10951
10952         /* Search from the block group bytenr */
10953         extent_key.objectid = bg_key.objectid;
10954         extent_key.type = 0;
10955         extent_key.offset = 0;
10956
10957         btrfs_init_path(&path);
10958         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10959         if (ret < 0)
10960                 goto out;
10961
10962         /* Iterate extent tree to account used space */
10963         while (1) {
10964                 leaf = path.nodes[0];
10965                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10966                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10967                         break;
10968
10969                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10970                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10971                         goto next;
10972                 if (extent_key.objectid < bg_key.objectid)
10973                         goto next;
10974
10975                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10976                         total += nodesize;
10977                 else
10978                         total += extent_key.offset;
10979
10980                 ei = btrfs_item_ptr(leaf, path.slots[0],
10981                                     struct btrfs_extent_item);
10982                 flags = btrfs_extent_flags(leaf, ei);
10983                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10984                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10985                                 error(
10986                         "bad extent[%llu, %llu) type mismatch with chunk",
10987                                         extent_key.objectid,
10988                                         extent_key.objectid + extent_key.offset);
10989                                 err |= CHUNK_TYPE_MISMATCH;
10990                         }
10991                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10992                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10993                                     BTRFS_BLOCK_GROUP_METADATA))) {
10994                                 error(
10995                         "bad extent[%llu, %llu) type mismatch with chunk",
10996                                         extent_key.objectid,
10997                                         extent_key.objectid + nodesize);
10998                                 err |= CHUNK_TYPE_MISMATCH;
10999                         }
11000                 }
11001 next:
11002                 ret = btrfs_next_item(extent_root, &path);
11003                 if (ret)
11004                         break;
11005         }
11006
11007 out:
11008         btrfs_release_path(&path);
11009
11010         if (total != used) {
11011                 error(
11012                 "block group[%llu %llu] used %llu but extent items used %llu",
11013                         bg_key.objectid, bg_key.offset, used, total);
11014                 err |= ACCOUNTING_MISMATCH;
11015         }
11016         return err;
11017 }
11018
11019 /*
11020  * Check a chunk item.
11021  * Including checking all referred dev_extents and block group
11022  */
11023 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11024                             struct extent_buffer *eb, int slot)
11025 {
11026         struct btrfs_root *extent_root = fs_info->extent_root;
11027         struct btrfs_root *dev_root = fs_info->dev_root;
11028         struct btrfs_path path;
11029         struct btrfs_key chunk_key;
11030         struct btrfs_key bg_key;
11031         struct btrfs_key devext_key;
11032         struct btrfs_chunk *chunk;
11033         struct extent_buffer *leaf;
11034         struct btrfs_block_group_item *bi;
11035         struct btrfs_block_group_item bg_item;
11036         struct btrfs_dev_extent *ptr;
11037         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11038         u64 length;
11039         u64 chunk_end;
11040         u64 type;
11041         u64 profile;
11042         int num_stripes;
11043         u64 offset;
11044         u64 objectid;
11045         int i;
11046         int ret;
11047         int err = 0;
11048
11049         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11050         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11051         length = btrfs_chunk_length(eb, chunk);
11052         chunk_end = chunk_key.offset + length;
11053         if (!IS_ALIGNED(length, sectorsize)) {
11054                 error("chunk[%llu %llu) not aligned to %u",
11055                         chunk_key.offset, chunk_end, sectorsize);
11056                 err |= BYTES_UNALIGNED;
11057                 goto out;
11058         }
11059
11060         type = btrfs_chunk_type(eb, chunk);
11061         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11062         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11063                 error("chunk[%llu %llu) has no chunk type",
11064                         chunk_key.offset, chunk_end);
11065                 err |= UNKNOWN_TYPE;
11066         }
11067         if (profile && (profile & (profile - 1))) {
11068                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11069                         chunk_key.offset, chunk_end, profile);
11070                 err |= UNKNOWN_TYPE;
11071         }
11072
11073         bg_key.objectid = chunk_key.offset;
11074         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11075         bg_key.offset = length;
11076
11077         btrfs_init_path(&path);
11078         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11079         if (ret) {
11080                 error(
11081                 "chunk[%llu %llu) did not find the related block group item",
11082                         chunk_key.offset, chunk_end);
11083                 err |= REFERENCER_MISSING;
11084         } else{
11085                 leaf = path.nodes[0];
11086                 bi = btrfs_item_ptr(leaf, path.slots[0],
11087                                     struct btrfs_block_group_item);
11088                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11089                                    sizeof(bg_item));
11090                 if (btrfs_block_group_flags(&bg_item) != type) {
11091                         error(
11092 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11093                                 chunk_key.offset, chunk_end, type,
11094                                 btrfs_block_group_flags(&bg_item));
11095                         err |= REFERENCER_MISSING;
11096                 }
11097         }
11098
11099         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11100         for (i = 0; i < num_stripes; i++) {
11101                 btrfs_release_path(&path);
11102                 btrfs_init_path(&path);
11103                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11104                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11105                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11106
11107                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11108                                         0, 0);
11109                 if (ret)
11110                         goto not_match_dev;
11111
11112                 leaf = path.nodes[0];
11113                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11114                                      struct btrfs_dev_extent);
11115                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11116                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11117                 if (objectid != chunk_key.objectid ||
11118                     offset != chunk_key.offset ||
11119                     btrfs_dev_extent_length(leaf, ptr) != length)
11120                         goto not_match_dev;
11121                 continue;
11122 not_match_dev:
11123                 err |= BACKREF_MISSING;
11124                 error(
11125                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11126                         chunk_key.objectid, chunk_end, i);
11127                 continue;
11128         }
11129         btrfs_release_path(&path);
11130 out:
11131         return err;
11132 }
11133
11134 /*
11135  * Main entry function to check known items and update related accounting info
11136  */
11137 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11138 {
11139         struct btrfs_fs_info *fs_info = root->fs_info;
11140         struct btrfs_key key;
11141         int slot = 0;
11142         int type;
11143         struct btrfs_extent_data_ref *dref;
11144         int ret;
11145         int err = 0;
11146
11147 next:
11148         btrfs_item_key_to_cpu(eb, &key, slot);
11149         type = key.type;
11150
11151         switch (type) {
11152         case BTRFS_EXTENT_DATA_KEY:
11153                 ret = check_extent_data_item(root, eb, slot);
11154                 err |= ret;
11155                 break;
11156         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11157                 ret = check_block_group_item(fs_info, eb, slot);
11158                 err |= ret;
11159                 break;
11160         case BTRFS_DEV_ITEM_KEY:
11161                 ret = check_dev_item(fs_info, eb, slot);
11162                 err |= ret;
11163                 break;
11164         case BTRFS_CHUNK_ITEM_KEY:
11165                 ret = check_chunk_item(fs_info, eb, slot);
11166                 err |= ret;
11167                 break;
11168         case BTRFS_DEV_EXTENT_KEY:
11169                 ret = check_dev_extent_item(fs_info, eb, slot);
11170                 err |= ret;
11171                 break;
11172         case BTRFS_EXTENT_ITEM_KEY:
11173         case BTRFS_METADATA_ITEM_KEY:
11174                 ret = check_extent_item(fs_info, eb, slot);
11175                 err |= ret;
11176                 break;
11177         case BTRFS_EXTENT_CSUM_KEY:
11178                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11179                 break;
11180         case BTRFS_TREE_BLOCK_REF_KEY:
11181                 ret = check_tree_block_backref(fs_info, key.offset,
11182                                                key.objectid, -1);
11183                 err |= ret;
11184                 break;
11185         case BTRFS_EXTENT_DATA_REF_KEY:
11186                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11187                 ret = check_extent_data_backref(fs_info,
11188                                 btrfs_extent_data_ref_root(eb, dref),
11189                                 btrfs_extent_data_ref_objectid(eb, dref),
11190                                 btrfs_extent_data_ref_offset(eb, dref),
11191                                 key.objectid, 0,
11192                                 btrfs_extent_data_ref_count(eb, dref));
11193                 err |= ret;
11194                 break;
11195         case BTRFS_SHARED_BLOCK_REF_KEY:
11196                 ret = check_shared_block_backref(fs_info, key.offset,
11197                                                  key.objectid, -1);
11198                 err |= ret;
11199                 break;
11200         case BTRFS_SHARED_DATA_REF_KEY:
11201                 ret = check_shared_data_backref(fs_info, key.offset,
11202                                                 key.objectid);
11203                 err |= ret;
11204                 break;
11205         default:
11206                 break;
11207         }
11208
11209         if (++slot < btrfs_header_nritems(eb))
11210                 goto next;
11211
11212         return err;
11213 }
11214
11215 /*
11216  * Helper function for later fs/subvol tree check.  To determine if a tree
11217  * block should be checked.
11218  * This function will ensure only the direct referencer with lowest rootid to
11219  * check a fs/subvolume tree block.
11220  *
11221  * Backref check at extent tree would detect errors like missing subvolume
11222  * tree, so we can do aggressive check to reduce duplicated checks.
11223  */
11224 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11225 {
11226         struct btrfs_root *extent_root = root->fs_info->extent_root;
11227         struct btrfs_key key;
11228         struct btrfs_path path;
11229         struct extent_buffer *leaf;
11230         int slot;
11231         struct btrfs_extent_item *ei;
11232         unsigned long ptr;
11233         unsigned long end;
11234         int type;
11235         u32 item_size;
11236         u64 offset;
11237         struct btrfs_extent_inline_ref *iref;
11238         int ret;
11239
11240         btrfs_init_path(&path);
11241         key.objectid = btrfs_header_bytenr(eb);
11242         key.type = BTRFS_METADATA_ITEM_KEY;
11243         key.offset = (u64)-1;
11244
11245         /*
11246          * Any failure in backref resolving means we can't determine
11247          * whom the tree block belongs to.
11248          * So in that case, we need to check that tree block
11249          */
11250         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11251         if (ret < 0)
11252                 goto need_check;
11253
11254         ret = btrfs_previous_extent_item(extent_root, &path,
11255                                          btrfs_header_bytenr(eb));
11256         if (ret)
11257                 goto need_check;
11258
11259         leaf = path.nodes[0];
11260         slot = path.slots[0];
11261         btrfs_item_key_to_cpu(leaf, &key, slot);
11262         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11263
11264         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11265                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11266         } else {
11267                 struct btrfs_tree_block_info *info;
11268
11269                 info = (struct btrfs_tree_block_info *)(ei + 1);
11270                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11271         }
11272
11273         item_size = btrfs_item_size_nr(leaf, slot);
11274         ptr = (unsigned long)iref;
11275         end = (unsigned long)ei + item_size;
11276         while (ptr < end) {
11277                 iref = (struct btrfs_extent_inline_ref *)ptr;
11278                 type = btrfs_extent_inline_ref_type(leaf, iref);
11279                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11280
11281                 /*
11282                  * We only check the tree block if current root is
11283                  * the lowest referencer of it.
11284                  */
11285                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11286                     offset < root->objectid) {
11287                         btrfs_release_path(&path);
11288                         return 0;
11289                 }
11290
11291                 ptr += btrfs_extent_inline_ref_size(type);
11292         }
11293         /*
11294          * Normally we should also check keyed tree block ref, but that may be
11295          * very time consuming.  Inlined ref should already make us skip a lot
11296          * of refs now.  So skip search keyed tree block ref.
11297          */
11298
11299 need_check:
11300         btrfs_release_path(&path);
11301         return 1;
11302 }
11303
11304 /*
11305  * Traversal function for tree block. We will do:
11306  * 1) Skip shared fs/subvolume tree blocks
11307  * 2) Update related bytes accounting
11308  * 3) Pre-order traversal
11309  */
11310 static int traverse_tree_block(struct btrfs_root *root,
11311                                 struct extent_buffer *node)
11312 {
11313         struct extent_buffer *eb;
11314         struct btrfs_key key;
11315         struct btrfs_key drop_key;
11316         int level;
11317         u64 nr;
11318         int i;
11319         int err = 0;
11320         int ret;
11321
11322         /*
11323          * Skip shared fs/subvolume tree block, in that case they will
11324          * be checked by referencer with lowest rootid
11325          */
11326         if (is_fstree(root->objectid) && !should_check(root, node))
11327                 return 0;
11328
11329         /* Update bytes accounting */
11330         total_btree_bytes += node->len;
11331         if (fs_root_objectid(btrfs_header_owner(node)))
11332                 total_fs_tree_bytes += node->len;
11333         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11334                 total_extent_tree_bytes += node->len;
11335         if (!found_old_backref &&
11336             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11337             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11338             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11339                 found_old_backref = 1;
11340
11341         /* pre-order tranversal, check itself first */
11342         level = btrfs_header_level(node);
11343         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11344                                    btrfs_header_level(node),
11345                                    btrfs_header_owner(node));
11346         err |= ret;
11347         if (err)
11348                 error(
11349         "check %s failed root %llu bytenr %llu level %d, force continue check",
11350                         level ? "node":"leaf", root->objectid,
11351                         btrfs_header_bytenr(node), btrfs_header_level(node));
11352
11353         if (!level) {
11354                 btree_space_waste += btrfs_leaf_free_space(root, node);
11355                 ret = check_leaf_items(root, node);
11356                 err |= ret;
11357                 return err;
11358         }
11359
11360         nr = btrfs_header_nritems(node);
11361         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11362         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11363                 sizeof(struct btrfs_key_ptr);
11364
11365         /* Then check all its children */
11366         for (i = 0; i < nr; i++) {
11367                 u64 blocknr = btrfs_node_blockptr(node, i);
11368
11369                 btrfs_node_key_to_cpu(node, &key, i);
11370                 if (level == root->root_item.drop_level &&
11371                     is_dropped_key(&key, &drop_key))
11372                         continue;
11373
11374                 /*
11375                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11376                  * to call the function itself.
11377                  */
11378                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11379                 if (extent_buffer_uptodate(eb)) {
11380                         ret = traverse_tree_block(root, eb);
11381                         err |= ret;
11382                 }
11383                 free_extent_buffer(eb);
11384         }
11385
11386         return err;
11387 }
11388
11389 /*
11390  * Low memory usage version check_chunks_and_extents.
11391  */
11392 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11393 {
11394         struct btrfs_path path;
11395         struct btrfs_key key;
11396         struct btrfs_root *root1;
11397         struct btrfs_root *cur_root;
11398         int err = 0;
11399         int ret;
11400
11401         root1 = root->fs_info->chunk_root;
11402         ret = traverse_tree_block(root1, root1->node);
11403         err |= ret;
11404
11405         root1 = root->fs_info->tree_root;
11406         ret = traverse_tree_block(root1, root1->node);
11407         err |= ret;
11408
11409         btrfs_init_path(&path);
11410         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11411         key.offset = 0;
11412         key.type = BTRFS_ROOT_ITEM_KEY;
11413
11414         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11415         if (ret) {
11416                 error("cannot find extent treet in tree_root");
11417                 goto out;
11418         }
11419
11420         while (1) {
11421                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11422                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11423                         goto next;
11424                 key.offset = (u64)-1;
11425
11426                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11427                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11428                                         &key);
11429                 else
11430                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11431                 if (IS_ERR(cur_root) || !cur_root) {
11432                         error("failed to read tree: %lld", key.objectid);
11433                         goto next;
11434                 }
11435
11436                 ret = traverse_tree_block(cur_root, cur_root->node);
11437                 err |= ret;
11438
11439                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11440                         btrfs_free_fs_root(cur_root);
11441 next:
11442                 ret = btrfs_next_item(root1, &path);
11443                 if (ret)
11444                         goto out;
11445         }
11446
11447 out:
11448         btrfs_release_path(&path);
11449         return err;
11450 }
11451
11452 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11453                            struct btrfs_root *root, int overwrite)
11454 {
11455         struct extent_buffer *c;
11456         struct extent_buffer *old = root->node;
11457         int level;
11458         int ret;
11459         struct btrfs_disk_key disk_key = {0,0,0};
11460
11461         level = 0;
11462
11463         if (overwrite) {
11464                 c = old;
11465                 extent_buffer_get(c);
11466                 goto init;
11467         }
11468         c = btrfs_alloc_free_block(trans, root,
11469                                    root->nodesize,
11470                                    root->root_key.objectid,
11471                                    &disk_key, level, 0, 0);
11472         if (IS_ERR(c)) {
11473                 c = old;
11474                 extent_buffer_get(c);
11475                 overwrite = 1;
11476         }
11477 init:
11478         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11479         btrfs_set_header_level(c, level);
11480         btrfs_set_header_bytenr(c, c->start);
11481         btrfs_set_header_generation(c, trans->transid);
11482         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11483         btrfs_set_header_owner(c, root->root_key.objectid);
11484
11485         write_extent_buffer(c, root->fs_info->fsid,
11486                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11487
11488         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11489                             btrfs_header_chunk_tree_uuid(c),
11490                             BTRFS_UUID_SIZE);
11491
11492         btrfs_mark_buffer_dirty(c);
11493         /*
11494          * this case can happen in the following case:
11495          *
11496          * 1.overwrite previous root.
11497          *
11498          * 2.reinit reloc data root, this is because we skip pin
11499          * down reloc data tree before which means we can allocate
11500          * same block bytenr here.
11501          */
11502         if (old->start == c->start) {
11503                 btrfs_set_root_generation(&root->root_item,
11504                                           trans->transid);
11505                 root->root_item.level = btrfs_header_level(root->node);
11506                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11507                                         &root->root_key, &root->root_item);
11508                 if (ret) {
11509                         free_extent_buffer(c);
11510                         return ret;
11511                 }
11512         }
11513         free_extent_buffer(old);
11514         root->node = c;
11515         add_root_to_dirty_list(root);
11516         return 0;
11517 }
11518
11519 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11520                                 struct extent_buffer *eb, int tree_root)
11521 {
11522         struct extent_buffer *tmp;
11523         struct btrfs_root_item *ri;
11524         struct btrfs_key key;
11525         u64 bytenr;
11526         u32 nodesize;
11527         int level = btrfs_header_level(eb);
11528         int nritems;
11529         int ret;
11530         int i;
11531
11532         /*
11533          * If we have pinned this block before, don't pin it again.
11534          * This can not only avoid forever loop with broken filesystem
11535          * but also give us some speedups.
11536          */
11537         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11538                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11539                 return 0;
11540
11541         btrfs_pin_extent(fs_info, eb->start, eb->len);
11542
11543         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11544         nritems = btrfs_header_nritems(eb);
11545         for (i = 0; i < nritems; i++) {
11546                 if (level == 0) {
11547                         btrfs_item_key_to_cpu(eb, &key, i);
11548                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11549                                 continue;
11550                         /* Skip the extent root and reloc roots */
11551                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11552                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11553                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11554                                 continue;
11555                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11556                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11557
11558                         /*
11559                          * If at any point we start needing the real root we
11560                          * will have to build a stump root for the root we are
11561                          * in, but for now this doesn't actually use the root so
11562                          * just pass in extent_root.
11563                          */
11564                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11565                                               nodesize, 0);
11566                         if (!extent_buffer_uptodate(tmp)) {
11567                                 fprintf(stderr, "Error reading root block\n");
11568                                 return -EIO;
11569                         }
11570                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11571                         free_extent_buffer(tmp);
11572                         if (ret)
11573                                 return ret;
11574                 } else {
11575                         bytenr = btrfs_node_blockptr(eb, i);
11576
11577                         /* If we aren't the tree root don't read the block */
11578                         if (level == 1 && !tree_root) {
11579                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11580                                 continue;
11581                         }
11582
11583                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11584                                               nodesize, 0);
11585                         if (!extent_buffer_uptodate(tmp)) {
11586                                 fprintf(stderr, "Error reading tree block\n");
11587                                 return -EIO;
11588                         }
11589                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11590                         free_extent_buffer(tmp);
11591                         if (ret)
11592                                 return ret;
11593                 }
11594         }
11595
11596         return 0;
11597 }
11598
11599 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11600 {
11601         int ret;
11602
11603         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11604         if (ret)
11605                 return ret;
11606
11607         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11608 }
11609
11610 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11611 {
11612         struct btrfs_block_group_cache *cache;
11613         struct btrfs_path path;
11614         struct extent_buffer *leaf;
11615         struct btrfs_chunk *chunk;
11616         struct btrfs_key key;
11617         int ret;
11618         u64 start;
11619
11620         btrfs_init_path(&path);
11621         key.objectid = 0;
11622         key.type = BTRFS_CHUNK_ITEM_KEY;
11623         key.offset = 0;
11624         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11625         if (ret < 0) {
11626                 btrfs_release_path(&path);
11627                 return ret;
11628         }
11629
11630         /*
11631          * We do this in case the block groups were screwed up and had alloc
11632          * bits that aren't actually set on the chunks.  This happens with
11633          * restored images every time and could happen in real life I guess.
11634          */
11635         fs_info->avail_data_alloc_bits = 0;
11636         fs_info->avail_metadata_alloc_bits = 0;
11637         fs_info->avail_system_alloc_bits = 0;
11638
11639         /* First we need to create the in-memory block groups */
11640         while (1) {
11641                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11642                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11643                         if (ret < 0) {
11644                                 btrfs_release_path(&path);
11645                                 return ret;
11646                         }
11647                         if (ret) {
11648                                 ret = 0;
11649                                 break;
11650                         }
11651                 }
11652                 leaf = path.nodes[0];
11653                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11654                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11655                         path.slots[0]++;
11656                         continue;
11657                 }
11658
11659                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11660                 btrfs_add_block_group(fs_info, 0,
11661                                       btrfs_chunk_type(leaf, chunk),
11662                                       key.objectid, key.offset,
11663                                       btrfs_chunk_length(leaf, chunk));
11664                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11665                                  key.offset + btrfs_chunk_length(leaf, chunk));
11666                 path.slots[0]++;
11667         }
11668         start = 0;
11669         while (1) {
11670                 cache = btrfs_lookup_first_block_group(fs_info, start);
11671                 if (!cache)
11672                         break;
11673                 cache->cached = 1;
11674                 start = cache->key.objectid + cache->key.offset;
11675         }
11676
11677         btrfs_release_path(&path);
11678         return 0;
11679 }
11680
11681 static int reset_balance(struct btrfs_trans_handle *trans,
11682                          struct btrfs_fs_info *fs_info)
11683 {
11684         struct btrfs_root *root = fs_info->tree_root;
11685         struct btrfs_path path;
11686         struct extent_buffer *leaf;
11687         struct btrfs_key key;
11688         int del_slot, del_nr = 0;
11689         int ret;
11690         int found = 0;
11691
11692         btrfs_init_path(&path);
11693         key.objectid = BTRFS_BALANCE_OBJECTID;
11694         key.type = BTRFS_BALANCE_ITEM_KEY;
11695         key.offset = 0;
11696         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11697         if (ret) {
11698                 if (ret > 0)
11699                         ret = 0;
11700                 if (!ret)
11701                         goto reinit_data_reloc;
11702                 else
11703                         goto out;
11704         }
11705
11706         ret = btrfs_del_item(trans, root, &path);
11707         if (ret)
11708                 goto out;
11709         btrfs_release_path(&path);
11710
11711         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11712         key.type = BTRFS_ROOT_ITEM_KEY;
11713         key.offset = 0;
11714         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11715         if (ret < 0)
11716                 goto out;
11717         while (1) {
11718                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11719                         if (!found)
11720                                 break;
11721
11722                         if (del_nr) {
11723                                 ret = btrfs_del_items(trans, root, &path,
11724                                                       del_slot, del_nr);
11725                                 del_nr = 0;
11726                                 if (ret)
11727                                         goto out;
11728                         }
11729                         key.offset++;
11730                         btrfs_release_path(&path);
11731
11732                         found = 0;
11733                         ret = btrfs_search_slot(trans, root, &key, &path,
11734                                                 -1, 1);
11735                         if (ret < 0)
11736                                 goto out;
11737                         continue;
11738                 }
11739                 found = 1;
11740                 leaf = path.nodes[0];
11741                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11742                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11743                         break;
11744                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11745                         path.slots[0]++;
11746                         continue;
11747                 }
11748                 if (!del_nr) {
11749                         del_slot = path.slots[0];
11750                         del_nr = 1;
11751                 } else {
11752                         del_nr++;
11753                 }
11754                 path.slots[0]++;
11755         }
11756
11757         if (del_nr) {
11758                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11759                 if (ret)
11760                         goto out;
11761         }
11762         btrfs_release_path(&path);
11763
11764 reinit_data_reloc:
11765         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11766         key.type = BTRFS_ROOT_ITEM_KEY;
11767         key.offset = (u64)-1;
11768         root = btrfs_read_fs_root(fs_info, &key);
11769         if (IS_ERR(root)) {
11770                 fprintf(stderr, "Error reading data reloc tree\n");
11771                 ret = PTR_ERR(root);
11772                 goto out;
11773         }
11774         record_root_in_trans(trans, root);
11775         ret = btrfs_fsck_reinit_root(trans, root, 0);
11776         if (ret)
11777                 goto out;
11778         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11779 out:
11780         btrfs_release_path(&path);
11781         return ret;
11782 }
11783
11784 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11785                               struct btrfs_fs_info *fs_info)
11786 {
11787         u64 start = 0;
11788         int ret;
11789
11790         /*
11791          * The only reason we don't do this is because right now we're just
11792          * walking the trees we find and pinning down their bytes, we don't look
11793          * at any of the leaves.  In order to do mixed groups we'd have to check
11794          * the leaves of any fs roots and pin down the bytes for any file
11795          * extents we find.  Not hard but why do it if we don't have to?
11796          */
11797         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11798                 fprintf(stderr, "We don't support re-initing the extent tree "
11799                         "for mixed block groups yet, please notify a btrfs "
11800                         "developer you want to do this so they can add this "
11801                         "functionality.\n");
11802                 return -EINVAL;
11803         }
11804
11805         /*
11806          * first we need to walk all of the trees except the extent tree and pin
11807          * down the bytes that are in use so we don't overwrite any existing
11808          * metadata.
11809          */
11810         ret = pin_metadata_blocks(fs_info);
11811         if (ret) {
11812                 fprintf(stderr, "error pinning down used bytes\n");
11813                 return ret;
11814         }
11815
11816         /*
11817          * Need to drop all the block groups since we're going to recreate all
11818          * of them again.
11819          */
11820         btrfs_free_block_groups(fs_info);
11821         ret = reset_block_groups(fs_info);
11822         if (ret) {
11823                 fprintf(stderr, "error resetting the block groups\n");
11824                 return ret;
11825         }
11826
11827         /* Ok we can allocate now, reinit the extent root */
11828         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11829         if (ret) {
11830                 fprintf(stderr, "extent root initialization failed\n");
11831                 /*
11832                  * When the transaction code is updated we should end the
11833                  * transaction, but for now progs only knows about commit so
11834                  * just return an error.
11835                  */
11836                 return ret;
11837         }
11838
11839         /*
11840          * Now we have all the in-memory block groups setup so we can make
11841          * allocations properly, and the metadata we care about is safe since we
11842          * pinned all of it above.
11843          */
11844         while (1) {
11845                 struct btrfs_block_group_cache *cache;
11846
11847                 cache = btrfs_lookup_first_block_group(fs_info, start);
11848                 if (!cache)
11849                         break;
11850                 start = cache->key.objectid + cache->key.offset;
11851                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11852                                         &cache->key, &cache->item,
11853                                         sizeof(cache->item));
11854                 if (ret) {
11855                         fprintf(stderr, "Error adding block group\n");
11856                         return ret;
11857                 }
11858                 btrfs_extent_post_op(trans, fs_info->extent_root);
11859         }
11860
11861         ret = reset_balance(trans, fs_info);
11862         if (ret)
11863                 fprintf(stderr, "error resetting the pending balance\n");
11864
11865         return ret;
11866 }
11867
11868 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11869 {
11870         struct btrfs_path path;
11871         struct btrfs_trans_handle *trans;
11872         struct btrfs_key key;
11873         int ret;
11874
11875         printf("Recowing metadata block %llu\n", eb->start);
11876         key.objectid = btrfs_header_owner(eb);
11877         key.type = BTRFS_ROOT_ITEM_KEY;
11878         key.offset = (u64)-1;
11879
11880         root = btrfs_read_fs_root(root->fs_info, &key);
11881         if (IS_ERR(root)) {
11882                 fprintf(stderr, "Couldn't find owner root %llu\n",
11883                         key.objectid);
11884                 return PTR_ERR(root);
11885         }
11886
11887         trans = btrfs_start_transaction(root, 1);
11888         if (IS_ERR(trans))
11889                 return PTR_ERR(trans);
11890
11891         btrfs_init_path(&path);
11892         path.lowest_level = btrfs_header_level(eb);
11893         if (path.lowest_level)
11894                 btrfs_node_key_to_cpu(eb, &key, 0);
11895         else
11896                 btrfs_item_key_to_cpu(eb, &key, 0);
11897
11898         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11899         btrfs_commit_transaction(trans, root);
11900         btrfs_release_path(&path);
11901         return ret;
11902 }
11903
11904 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11905 {
11906         struct btrfs_path path;
11907         struct btrfs_trans_handle *trans;
11908         struct btrfs_key key;
11909         int ret;
11910
11911         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11912                bad->key.type, bad->key.offset);
11913         key.objectid = bad->root_id;
11914         key.type = BTRFS_ROOT_ITEM_KEY;
11915         key.offset = (u64)-1;
11916
11917         root = btrfs_read_fs_root(root->fs_info, &key);
11918         if (IS_ERR(root)) {
11919                 fprintf(stderr, "Couldn't find owner root %llu\n",
11920                         key.objectid);
11921                 return PTR_ERR(root);
11922         }
11923
11924         trans = btrfs_start_transaction(root, 1);
11925         if (IS_ERR(trans))
11926                 return PTR_ERR(trans);
11927
11928         btrfs_init_path(&path);
11929         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11930         if (ret) {
11931                 if (ret > 0)
11932                         ret = 0;
11933                 goto out;
11934         }
11935         ret = btrfs_del_item(trans, root, &path);
11936 out:
11937         btrfs_commit_transaction(trans, root);
11938         btrfs_release_path(&path);
11939         return ret;
11940 }
11941
11942 static int zero_log_tree(struct btrfs_root *root)
11943 {
11944         struct btrfs_trans_handle *trans;
11945         int ret;
11946
11947         trans = btrfs_start_transaction(root, 1);
11948         if (IS_ERR(trans)) {
11949                 ret = PTR_ERR(trans);
11950                 return ret;
11951         }
11952         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11953         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11954         ret = btrfs_commit_transaction(trans, root);
11955         return ret;
11956 }
11957
11958 static int populate_csum(struct btrfs_trans_handle *trans,
11959                          struct btrfs_root *csum_root, char *buf, u64 start,
11960                          u64 len)
11961 {
11962         u64 offset = 0;
11963         u64 sectorsize;
11964         int ret = 0;
11965
11966         while (offset < len) {
11967                 sectorsize = csum_root->sectorsize;
11968                 ret = read_extent_data(csum_root, buf, start + offset,
11969                                        &sectorsize, 0);
11970                 if (ret)
11971                         break;
11972                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11973                                             start + offset, buf, sectorsize);
11974                 if (ret)
11975                         break;
11976                 offset += sectorsize;
11977         }
11978         return ret;
11979 }
11980
11981 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11982                                       struct btrfs_root *csum_root,
11983                                       struct btrfs_root *cur_root)
11984 {
11985         struct btrfs_path path;
11986         struct btrfs_key key;
11987         struct extent_buffer *node;
11988         struct btrfs_file_extent_item *fi;
11989         char *buf = NULL;
11990         u64 start = 0;
11991         u64 len = 0;
11992         int slot = 0;
11993         int ret = 0;
11994
11995         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11996         if (!buf)
11997                 return -ENOMEM;
11998
11999         btrfs_init_path(&path);
12000         key.objectid = 0;
12001         key.offset = 0;
12002         key.type = 0;
12003         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12004         if (ret < 0)
12005                 goto out;
12006         /* Iterate all regular file extents and fill its csum */
12007         while (1) {
12008                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12009
12010                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12011                         goto next;
12012                 node = path.nodes[0];
12013                 slot = path.slots[0];
12014                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12015                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12016                         goto next;
12017                 start = btrfs_file_extent_disk_bytenr(node, fi);
12018                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12019
12020                 ret = populate_csum(trans, csum_root, buf, start, len);
12021                 if (ret == -EEXIST)
12022                         ret = 0;
12023                 if (ret < 0)
12024                         goto out;
12025 next:
12026                 /*
12027                  * TODO: if next leaf is corrupted, jump to nearest next valid
12028                  * leaf.
12029                  */
12030                 ret = btrfs_next_item(cur_root, &path);
12031                 if (ret < 0)
12032                         goto out;
12033                 if (ret > 0) {
12034                         ret = 0;
12035                         goto out;
12036                 }
12037         }
12038
12039 out:
12040         btrfs_release_path(&path);
12041         free(buf);
12042         return ret;
12043 }
12044
12045 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12046                                   struct btrfs_root *csum_root)
12047 {
12048         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12049         struct btrfs_path path;
12050         struct btrfs_root *tree_root = fs_info->tree_root;
12051         struct btrfs_root *cur_root;
12052         struct extent_buffer *node;
12053         struct btrfs_key key;
12054         int slot = 0;
12055         int ret = 0;
12056
12057         btrfs_init_path(&path);
12058         key.objectid = BTRFS_FS_TREE_OBJECTID;
12059         key.offset = 0;
12060         key.type = BTRFS_ROOT_ITEM_KEY;
12061         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12062         if (ret < 0)
12063                 goto out;
12064         if (ret > 0) {
12065                 ret = -ENOENT;
12066                 goto out;
12067         }
12068
12069         while (1) {
12070                 node = path.nodes[0];
12071                 slot = path.slots[0];
12072                 btrfs_item_key_to_cpu(node, &key, slot);
12073                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12074                         goto out;
12075                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12076                         goto next;
12077                 if (!is_fstree(key.objectid))
12078                         goto next;
12079                 key.offset = (u64)-1;
12080
12081                 cur_root = btrfs_read_fs_root(fs_info, &key);
12082                 if (IS_ERR(cur_root) || !cur_root) {
12083                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12084                                 key.objectid);
12085                         goto out;
12086                 }
12087                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12088                                 cur_root);
12089                 if (ret < 0)
12090                         goto out;
12091 next:
12092                 ret = btrfs_next_item(tree_root, &path);
12093                 if (ret > 0) {
12094                         ret = 0;
12095                         goto out;
12096                 }
12097                 if (ret < 0)
12098                         goto out;
12099         }
12100
12101 out:
12102         btrfs_release_path(&path);
12103         return ret;
12104 }
12105
12106 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12107                                       struct btrfs_root *csum_root)
12108 {
12109         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12110         struct btrfs_path path;
12111         struct btrfs_extent_item *ei;
12112         struct extent_buffer *leaf;
12113         char *buf;
12114         struct btrfs_key key;
12115         int ret;
12116
12117         btrfs_init_path(&path);
12118         key.objectid = 0;
12119         key.type = BTRFS_EXTENT_ITEM_KEY;
12120         key.offset = 0;
12121         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12122         if (ret < 0) {
12123                 btrfs_release_path(&path);
12124                 return ret;
12125         }
12126
12127         buf = malloc(csum_root->sectorsize);
12128         if (!buf) {
12129                 btrfs_release_path(&path);
12130                 return -ENOMEM;
12131         }
12132
12133         while (1) {
12134                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12135                         ret = btrfs_next_leaf(extent_root, &path);
12136                         if (ret < 0)
12137                                 break;
12138                         if (ret) {
12139                                 ret = 0;
12140                                 break;
12141                         }
12142                 }
12143                 leaf = path.nodes[0];
12144
12145                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12146                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12147                         path.slots[0]++;
12148                         continue;
12149                 }
12150
12151                 ei = btrfs_item_ptr(leaf, path.slots[0],
12152                                     struct btrfs_extent_item);
12153                 if (!(btrfs_extent_flags(leaf, ei) &
12154                       BTRFS_EXTENT_FLAG_DATA)) {
12155                         path.slots[0]++;
12156                         continue;
12157                 }
12158
12159                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12160                                     key.offset);
12161                 if (ret)
12162                         break;
12163                 path.slots[0]++;
12164         }
12165
12166         btrfs_release_path(&path);
12167         free(buf);
12168         return ret;
12169 }
12170
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so after that the
 * extent tree cannot be used as the source of data extents.  The caller
 * signals this with a non-zero search_fs_tree, in which case the fs/subvolume
 * trees are scanned instead of the extent tree.
 *
 * Returns the result of the selected scanner.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12187
12188 static void free_roots_info_cache(void)
12189 {
12190         if (!roots_info_cache)
12191                 return;
12192
12193         while (!cache_tree_empty(roots_info_cache)) {
12194                 struct cache_extent *entry;
12195                 struct root_item_info *rii;
12196
12197                 entry = first_cache_extent(roots_info_cache);
12198                 if (!entry)
12199                         break;
12200                 remove_cache_extent(roots_info_cache, entry);
12201                 rii = container_of(entry, struct root_item_info, cache_extent);
12202                 free(rii);
12203         }
12204
12205         free(roots_info_cache);
12206         roots_info_cache = NULL;
12207 }
12208
12209 static int build_roots_info_cache(struct btrfs_fs_info *info)
12210 {
12211         int ret = 0;
12212         struct btrfs_key key;
12213         struct extent_buffer *leaf;
12214         struct btrfs_path path;
12215
12216         if (!roots_info_cache) {
12217                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12218                 if (!roots_info_cache)
12219                         return -ENOMEM;
12220                 cache_tree_init(roots_info_cache);
12221         }
12222
12223         btrfs_init_path(&path);
12224         key.objectid = 0;
12225         key.type = BTRFS_EXTENT_ITEM_KEY;
12226         key.offset = 0;
12227         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12228         if (ret < 0)
12229                 goto out;
12230         leaf = path.nodes[0];
12231
12232         while (1) {
12233                 struct btrfs_key found_key;
12234                 struct btrfs_extent_item *ei;
12235                 struct btrfs_extent_inline_ref *iref;
12236                 int slot = path.slots[0];
12237                 int type;
12238                 u64 flags;
12239                 u64 root_id;
12240                 u8 level;
12241                 struct cache_extent *entry;
12242                 struct root_item_info *rii;
12243
12244                 if (slot >= btrfs_header_nritems(leaf)) {
12245                         ret = btrfs_next_leaf(info->extent_root, &path);
12246                         if (ret < 0) {
12247                                 break;
12248                         } else if (ret) {
12249                                 ret = 0;
12250                                 break;
12251                         }
12252                         leaf = path.nodes[0];
12253                         slot = path.slots[0];
12254                 }
12255
12256                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12257
12258                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12259                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12260                         goto next;
12261
12262                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12263                 flags = btrfs_extent_flags(leaf, ei);
12264
12265                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12266                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12267                         goto next;
12268
12269                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12270                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12271                         level = found_key.offset;
12272                 } else {
12273                         struct btrfs_tree_block_info *binfo;
12274
12275                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12276                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12277                         level = btrfs_tree_block_level(leaf, binfo);
12278                 }
12279
12280                 /*
12281                  * For a root extent, it must be of the following type and the
12282                  * first (and only one) iref in the item.
12283                  */
12284                 type = btrfs_extent_inline_ref_type(leaf, iref);
12285                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12286                         goto next;
12287
12288                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12289                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12290                 if (!entry) {
12291                         rii = malloc(sizeof(struct root_item_info));
12292                         if (!rii) {
12293                                 ret = -ENOMEM;
12294                                 goto out;
12295                         }
12296                         rii->cache_extent.start = root_id;
12297                         rii->cache_extent.size = 1;
12298                         rii->level = (u8)-1;
12299                         entry = &rii->cache_extent;
12300                         ret = insert_cache_extent(roots_info_cache, entry);
12301                         ASSERT(ret == 0);
12302                 } else {
12303                         rii = container_of(entry, struct root_item_info,
12304                                            cache_extent);
12305                 }
12306
12307                 ASSERT(rii->cache_extent.start == root_id);
12308                 ASSERT(rii->cache_extent.size == 1);
12309
12310                 if (level > rii->level || rii->level == (u8)-1) {
12311                         rii->level = level;
12312                         rii->bytenr = found_key.objectid;
12313                         rii->gen = btrfs_extent_generation(leaf, ei);
12314                         rii->node_count = 1;
12315                 } else if (level == rii->level) {
12316                         rii->node_count++;
12317                 }
12318 next:
12319                 path.slots[0]++;
12320         }
12321
12322 out:
12323         btrfs_release_path(&path);
12324
12325         return ret;
12326 }
12327
12328 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12329                                   struct btrfs_path *path,
12330                                   const struct btrfs_key *root_key,
12331                                   const int read_only_mode)
12332 {
12333         const u64 root_id = root_key->objectid;
12334         struct cache_extent *entry;
12335         struct root_item_info *rii;
12336         struct btrfs_root_item ri;
12337         unsigned long offset;
12338
12339         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12340         if (!entry) {
12341                 fprintf(stderr,
12342                         "Error: could not find extent items for root %llu\n",
12343                         root_key->objectid);
12344                 return -ENOENT;
12345         }
12346
12347         rii = container_of(entry, struct root_item_info, cache_extent);
12348         ASSERT(rii->cache_extent.start == root_id);
12349         ASSERT(rii->cache_extent.size == 1);
12350
12351         if (rii->node_count != 1) {
12352                 fprintf(stderr,
12353                         "Error: could not find btree root extent for root %llu\n",
12354                         root_id);
12355                 return -ENOENT;
12356         }
12357
12358         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12359         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12360
12361         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12362             btrfs_root_level(&ri) != rii->level ||
12363             btrfs_root_generation(&ri) != rii->gen) {
12364
12365                 /*
12366                  * If we're in repair mode but our caller told us to not update
12367                  * the root item, i.e. just check if it needs to be updated, don't
12368                  * print this message, since the caller will call us again shortly
12369                  * for the same root item without read only mode (the caller will
12370                  * open a transaction first).
12371                  */
12372                 if (!(read_only_mode && repair))
12373                         fprintf(stderr,
12374                                 "%sroot item for root %llu,"
12375                                 " current bytenr %llu, current gen %llu, current level %u,"
12376                                 " new bytenr %llu, new gen %llu, new level %u\n",
12377                                 (read_only_mode ? "" : "fixing "),
12378                                 root_id,
12379                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12380                                 btrfs_root_level(&ri),
12381                                 rii->bytenr, rii->gen, rii->level);
12382
12383                 if (btrfs_root_generation(&ri) > rii->gen) {
12384                         fprintf(stderr,
12385                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12386                                 root_id, btrfs_root_generation(&ri), rii->gen);
12387                         return -EINVAL;
12388                 }
12389
12390                 if (!read_only_mode) {
12391                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12392                         btrfs_set_root_level(&ri, rii->level);
12393                         btrfs_set_root_generation(&ri, rii->gen);
12394                         write_extent_buffer(path->nodes[0], &ri,
12395                                             offset, sizeof(ri));
12396                 }
12397
12398                 return 1;
12399         }
12400
12401         return 0;
12402 }
12403
12404 /*
12405  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12406  * caused read-only snapshots to be corrupted if they were created at a moment
12407  * when the source subvolume/snapshot had orphan items. The issue was that the
12408  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12409  * node instead of the post orphan cleanup root node.
12410  * So this function, and its callees, just detects and fixes those cases. Even
12411  * though the regression was for read-only snapshots, this function applies to
12412  * any snapshot/subvolume root.
12413  * This must be run before any other repair code - not doing it so, makes other
12414  * repair code delete or modify backrefs in the extent tree for example, which
12415  * will result in an inconsistent fs after repairing the root items.
12416  */
12417 static int repair_root_items(struct btrfs_fs_info *info)
12418 {
12419         struct btrfs_path path;
12420         struct btrfs_key key;
12421         struct extent_buffer *leaf;
12422         struct btrfs_trans_handle *trans = NULL;
12423         int ret = 0;
12424         int bad_roots = 0;
12425         int need_trans = 0;
12426
12427         btrfs_init_path(&path);
12428
12429         ret = build_roots_info_cache(info);
12430         if (ret)
12431                 goto out;
12432
12433         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12434         key.type = BTRFS_ROOT_ITEM_KEY;
12435         key.offset = 0;
12436
12437 again:
12438         /*
12439          * Avoid opening and committing transactions if a leaf doesn't have
12440          * any root items that need to be fixed, so that we avoid rotating
12441          * backup roots unnecessarily.
12442          */
12443         if (need_trans) {
12444                 trans = btrfs_start_transaction(info->tree_root, 1);
12445                 if (IS_ERR(trans)) {
12446                         ret = PTR_ERR(trans);
12447                         goto out;
12448                 }
12449         }
12450
12451         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12452                                 0, trans ? 1 : 0);
12453         if (ret < 0)
12454                 goto out;
12455         leaf = path.nodes[0];
12456
12457         while (1) {
12458                 struct btrfs_key found_key;
12459
12460                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12461                         int no_more_keys = find_next_key(&path, &key);
12462
12463                         btrfs_release_path(&path);
12464                         if (trans) {
12465                                 ret = btrfs_commit_transaction(trans,
12466                                                                info->tree_root);
12467                                 trans = NULL;
12468                                 if (ret < 0)
12469                                         goto out;
12470                         }
12471                         need_trans = 0;
12472                         if (no_more_keys)
12473                                 break;
12474                         goto again;
12475                 }
12476
12477                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12478
12479                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12480                         goto next;
12481                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12482                         goto next;
12483
12484                 ret = maybe_repair_root_item(info, &path, &found_key,
12485                                              trans ? 0 : 1);
12486                 if (ret < 0)
12487                         goto out;
12488                 if (ret) {
12489                         if (!trans && repair) {
12490                                 need_trans = 1;
12491                                 key = found_key;
12492                                 btrfs_release_path(&path);
12493                                 goto again;
12494                         }
12495                         bad_roots++;
12496                 }
12497 next:
12498                 path.slots[0]++;
12499         }
12500         ret = 0;
12501 out:
12502         free_roots_info_cache();
12503         btrfs_release_path(&path);
12504         if (trans)
12505                 btrfs_commit_transaction(trans, info->tree_root);
12506         if (ret < 0)
12507                 return ret;
12508
12509         return bad_roots;
12510 }
12511
12512 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12513 {
12514         struct btrfs_trans_handle *trans;
12515         struct btrfs_block_group_cache *bg_cache;
12516         u64 current = 0;
12517         int ret = 0;
12518
12519         /* Clear all free space cache inodes and its extent data */
12520         while (1) {
12521                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12522                 if (!bg_cache)
12523                         break;
12524                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12525                 if (ret < 0)
12526                         return ret;
12527                 current = bg_cache->key.objectid + bg_cache->key.offset;
12528         }
12529
12530         /* Don't forget to set cache_generation to -1 */
12531         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12532         if (IS_ERR(trans)) {
12533                 error("failed to update super block cache generation");
12534                 return PTR_ERR(trans);
12535         }
12536         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12537         btrfs_commit_transaction(trans, fs_info->tree_root);
12538
12539         return ret;
12540 }
12541
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the synopsis, a one-line summary, a longer
 * description, an empty string as separator, then one or more lines per
 * option.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Terminator */
        NULL
};
12572
12573 int cmd_check(int argc, char **argv)
12574 {
12575         struct cache_tree root_cache;
12576         struct btrfs_root *root;
12577         struct btrfs_fs_info *info;
12578         u64 bytenr = 0;
12579         u64 subvolid = 0;
12580         u64 tree_root_bytenr = 0;
12581         u64 chunk_root_bytenr = 0;
12582         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12583         int ret;
12584         int err = 0;
12585         u64 num;
12586         int init_csum_tree = 0;
12587         int readonly = 0;
12588         int clear_space_cache = 0;
12589         int qgroup_report = 0;
12590         int qgroups_repaired = 0;
12591         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12592
12593         while(1) {
12594                 int c;
12595                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12596                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12597                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12598                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12599                 static const struct option long_options[] = {
12600                         { "super", required_argument, NULL, 's' },
12601                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12602                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12603                         { "init-csum-tree", no_argument, NULL,
12604                                 GETOPT_VAL_INIT_CSUM },
12605                         { "init-extent-tree", no_argument, NULL,
12606                                 GETOPT_VAL_INIT_EXTENT },
12607                         { "check-data-csum", no_argument, NULL,
12608                                 GETOPT_VAL_CHECK_CSUM },
12609                         { "backup", no_argument, NULL, 'b' },
12610                         { "subvol-extents", required_argument, NULL, 'E' },
12611                         { "qgroup-report", no_argument, NULL, 'Q' },
12612                         { "tree-root", required_argument, NULL, 'r' },
12613                         { "chunk-root", required_argument, NULL,
12614                                 GETOPT_VAL_CHUNK_TREE },
12615                         { "progress", no_argument, NULL, 'p' },
12616                         { "mode", required_argument, NULL,
12617                                 GETOPT_VAL_MODE },
12618                         { "clear-space-cache", required_argument, NULL,
12619                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12620                         { NULL, 0, NULL, 0}
12621                 };
12622
12623                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12624                 if (c < 0)
12625                         break;
12626                 switch(c) {
12627                         case 'a': /* ignored */ break;
12628                         case 'b':
12629                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12630                                 break;
12631                         case 's':
12632                                 num = arg_strtou64(optarg);
12633                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12634                                         error(
12635                                         "super mirror should be less than %d",
12636                                                 BTRFS_SUPER_MIRROR_MAX);
12637                                         exit(1);
12638                                 }
12639                                 bytenr = btrfs_sb_offset(((int)num));
12640                                 printf("using SB copy %llu, bytenr %llu\n", num,
12641                                        (unsigned long long)bytenr);
12642                                 break;
12643                         case 'Q':
12644                                 qgroup_report = 1;
12645                                 break;
12646                         case 'E':
12647                                 subvolid = arg_strtou64(optarg);
12648                                 break;
12649                         case 'r':
12650                                 tree_root_bytenr = arg_strtou64(optarg);
12651                                 break;
12652                         case GETOPT_VAL_CHUNK_TREE:
12653                                 chunk_root_bytenr = arg_strtou64(optarg);
12654                                 break;
12655                         case 'p':
12656                                 ctx.progress_enabled = true;
12657                                 break;
12658                         case '?':
12659                         case 'h':
12660                                 usage(cmd_check_usage);
12661                         case GETOPT_VAL_REPAIR:
12662                                 printf("enabling repair mode\n");
12663                                 repair = 1;
12664                                 ctree_flags |= OPEN_CTREE_WRITES;
12665                                 break;
12666                         case GETOPT_VAL_READONLY:
12667                                 readonly = 1;
12668                                 break;
12669                         case GETOPT_VAL_INIT_CSUM:
12670                                 printf("Creating a new CRC tree\n");
12671                                 init_csum_tree = 1;
12672                                 repair = 1;
12673                                 ctree_flags |= OPEN_CTREE_WRITES;
12674                                 break;
12675                         case GETOPT_VAL_INIT_EXTENT:
12676                                 init_extent_tree = 1;
12677                                 ctree_flags |= (OPEN_CTREE_WRITES |
12678                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12679                                 repair = 1;
12680                                 break;
12681                         case GETOPT_VAL_CHECK_CSUM:
12682                                 check_data_csum = 1;
12683                                 break;
12684                         case GETOPT_VAL_MODE:
12685                                 check_mode = parse_check_mode(optarg);
12686                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12687                                         error("unknown mode: %s", optarg);
12688                                         exit(1);
12689                                 }
12690                                 break;
12691                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12692                                 if (strcmp(optarg, "v1") == 0) {
12693                                         clear_space_cache = 1;
12694                                 } else if (strcmp(optarg, "v2") == 0) {
12695                                         clear_space_cache = 2;
12696                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12697                                 } else {
12698                                         error(
12699                 "invalid argument to --clear-space-cache, must be v1 or v2");
12700                                         exit(1);
12701                                 }
12702                                 ctree_flags |= OPEN_CTREE_WRITES;
12703                                 break;
12704                 }
12705         }
12706
12707         if (check_argc_exact(argc - optind, 1))
12708                 usage(cmd_check_usage);
12709
12710         if (ctx.progress_enabled) {
12711                 ctx.tp = TASK_NOTHING;
12712                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12713         }
12714
12715         /* This check is the only reason for --readonly to exist */
12716         if (readonly && repair) {
12717                 error("repair options are not compatible with --readonly");
12718                 exit(1);
12719         }
12720
12721         /*
12722          * Not supported yet
12723          */
12724         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12725                 error("low memory mode doesn't support repair yet");
12726                 exit(1);
12727         }
12728
12729         radix_tree_init();
12730         cache_tree_init(&root_cache);
12731
12732         if((ret = check_mounted(argv[optind])) < 0) {
12733                 error("could not check mount status: %s", strerror(-ret));
12734                 err |= !!ret;
12735                 goto err_out;
12736         } else if(ret) {
12737                 error("%s is currently mounted, aborting", argv[optind]);
12738                 ret = -EBUSY;
12739                 err |= !!ret;
12740                 goto err_out;
12741         }
12742
12743         /* only allow partial opening under repair mode */
12744         if (repair)
12745                 ctree_flags |= OPEN_CTREE_PARTIAL;
12746
12747         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12748                                   chunk_root_bytenr, ctree_flags);
12749         if (!info) {
12750                 error("cannot open file system");
12751                 ret = -EIO;
12752                 err |= !!ret;
12753                 goto err_out;
12754         }
12755
12756         global_info = info;
12757         root = info->fs_root;
12758         if (clear_space_cache == 1) {
12759                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12760                         error(
12761                 "free space cache v2 detected, use --clear-space-cache v2");
12762                         ret = 1;
12763                         goto close_out;
12764                 }
12765                 printf("Clearing free space cache\n");
12766                 ret = clear_free_space_cache(info);
12767                 if (ret) {
12768                         error("failed to clear free space cache");
12769                         ret = 1;
12770                 } else {
12771                         printf("Free space cache cleared\n");
12772                 }
12773                 goto close_out;
12774         } else if (clear_space_cache == 2) {
12775                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12776                         printf("no free space cache v2 to clear\n");
12777                         ret = 0;
12778                         goto close_out;
12779                 }
12780                 printf("Clear free space cache v2\n");
12781                 ret = btrfs_clear_free_space_tree(info);
12782                 if (ret) {
12783                         error("failed to clear free space cache v2: %d", ret);
12784                         ret = 1;
12785                 } else {
12786                         printf("free space cache v2 cleared\n");
12787                 }
12788                 goto close_out;
12789         }
12790
12791         /*
12792          * repair mode will force us to commit transaction which
12793          * will make us fail to load log tree when mounting.
12794          */
12795         if (repair && btrfs_super_log_root(info->super_copy)) {
12796                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12797                 if (!ret) {
12798                         ret = 1;
12799                         err |= !!ret;
12800                         goto close_out;
12801                 }
12802                 ret = zero_log_tree(root);
12803                 err |= !!ret;
12804                 if (ret) {
12805                         error("failed to zero log tree: %d", ret);
12806                         goto close_out;
12807                 }
12808         }
12809
12810         uuid_unparse(info->super_copy->fsid, uuidbuf);
12811         if (qgroup_report) {
12812                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12813                        uuidbuf);
12814                 ret = qgroup_verify_all(info);
12815                 err |= !!ret;
12816                 if (ret == 0)
12817                         report_qgroups(1);
12818                 goto close_out;
12819         }
12820         if (subvolid) {
12821                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12822                        subvolid, argv[optind], uuidbuf);
12823                 ret = print_extent_state(info, subvolid);
12824                 err |= !!ret;
12825                 goto close_out;
12826         }
12827         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12828
12829         if (!extent_buffer_uptodate(info->tree_root->node) ||
12830             !extent_buffer_uptodate(info->dev_root->node) ||
12831             !extent_buffer_uptodate(info->chunk_root->node)) {
12832                 error("critical roots corrupted, unable to check the filesystem");
12833                 err |= !!ret;
12834                 ret = -EIO;
12835                 goto close_out;
12836         }
12837
12838         if (init_extent_tree || init_csum_tree) {
12839                 struct btrfs_trans_handle *trans;
12840
12841                 trans = btrfs_start_transaction(info->extent_root, 0);
12842                 if (IS_ERR(trans)) {
12843                         error("error starting transaction");
12844                         ret = PTR_ERR(trans);
12845                         err |= !!ret;
12846                         goto close_out;
12847                 }
12848
12849                 if (init_extent_tree) {
12850                         printf("Creating a new extent tree\n");
12851                         ret = reinit_extent_tree(trans, info);
12852                         err |= !!ret;
12853                         if (ret)
12854                                 goto close_out;
12855                 }
12856
12857                 if (init_csum_tree) {
12858                         printf("Reinitialize checksum tree\n");
12859                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12860                         if (ret) {
12861                                 error("checksum tree initialization failed: %d",
12862                                                 ret);
12863                                 ret = -EIO;
12864                                 err |= !!ret;
12865                                 goto close_out;
12866                         }
12867
12868                         ret = fill_csum_tree(trans, info->csum_root,
12869                                              init_extent_tree);
12870                         err |= !!ret;
12871                         if (ret) {
12872                                 error("checksum tree refilling failed: %d", ret);
12873                                 return -EIO;
12874                         }
12875                 }
12876                 /*
12877                  * Ok now we commit and run the normal fsck, which will add
12878                  * extent entries for all of the items it finds.
12879                  */
12880                 ret = btrfs_commit_transaction(trans, info->extent_root);
12881                 err |= !!ret;
12882                 if (ret)
12883                         goto close_out;
12884         }
12885         if (!extent_buffer_uptodate(info->extent_root->node)) {
12886                 error("critical: extent_root, unable to check the filesystem");
12887                 ret = -EIO;
12888                 err |= !!ret;
12889                 goto close_out;
12890         }
12891         if (!extent_buffer_uptodate(info->csum_root->node)) {
12892                 error("critical: csum_root, unable to check the filesystem");
12893                 ret = -EIO;
12894                 err |= !!ret;
12895                 goto close_out;
12896         }
12897
12898         if (!ctx.progress_enabled)
12899                 fprintf(stderr, "checking extents\n");
12900         if (check_mode == CHECK_MODE_LOWMEM)
12901                 ret = check_chunks_and_extents_v2(root);
12902         else
12903                 ret = check_chunks_and_extents(root);
12904         err |= !!ret;
12905         if (ret)
12906                 error(
12907                 "errors found in extent allocation tree or chunk allocation");
12908
12909         ret = repair_root_items(info);
12910         err |= !!ret;
12911         if (ret < 0)
12912                 goto close_out;
12913         if (repair) {
12914                 fprintf(stderr, "Fixed %d roots.\n", ret);
12915                 ret = 0;
12916         } else if (ret > 0) {
12917                 fprintf(stderr,
12918                        "Found %d roots with an outdated root item.\n",
12919                        ret);
12920                 fprintf(stderr,
12921                         "Please run a filesystem check with the option --repair to fix them.\n");
12922                 ret = 1;
12923                 err |= !!ret;
12924                 goto close_out;
12925         }
12926
12927         if (!ctx.progress_enabled) {
12928                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12929                         fprintf(stderr, "checking free space tree\n");
12930                 else
12931                         fprintf(stderr, "checking free space cache\n");
12932         }
12933         ret = check_space_cache(root);
12934         err |= !!ret;
12935         if (ret)
12936                 goto out;
12937
12938         /*
12939          * We used to have to have these hole extents in between our real
12940          * extents so if we don't have this flag set we need to make sure there
12941          * are no gaps in the file extents for inodes, otherwise we can just
12942          * ignore it when this happens.
12943          */
12944         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12945         if (!ctx.progress_enabled)
12946                 fprintf(stderr, "checking fs roots\n");
12947         if (check_mode == CHECK_MODE_LOWMEM)
12948                 ret = check_fs_roots_v2(root->fs_info);
12949         else
12950                 ret = check_fs_roots(root, &root_cache);
12951         err |= !!ret;
12952         if (ret)
12953                 goto out;
12954
12955         fprintf(stderr, "checking csums\n");
12956         ret = check_csums(root);
12957         err |= !!ret;
12958         if (ret)
12959                 goto out;
12960
12961         fprintf(stderr, "checking root refs\n");
12962         /* For low memory mode, check_fs_roots_v2 handles root refs */
12963         if (check_mode != CHECK_MODE_LOWMEM) {
12964                 ret = check_root_refs(root, &root_cache);
12965                 err |= !!ret;
12966                 if (ret)
12967                         goto out;
12968         }
12969
12970         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12971                 struct extent_buffer *eb;
12972
12973                 eb = list_first_entry(&root->fs_info->recow_ebs,
12974                                       struct extent_buffer, recow);
12975                 list_del_init(&eb->recow);
12976                 ret = recow_extent_buffer(root, eb);
12977                 err |= !!ret;
12978                 if (ret)
12979                         break;
12980         }
12981
12982         while (!list_empty(&delete_items)) {
12983                 struct bad_item *bad;
12984
12985                 bad = list_first_entry(&delete_items, struct bad_item, list);
12986                 list_del_init(&bad->list);
12987                 if (repair) {
12988                         ret = delete_bad_item(root, bad);
12989                         err |= !!ret;
12990                 }
12991                 free(bad);
12992         }
12993
12994         if (info->quota_enabled) {
12995                 fprintf(stderr, "checking quota groups\n");
12996                 ret = qgroup_verify_all(info);
12997                 err |= !!ret;
12998                 if (ret)
12999                         goto out;
13000                 report_qgroups(0);
13001                 ret = repair_qgroups(info, &qgroups_repaired);
13002                 err |= !!ret;
13003                 if (err)
13004                         goto out;
13005                 ret = 0;
13006         }
13007
13008         if (!list_empty(&root->fs_info->recow_ebs)) {
13009                 error("transid errors in file system");
13010                 ret = 1;
13011                 err |= !!ret;
13012         }
13013 out:
13014         if (found_old_backref) { /*
13015                  * there was a disk format change when mixed
13016                  * backref was in testing tree. The old format
13017                  * existed about one week.
13018                  */
13019                 printf("\n * Found old mixed backref format. "
13020                        "The old format is not supported! *"
13021                        "\n * Please mount the FS in readonly mode, "
13022                        "backup data and re-format the FS. *\n\n");
13023                 err |= 1;
13024         }
13025         printf("found %llu bytes used err is %d\n",
13026                (unsigned long long)bytes_used, ret);
13027         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13028         printf("total tree bytes: %llu\n",
13029                (unsigned long long)total_btree_bytes);
13030         printf("total fs tree bytes: %llu\n",
13031                (unsigned long long)total_fs_tree_bytes);
13032         printf("total extent tree bytes: %llu\n",
13033                (unsigned long long)total_extent_tree_bytes);
13034         printf("btree space waste bytes: %llu\n",
13035                (unsigned long long)btree_space_waste);
13036         printf("file data blocks allocated: %llu\n referenced %llu\n",
13037                 (unsigned long long)data_bytes_allocated,
13038                 (unsigned long long)data_bytes_referenced);
13039
13040         free_qgroup_counts();
13041         free_root_recs_tree(&root_cache);
13042 close_out:
13043         close_ctree(root);
13044 err_out:
13045         if (ctx.progress_enabled)
13046                 task_deinit(ctx.info);
13047
13048         return err;
13049 }