204bd4c501bf69490319fbb255e1e9de74db84a6
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check that the progress indicator can report.  The values
 * are used as indexes into task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel meaning "nothing to report"; must stay the last element. */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an unrecognized
 * name; CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/*
 * Item-level error bits.  Each flag owns exactly one bit; bit 0 is not
 * assigned in this set.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Value used to explicitly initialize extent_record::flag_block_full_backref
 * as "unset"; it needs the field's full two-bit width.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Error bits for inode_backref::errors / root_backref::errors; decoded by
 * print_ref_error().  The trailing comment gives each value in hex.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/*
 * Error bits for inode_record::errors; decoded by print_inode_error().
 * The trailing comment gives each value in hex.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Every flag owns a distinct
 * bit so that classifications can be told apart once a caller does care.
 *
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH;
 * it now has its own bit.  All other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p:	unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532                         add_inode_backref(inode_cache, location.objectid,
1533                                           key->objectid, key->offset, namebuf,
1534                                           len, filetype, key->type, error);
1535                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536                         add_inode_backref(root_cache, location.objectid,
1537                                           key->objectid, key->offset,
1538                                           namebuf, len, filetype,
1539                                           key->type, error);
1540                 } else {
1541                         fprintf(stderr, "invalid location in dir item %u\n",
1542                                 location.type);
1543                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544                                           key->objectid, key->offset, namebuf,
1545                                           len, filetype, key->type, error);
1546                 }
1547
1548                 len = sizeof(*di) + name_len + data_len;
1549                 di = (struct btrfs_dir_item *)((char *)di + len);
1550                 cur += len;
1551         }
1552         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1554
1555         return 0;
1556 }
1557
1558 static int process_inode_ref(struct extent_buffer *eb,
1559                              int slot, struct btrfs_key *key,
1560                              struct shared_node *active_node)
1561 {
1562         u32 total;
1563         u32 cur = 0;
1564         u32 len;
1565         u32 name_len;
1566         u64 index;
1567         int error;
1568         struct cache_tree *inode_cache;
1569         struct btrfs_inode_ref *ref;
1570         char namebuf[BTRFS_NAME_LEN];
1571
1572         inode_cache = &active_node->inode_cache;
1573
1574         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575         total = btrfs_item_size_nr(eb, slot);
1576         while (cur < total) {
1577                 name_len = btrfs_inode_ref_name_len(eb, ref);
1578                 index = btrfs_inode_ref_index(eb, ref);
1579
1580                 /* inode_ref + namelen should not cross item boundary */
1581                 if (cur + sizeof(*ref) + name_len > total ||
1582                     name_len > BTRFS_NAME_LEN) {
1583                         if (total < cur + sizeof(*ref))
1584                                 break;
1585
1586                         /* Still try to read out the remaining part */
1587                         len = min_t(u32, total - cur - sizeof(*ref),
1588                                     BTRFS_NAME_LEN);
1589                         error = REF_ERR_NAME_TOO_LONG;
1590                 } else {
1591                         len = name_len;
1592                         error = 0;
1593                 }
1594
1595                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596                 add_inode_backref(inode_cache, key->objectid, key->offset,
1597                                   index, namebuf, len, 0, key->type, error);
1598
1599                 len = sizeof(*ref) + name_len;
1600                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1601                 cur += len;
1602         }
1603         return 0;
1604 }
1605
1606 static int process_inode_extref(struct extent_buffer *eb,
1607                                 int slot, struct btrfs_key *key,
1608                                 struct shared_node *active_node)
1609 {
1610         u32 total;
1611         u32 cur = 0;
1612         u32 len;
1613         u32 name_len;
1614         u64 index;
1615         u64 parent;
1616         int error;
1617         struct cache_tree *inode_cache;
1618         struct btrfs_inode_extref *extref;
1619         char namebuf[BTRFS_NAME_LEN];
1620
1621         inode_cache = &active_node->inode_cache;
1622
1623         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624         total = btrfs_item_size_nr(eb, slot);
1625         while (cur < total) {
1626                 name_len = btrfs_inode_extref_name_len(eb, extref);
1627                 index = btrfs_inode_extref_index(eb, extref);
1628                 parent = btrfs_inode_extref_parent(eb, extref);
1629                 if (name_len <= BTRFS_NAME_LEN) {
1630                         len = name_len;
1631                         error = 0;
1632                 } else {
1633                         len = BTRFS_NAME_LEN;
1634                         error = REF_ERR_NAME_TOO_LONG;
1635                 }
1636                 read_extent_buffer(eb, namebuf,
1637                                    (unsigned long)(extref + 1), len);
1638                 add_inode_backref(inode_cache, key->objectid, parent,
1639                                   index, namebuf, len, 0, key->type, error);
1640
1641                 len = sizeof(*extref) + name_len;
1642                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1643                 cur += len;
1644         }
1645         return 0;
1646
1647 }
1648
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650                             u64 len, u64 *found)
1651 {
1652         struct btrfs_key key;
1653         struct btrfs_path path;
1654         struct extent_buffer *leaf;
1655         int ret;
1656         size_t size;
1657         *found = 0;
1658         u64 csum_end;
1659         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660
1661         btrfs_init_path(&path);
1662
1663         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664         key.offset = start;
1665         key.type = BTRFS_EXTENT_CSUM_KEY;
1666
1667         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1668                                 &key, &path, 0, 0);
1669         if (ret < 0)
1670                 goto out;
1671         if (ret > 0 && path.slots[0] > 0) {
1672                 leaf = path.nodes[0];
1673                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675                     key.type == BTRFS_EXTENT_CSUM_KEY)
1676                         path.slots[0]--;
1677         }
1678
1679         while (len > 0) {
1680                 leaf = path.nodes[0];
1681                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1683                         if (ret > 0)
1684                                 break;
1685                         else if (ret < 0)
1686                                 goto out;
1687                         leaf = path.nodes[0];
1688                 }
1689
1690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692                     key.type != BTRFS_EXTENT_CSUM_KEY)
1693                         break;
1694
1695                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1696                 if (key.offset >= start + len)
1697                         break;
1698
1699                 if (key.offset > start)
1700                         start = key.offset;
1701
1702                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1703                 csum_end = key.offset + (size / csum_size) *
1704                            root->fs_info->sectorsize;
1705                 if (csum_end > start) {
1706                         size = min(csum_end - start, len);
1707                         len -= size;
1708                         start += size;
1709                         *found += size;
1710                 }
1711
1712                 path.slots[0]++;
1713         }
1714 out:
1715         btrfs_release_path(&path);
1716         if (ret < 0)
1717                 return ret;
1718         return 0;
1719 }
1720
1721 static int process_file_extent(struct btrfs_root *root,
1722                                 struct extent_buffer *eb,
1723                                 int slot, struct btrfs_key *key,
1724                                 struct shared_node *active_node)
1725 {
1726         struct inode_record *rec;
1727         struct btrfs_file_extent_item *fi;
1728         u64 num_bytes = 0;
1729         u64 disk_bytenr = 0;
1730         u64 extent_offset = 0;
1731         u64 mask = root->fs_info->sectorsize - 1;
1732         int extent_type;
1733         int ret;
1734
1735         rec = active_node->current;
1736         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737         rec->found_file_extent = 1;
1738
1739         if (rec->extent_start == (u64)-1) {
1740                 rec->extent_start = key->offset;
1741                 rec->extent_end = key->offset;
1742         }
1743
1744         if (rec->extent_end > key->offset)
1745                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746         else if (rec->extent_end < key->offset) {
1747                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748                                            key->offset - rec->extent_end);
1749                 if (ret < 0)
1750                         return ret;
1751         }
1752
1753         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754         extent_type = btrfs_file_extent_type(eb, fi);
1755
1756         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1758                 if (num_bytes == 0)
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 rec->found_size += num_bytes;
1761                 num_bytes = (num_bytes + mask) & ~mask;
1762         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766                 extent_offset = btrfs_file_extent_offset(eb, fi);
1767                 if (num_bytes == 0 || (num_bytes & mask))
1768                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769                 if (num_bytes + extent_offset >
1770                     btrfs_file_extent_ram_bytes(eb, fi))
1771                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1772                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773                     (btrfs_file_extent_compression(eb, fi) ||
1774                      btrfs_file_extent_encryption(eb, fi) ||
1775                      btrfs_file_extent_other_encoding(eb, fi)))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (disk_bytenr > 0)
1778                         rec->found_size += num_bytes;
1779         } else {
1780                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781         }
1782         rec->extent_end = key->offset + num_bytes;
1783
1784         /*
1785          * The data reloc tree will copy full extents into its inode and then
1786          * copy the corresponding csums.  Because the extent it copied could be
1787          * a preallocated extent that hasn't been written to yet there may be no
1788          * csums to copy, ergo we won't have csums for our file extent.  This is
1789          * ok so just don't bother checking csums if the inode belongs to the
1790          * data reloc tree.
1791          */
1792         if (disk_bytenr > 0 &&
1793             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1794                 u64 found;
1795                 if (btrfs_file_extent_compression(eb, fi))
1796                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1797                 else
1798                         disk_bytenr += extent_offset;
1799
1800                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1801                 if (ret < 0)
1802                         return ret;
1803                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1804                         if (found > 0)
1805                                 rec->found_csum_item = 1;
1806                         if (found < num_bytes)
1807                                 rec->some_csum_missing = 1;
1808                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1809                         if (found > 0)
1810                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1811                 }
1812         }
1813         return 0;
1814 }
1815
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817                             struct walk_control *wc)
1818 {
1819         struct btrfs_key key;
1820         u32 nritems;
1821         int i;
1822         int ret = 0;
1823         struct cache_tree *inode_cache;
1824         struct shared_node *active_node;
1825
1826         if (wc->root_level == wc->active_node &&
1827             btrfs_root_refs(&root->root_item) == 0)
1828                 return 0;
1829
1830         active_node = wc->nodes[wc->active_node];
1831         inode_cache = &active_node->inode_cache;
1832         nritems = btrfs_header_nritems(eb);
1833         for (i = 0; i < nritems; i++) {
1834                 btrfs_item_key_to_cpu(eb, &key, i);
1835
1836                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1837                         continue;
1838                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1839                         continue;
1840
1841                 if (active_node->current == NULL ||
1842                     active_node->current->ino < key.objectid) {
1843                         if (active_node->current) {
1844                                 active_node->current->checked = 1;
1845                                 maybe_free_inode_rec(inode_cache,
1846                                                      active_node->current);
1847                         }
1848                         active_node->current = get_inode_rec(inode_cache,
1849                                                              key.objectid, 1);
1850                         BUG_ON(IS_ERR(active_node->current));
1851                 }
1852                 switch (key.type) {
1853                 case BTRFS_DIR_ITEM_KEY:
1854                 case BTRFS_DIR_INDEX_KEY:
1855                         ret = process_dir_item(eb, i, &key, active_node);
1856                         break;
1857                 case BTRFS_INODE_REF_KEY:
1858                         ret = process_inode_ref(eb, i, &key, active_node);
1859                         break;
1860                 case BTRFS_INODE_EXTREF_KEY:
1861                         ret = process_inode_extref(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_ITEM_KEY:
1864                         ret = process_inode_item(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_EXTENT_DATA_KEY:
1867                         ret = process_file_extent(root, eb, i, &key,
1868                                                   active_node);
1869                         break;
1870                 default:
1871                         break;
1872                 };
1873         }
1874         return ret;
1875 }
1876
/*
 * Per-level cache of the most recent extent reference lookup, indexed by
 * tree level.
 */
struct node_refs {
        /* start bytenr of the block the cached values of this level belong to */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count obtained from btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero when the current root has to check the block itself */
        int need_check[BTRFS_MAX_LEVEL];
};
1882
1883 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1884                              struct node_refs *nrefs, u64 level);
1885 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1886                             unsigned int ext_ref);
1887
1888 /*
1889  * Returns >0  Found error, not fatal, should continue
1890  * Returns <0  Fatal error, must exit the whole check
1891  * Returns 0   No errors found
1892  */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894                                struct node_refs *nrefs, int *level, int ext_ref)
1895 {
1896         struct extent_buffer *cur = path->nodes[0];
1897         struct btrfs_key key;
1898         u64 cur_bytenr;
1899         u32 nritems;
1900         u64 first_ino = 0;
1901         int root_level = btrfs_header_level(root->node);
1902         int i;
1903         int ret = 0; /* Final return value */
1904         int err = 0; /* Positive error bitmap */
1905
1906         cur_bytenr = cur->start;
1907
1908         /* skip to first inode item or the first inode number change */
1909         nritems = btrfs_header_nritems(cur);
1910         for (i = 0; i < nritems; i++) {
1911                 btrfs_item_key_to_cpu(cur, &key, i);
1912                 if (i == 0)
1913                         first_ino = key.objectid;
1914                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915                     (first_ino && first_ino != key.objectid))
1916                         break;
1917         }
1918         if (i == nritems) {
1919                 path->slots[0] = nritems;
1920                 return 0;
1921         }
1922         path->slots[0] = i;
1923
1924 again:
1925         err |= check_inode_item(root, path, ext_ref);
1926
1927         if (err & LAST_ITEM)
1928                 goto out;
1929
1930         /* still have inode items in thie leaf */
1931         if (cur->start == cur_bytenr)
1932                 goto again;
1933
1934         /*
1935          * we have switched to another leaf, above nodes may
1936          * have changed, here walk down the path, if a node
1937          * or leaf is shared, check whether we can skip this
1938          * node or leaf.
1939          */
1940         for (i = root_level; i >= 0; i--) {
1941                 if (path->nodes[i]->start == nrefs->bytenr[i])
1942                         continue;
1943
1944                 ret = update_nodes_refs(root,
1945                                 path->nodes[i]->start,
1946                                 nrefs, i);
1947                 if (ret)
1948                         goto out;
1949
1950                 if (!nrefs->need_check[i]) {
1951                         *level += 1;
1952                         break;
1953                 }
1954         }
1955
1956         for (i = 0; i < *level; i++) {
1957                 free_extent_buffer(path->nodes[i]);
1958                 path->nodes[i] = NULL;
1959         }
1960 out:
1961         err &= ~LAST_ITEM;
1962         if (err && !ret)
1963                 ret = err;
1964         return ret;
1965 }
1966
1967 static void reada_walk_down(struct btrfs_root *root,
1968                             struct extent_buffer *node, int slot)
1969 {
1970         u64 bytenr;
1971         u64 ptr_gen;
1972         u32 nritems;
1973         u32 blocksize;
1974         int i;
1975         int level;
1976
1977         level = btrfs_header_level(node);
1978         if (level != 1)
1979                 return;
1980
1981         nritems = btrfs_header_nritems(node);
1982         blocksize = root->fs_info->nodesize;
1983         for (i = slot; i < nritems; i++) {
1984                 bytenr = btrfs_node_blockptr(node, i);
1985                 ptr_gen = btrfs_node_ptr_generation(node, i);
1986                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1987         }
1988 }
1989
1990 /*
1991  * Check the child node/leaf by the following condition:
1992  * 1. the first item key of the node/leaf should be the same with the one
1993  *    in parent.
1994  * 2. block in parent node should match the child node/leaf.
1995  * 3. generation of parent node and child's header should be consistent.
1996  *
1997  * Or the child node/leaf pointed by the key in parent is not valid.
1998  *
1999  * We hope to check leaf owner too, but since subvol may share leaves,
2000  * which makes leaf owner check not so strong, key check should be
2001  * sufficient enough for that case.
2002  */
2003 static int check_child_node(struct extent_buffer *parent, int slot,
2004                             struct extent_buffer *child)
2005 {
2006         struct btrfs_key parent_key;
2007         struct btrfs_key child_key;
2008         int ret = 0;
2009
2010         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2011         if (btrfs_header_level(child) == 0)
2012                 btrfs_item_key_to_cpu(child, &child_key, 0);
2013         else
2014                 btrfs_node_key_to_cpu(child, &child_key, 0);
2015
2016         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017                 ret = -EINVAL;
2018                 fprintf(stderr,
2019                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2020                         parent_key.objectid, parent_key.type, parent_key.offset,
2021                         child_key.objectid, child_key.type, child_key.offset);
2022         }
2023         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2024                 ret = -EINVAL;
2025                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2026                         btrfs_node_blockptr(parent, slot),
2027                         btrfs_header_bytenr(child));
2028         }
2029         if (btrfs_node_ptr_generation(parent, slot) !=
2030             btrfs_header_generation(child)) {
2031                 ret = -EINVAL;
2032                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2033                         btrfs_header_generation(child),
2034                         btrfs_node_ptr_generation(parent, slot));
2035         }
2036         return ret;
2037 }
2038
2039 /*
2040  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2041  * in every fs or file tree check. Here we find its all root ids, and only check
2042  * it in the fs or file tree which has the smallest root id.
2043  */
2044 static int need_check(struct btrfs_root *root, struct ulist *roots)
2045 {
2046         struct rb_node *node;
2047         struct ulist_node *u;
2048
2049         if (roots->nnodes == 1)
2050                 return 1;
2051
2052         node = rb_first(&roots->root);
2053         u = rb_entry(node, struct ulist_node, rb_node);
2054         /*
2055          * current root id is not smallest, we skip it and let it be checked
2056          * in the fs or file tree who hash the smallest root id.
2057          */
2058         if (root->objectid != u->val)
2059                 return 0;
2060
2061         return 1;
2062 }
2063
2064 /*
2065  * for a tree node or leaf, we record its reference count, so later if we still
2066  * process this node or leaf, don't need to compute its reference count again.
2067  */
2068 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2069                              struct node_refs *nrefs, u64 level)
2070 {
2071         int check, ret;
2072         u64 refs;
2073         struct ulist *roots;
2074
2075         if (nrefs->bytenr[level] != bytenr) {
2076                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2077                                        level, 1, &refs, NULL);
2078                 if (ret < 0)
2079                         return ret;
2080
2081                 nrefs->bytenr[level] = bytenr;
2082                 nrefs->refs[level] = refs;
2083                 if (refs > 1) {
2084                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2085                                                    0, &roots);
2086                         if (ret)
2087                                 return -EIO;
2088
2089                         check = need_check(root, roots);
2090                         ulist_free(roots);
2091                         nrefs->need_check[level] = check;
2092                 } else {
2093                         nrefs->need_check[level] = 1;
2094                 }
2095         }
2096
2097         return 0;
2098 }
2099
2100 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2101                           struct walk_control *wc, int *level,
2102                           struct node_refs *nrefs)
2103 {
2104         enum btrfs_tree_block_status status;
2105         u64 bytenr;
2106         u64 ptr_gen;
2107         struct extent_buffer *next;
2108         struct extent_buffer *cur;
2109         u32 blocksize;
2110         int ret, err = 0;
2111         u64 refs;
2112
2113         WARN_ON(*level < 0);
2114         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2115
2116         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2117                 refs = nrefs->refs[*level];
2118                 ret = 0;
2119         } else {
2120                 ret = btrfs_lookup_extent_info(NULL, root,
2121                                        path->nodes[*level]->start,
2122                                        *level, 1, &refs, NULL);
2123                 if (ret < 0) {
2124                         err = ret;
2125                         goto out;
2126                 }
2127                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2128                 nrefs->refs[*level] = refs;
2129         }
2130
2131         if (refs > 1) {
2132                 ret = enter_shared_node(root, path->nodes[*level]->start,
2133                                         refs, wc, *level);
2134                 if (ret > 0) {
2135                         err = ret;
2136                         goto out;
2137                 }
2138         }
2139
2140         while (*level >= 0) {
2141                 WARN_ON(*level < 0);
2142                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2143                 cur = path->nodes[*level];
2144
2145                 if (btrfs_header_level(cur) != *level)
2146                         WARN_ON(1);
2147
2148                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2149                         break;
2150                 if (*level == 0) {
2151                         ret = process_one_leaf(root, cur, wc);
2152                         if (ret < 0)
2153                                 err = ret;
2154                         break;
2155                 }
2156                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2157                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2158                 blocksize = root->fs_info->nodesize;
2159
2160                 if (bytenr == nrefs->bytenr[*level - 1]) {
2161                         refs = nrefs->refs[*level - 1];
2162                 } else {
2163                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2164                                         *level - 1, 1, &refs, NULL);
2165                         if (ret < 0) {
2166                                 refs = 0;
2167                         } else {
2168                                 nrefs->bytenr[*level - 1] = bytenr;
2169                                 nrefs->refs[*level - 1] = refs;
2170                         }
2171                 }
2172
2173                 if (refs > 1) {
2174                         ret = enter_shared_node(root, bytenr, refs,
2175                                                 wc, *level - 1);
2176                         if (ret > 0) {
2177                                 path->slots[*level]++;
2178                                 continue;
2179                         }
2180                 }
2181
2182                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2183                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2184                         free_extent_buffer(next);
2185                         reada_walk_down(root, cur, path->slots[*level]);
2186                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2187                                                ptr_gen);
2188                         if (!extent_buffer_uptodate(next)) {
2189                                 struct btrfs_key node_key;
2190
2191                                 btrfs_node_key_to_cpu(path->nodes[*level],
2192                                                       &node_key,
2193                                                       path->slots[*level]);
2194                                 btrfs_add_corrupt_extent_record(root->fs_info,
2195                                                 &node_key,
2196                                                 path->nodes[*level]->start,
2197                                                 root->fs_info->nodesize,
2198                                                 *level);
2199                                 err = -EIO;
2200                                 goto out;
2201                         }
2202                 }
2203
2204                 ret = check_child_node(cur, path->slots[*level], next);
2205                 if (ret) {
2206                         free_extent_buffer(next);
2207                         err = ret;
2208                         goto out;
2209                 }
2210
2211                 if (btrfs_is_leaf(next))
2212                         status = btrfs_check_leaf(root, NULL, next);
2213                 else
2214                         status = btrfs_check_node(root, NULL, next);
2215                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2216                         free_extent_buffer(next);
2217                         err = -EIO;
2218                         goto out;
2219                 }
2220
2221                 *level = *level - 1;
2222                 free_extent_buffer(path->nodes[*level]);
2223                 path->nodes[*level] = next;
2224                 path->slots[*level] = 0;
2225         }
2226 out:
2227         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2228         return err;
2229 }
2230
2231 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2232                             unsigned int ext_ref);
2233
2234 /*
2235  * Returns >0  Found error, should continue
2236  * Returns <0  Fatal error, must exit the whole check
2237  * Returns 0   No errors found
2238  */
2239 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2240                              int *level, struct node_refs *nrefs, int ext_ref)
2241 {
2242         enum btrfs_tree_block_status status;
2243         u64 bytenr;
2244         u64 ptr_gen;
2245         struct extent_buffer *next;
2246         struct extent_buffer *cur;
2247         u32 blocksize;
2248         int ret;
2249
2250         WARN_ON(*level < 0);
2251         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2252
2253         ret = update_nodes_refs(root, path->nodes[*level]->start,
2254                                 nrefs, *level);
2255         if (ret < 0)
2256                 return ret;
2257
2258         while (*level >= 0) {
2259                 WARN_ON(*level < 0);
2260                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2261                 cur = path->nodes[*level];
2262
2263                 if (btrfs_header_level(cur) != *level)
2264                         WARN_ON(1);
2265
2266                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2267                         break;
2268                 /* Don't forgot to check leaf/node validation */
2269                 if (*level == 0) {
2270                         ret = btrfs_check_leaf(root, NULL, cur);
2271                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272                                 ret = -EIO;
2273                                 break;
2274                         }
2275                         ret = process_one_leaf_v2(root, path, nrefs,
2276                                                   level, ext_ref);
2277                         break;
2278                 } else {
2279                         ret = btrfs_check_node(root, NULL, cur);
2280                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2281                                 ret = -EIO;
2282                                 break;
2283                         }
2284                 }
2285                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2286                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2287                 blocksize = root->fs_info->nodesize;
2288
2289                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2290                 if (ret)
2291                         break;
2292                 if (!nrefs->need_check[*level - 1]) {
2293                         path->slots[*level]++;
2294                         continue;
2295                 }
2296
2297                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2298                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2299                         free_extent_buffer(next);
2300                         reada_walk_down(root, cur, path->slots[*level]);
2301                         next = read_tree_block(root->fs_info, bytenr, blocksize,
2302                                                ptr_gen);
2303                         if (!extent_buffer_uptodate(next)) {
2304                                 struct btrfs_key node_key;
2305
2306                                 btrfs_node_key_to_cpu(path->nodes[*level],
2307                                                       &node_key,
2308                                                       path->slots[*level]);
2309                                 btrfs_add_corrupt_extent_record(root->fs_info,
2310                                                 &node_key,
2311                                                 path->nodes[*level]->start,
2312                                                 root->fs_info->nodesize,
2313                                                 *level);
2314                                 ret = -EIO;
2315                                 break;
2316                         }
2317                 }
2318
2319                 ret = check_child_node(cur, path->slots[*level], next);
2320                 if (ret < 0) 
2321                         break;
2322
2323                 if (btrfs_is_leaf(next))
2324                         status = btrfs_check_leaf(root, NULL, next);
2325                 else
2326                         status = btrfs_check_node(root, NULL, next);
2327                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2328                         free_extent_buffer(next);
2329                         ret = -EIO;
2330                         break;
2331                 }
2332
2333                 *level = *level - 1;
2334                 free_extent_buffer(path->nodes[*level]);
2335                 path->nodes[*level] = next;
2336                 path->slots[*level] = 0;
2337         }
2338         return ret;
2339 }
2340
2341 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2342                         struct walk_control *wc, int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         BUG_ON(*level > wc->active_node);
2357                         if (*level == wc->active_node)
2358                                 leave_shared_node(root, wc, *level);
2359                         *level = i + 1;
2360                 }
2361         }
2362         return 1;
2363 }
2364
2365 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2366                            int *level)
2367 {
2368         int i;
2369         struct extent_buffer *leaf;
2370
2371         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2372                 leaf = path->nodes[i];
2373                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2374                         path->slots[i]++;
2375                         *level = i;
2376                         return 0;
2377                 } else {
2378                         free_extent_buffer(path->nodes[*level]);
2379                         path->nodes[*level] = NULL;
2380                         *level = i + 1;
2381                 }
2382         }
2383         return 1;
2384 }
2385
2386 static int check_root_dir(struct inode_record *rec)
2387 {
2388         struct inode_backref *backref;
2389         int ret = -1;
2390
2391         if (!rec->found_inode_item || rec->errors)
2392                 goto out;
2393         if (rec->nlink != 1 || rec->found_link != 0)
2394                 goto out;
2395         if (list_empty(&rec->backrefs))
2396                 goto out;
2397         backref = to_inode_backref(rec->backrefs.next);
2398         if (!backref->found_inode_ref)
2399                 goto out;
2400         if (backref->index != 0 || backref->namelen != 2 ||
2401             memcmp(backref->name, "..", 2))
2402                 goto out;
2403         if (backref->found_dir_index || backref->found_dir_item)
2404                 goto out;
2405         ret = 0;
2406 out:
2407         return ret;
2408 }
2409
2410 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2411                               struct btrfs_root *root, struct btrfs_path *path,
2412                               struct inode_record *rec)
2413 {
2414         struct btrfs_inode_item *ei;
2415         struct btrfs_key key;
2416         int ret;
2417
2418         key.objectid = rec->ino;
2419         key.type = BTRFS_INODE_ITEM_KEY;
2420         key.offset = (u64)-1;
2421
2422         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2423         if (ret < 0)
2424                 goto out;
2425         if (ret) {
2426                 if (!path->slots[0]) {
2427                         ret = -ENOENT;
2428                         goto out;
2429                 }
2430                 path->slots[0]--;
2431                 ret = 0;
2432         }
2433         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2434         if (key.objectid != rec->ino) {
2435                 ret = -ENOENT;
2436                 goto out;
2437         }
2438
2439         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2440                             struct btrfs_inode_item);
2441         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2442         btrfs_mark_buffer_dirty(path->nodes[0]);
2443         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2444         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2445                root->root_key.objectid);
2446 out:
2447         btrfs_release_path(path);
2448         return ret;
2449 }
2450
2451 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2452                                     struct btrfs_root *root,
2453                                     struct btrfs_path *path,
2454                                     struct inode_record *rec)
2455 {
2456         int ret;
2457
2458         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2459         btrfs_release_path(path);
2460         if (!ret)
2461                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2462         return ret;
2463 }
2464
2465 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2466                                struct btrfs_root *root,
2467                                struct btrfs_path *path,
2468                                struct inode_record *rec)
2469 {
2470         struct btrfs_inode_item *ei;
2471         struct btrfs_key key;
2472         int ret = 0;
2473
2474         key.objectid = rec->ino;
2475         key.type = BTRFS_INODE_ITEM_KEY;
2476         key.offset = 0;
2477
2478         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2479         if (ret) {
2480                 if (ret > 0)
2481                         ret = -ENOENT;
2482                 goto out;
2483         }
2484
2485         /* Since ret == 0, no need to check anything */
2486         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2487                             struct btrfs_inode_item);
2488         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2489         btrfs_mark_buffer_dirty(path->nodes[0]);
2490         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2491         printf("reset nbytes for ino %llu root %llu\n",
2492                rec->ino, root->root_key.objectid);
2493 out:
2494         btrfs_release_path(path);
2495         return ret;
2496 }
2497
2498 static int add_missing_dir_index(struct btrfs_root *root,
2499                                  struct cache_tree *inode_cache,
2500                                  struct inode_record *rec,
2501                                  struct inode_backref *backref)
2502 {
2503         struct btrfs_path path;
2504         struct btrfs_trans_handle *trans;
2505         struct btrfs_dir_item *dir_item;
2506         struct extent_buffer *leaf;
2507         struct btrfs_key key;
2508         struct btrfs_disk_key disk_key;
2509         struct inode_record *dir_rec;
2510         unsigned long name_ptr;
2511         u32 data_size = sizeof(*dir_item) + backref->namelen;
2512         int ret;
2513
2514         trans = btrfs_start_transaction(root, 1);
2515         if (IS_ERR(trans))
2516                 return PTR_ERR(trans);
2517
2518         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2519                 (unsigned long long)rec->ino);
2520
2521         btrfs_init_path(&path);
2522         key.objectid = backref->dir;
2523         key.type = BTRFS_DIR_INDEX_KEY;
2524         key.offset = backref->index;
2525         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2526         BUG_ON(ret);
2527
2528         leaf = path.nodes[0];
2529         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2530
2531         disk_key.objectid = cpu_to_le64(rec->ino);
2532         disk_key.type = BTRFS_INODE_ITEM_KEY;
2533         disk_key.offset = 0;
2534
2535         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2536         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2537         btrfs_set_dir_data_len(leaf, dir_item, 0);
2538         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2539         name_ptr = (unsigned long)(dir_item + 1);
2540         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2541         btrfs_mark_buffer_dirty(leaf);
2542         btrfs_release_path(&path);
2543         btrfs_commit_transaction(trans, root);
2544
2545         backref->found_dir_index = 1;
2546         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2547         BUG_ON(IS_ERR(dir_rec));
2548         if (!dir_rec)
2549                 return 0;
2550         dir_rec->found_size += backref->namelen;
2551         if (dir_rec->found_size == dir_rec->isize &&
2552             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2553                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2554         if (dir_rec->found_size != dir_rec->isize)
2555                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2556
2557         return 0;
2558 }
2559
2560 static int delete_dir_index(struct btrfs_root *root,
2561                             struct inode_backref *backref)
2562 {
2563         struct btrfs_trans_handle *trans;
2564         struct btrfs_dir_item *di;
2565         struct btrfs_path path;
2566         int ret = 0;
2567
2568         trans = btrfs_start_transaction(root, 1);
2569         if (IS_ERR(trans))
2570                 return PTR_ERR(trans);
2571
2572         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2573                 (unsigned long long)backref->dir,
2574                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2575                 (unsigned long long)root->objectid);
2576
2577         btrfs_init_path(&path);
2578         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2579                                     backref->name, backref->namelen,
2580                                     backref->index, -1);
2581         if (IS_ERR(di)) {
2582                 ret = PTR_ERR(di);
2583                 btrfs_release_path(&path);
2584                 btrfs_commit_transaction(trans, root);
2585                 if (ret == -ENOENT)
2586                         return 0;
2587                 return ret;
2588         }
2589
2590         if (!di)
2591                 ret = btrfs_del_item(trans, root, &path);
2592         else
2593                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2594         BUG_ON(ret);
2595         btrfs_release_path(&path);
2596         btrfs_commit_transaction(trans, root);
2597         return ret;
2598 }
2599
2600 static int create_inode_item(struct btrfs_root *root,
2601                              struct inode_record *rec,
2602                              int root_dir)
2603 {
2604         struct btrfs_trans_handle *trans;
2605         struct btrfs_inode_item inode_item;
2606         time_t now = time(NULL);
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans)) {
2611                 ret = PTR_ERR(trans);
2612                 return ret;
2613         }
2614
2615         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2616                 "be incomplete, please check permissions and content after "
2617                 "the fsck completes.\n", (unsigned long long)root->objectid,
2618                 (unsigned long long)rec->ino);
2619
2620         memset(&inode_item, 0, sizeof(inode_item));
2621         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2622         if (root_dir)
2623                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2624         else
2625                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2626         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2627         if (rec->found_dir_item) {
2628                 if (rec->found_file_extent)
2629                         fprintf(stderr, "root %llu inode %llu has both a dir "
2630                                 "item and extents, unsure if it is a dir or a "
2631                                 "regular file so setting it as a directory\n",
2632                                 (unsigned long long)root->objectid,
2633                                 (unsigned long long)rec->ino);
2634                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2635                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2636         } else if (!rec->found_dir_item) {
2637                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2638                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2639         }
2640         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2641         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2642         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2643         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2644         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2645         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2646         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2647         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2648
2649         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2650         BUG_ON(ret);
2651         btrfs_commit_transaction(trans, root);
2652         return 0;
2653 }
2654
/*
 * Walk every backref of @rec and repair inconsistencies between its
 * inode ref, dir item and dir index.
 *
 * @root:        tree the inode lives in
 * @rec:         inode record whose backref list is processed
 * @inode_cache: cache handed to add_missing_dir_index() to update the
 *               parent directory's record
 * @delete:      non-zero selects the "delete bad dir index" pass; zero selects
 *               the "add missing items" pass
 *
 * Backrefs that end up fully consistent (or whose dir index got deleted) are
 * unlinked from the list and freed while iterating.
 *
 * Return the non-zero status of the first failing step, otherwise the number
 * of repairs performed (possibly 0).
 */
static int repair_inode_backrefs(struct btrfs_root *root,
                                 struct inode_record *rec,
                                 struct cache_tree *inode_cache,
                                 int delete)
{
        struct inode_backref *tmp, *backref;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);
        int ret = 0;
        int repaired = 0;

        list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
                /* The root directory itself lost its inode item: recreate it */
                if (!delete && rec->ino == root_dirid) {
                        if (!rec->found_inode_item) {
                                ret = create_inode_item(root, rec, 1);
                                if (ret)
                                        break;
                                repaired++;
                        }
                }

                /* Index 0 for root dir's are special, don't mess with it */
                if (rec->ino == root_dirid && backref->index == 0)
                        continue;

                /*
                 * Delete pass: drop a dir index that either has no matching
                 * inode ref, or has one but with a mismatching index.
                 */
                if (delete &&
                    ((backref->found_dir_index && !backref->found_inode_ref) ||
                     (backref->found_dir_index && backref->found_inode_ref &&
                      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
                        ret = delete_dir_index(root, backref);
                        if (ret)
                                break;
                        repaired++;
                        list_del(&backref->list);
                        free(backref);
                        continue;
                }

                /* Dir item and inode ref exist, only the dir index is missing */
                if (!delete && !backref->found_dir_index &&
                    backref->found_dir_item && backref->found_inode_ref) {
                        ret = add_missing_dir_index(root, inode_cache, rec,
                                                    backref);
                        if (ret)
                                break;
                        repaired++;
                        /* Backref is complete and error free now, forget it */
                        if (backref->found_dir_item &&
                            backref->found_dir_index) {
                                if (!backref->errors &&
                                    backref->found_inode_ref) {
                                        list_del(&backref->list);
                                        free(backref);
                                        continue;
                                }
                        }
                }

                /* Only the inode ref exists: add the dir item/index pair */
                if (!delete && (!backref->found_dir_index &&
                                !backref->found_dir_item &&
                                backref->found_inode_ref)) {
                        struct btrfs_trans_handle *trans;
                        struct btrfs_key location;

                        ret = check_dir_conflict(root, backref->name,
                                                 backref->namelen,
                                                 backref->dir,
                                                 backref->index);
                        if (ret) {
                                /*
                                 * let nlink fixing routine to handle it,
                                 * which can do it better.
                                 */
                                ret = 0;
                                break;
                        }
                        location.objectid = rec->ino;
                        location.type = BTRFS_INODE_ITEM_KEY;
                        location.offset = 0;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                break;
                        }
                        fprintf(stderr, "adding missing dir index/item pair "
                                "for inode %llu\n",
                                (unsigned long long)rec->ino);
                        ret = btrfs_insert_dir_item(trans, root, backref->name,
                                                    backref->namelen,
                                                    backref->dir, &location,
                                                    imode_to_type(rec->imode),
                                                    backref->index);
                        BUG_ON(ret);
                        btrfs_commit_transaction(trans, root);
                        repaired++;
                }

                /*
                 * All three references agree but the inode item itself is
                 * missing: recreate it as a non-root inode.
                 */
                if (!delete && (backref->found_inode_ref &&
                                backref->found_dir_index &&
                                backref->found_dir_item &&
                                !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
                                !rec->found_inode_item)) {
                        ret = create_inode_item(root, rec, 0);
                        if (ret)
                                break;
                        repaired++;
                }

        }
        /* A failure wins over the repair count */
        return ret ? ret : repaired;
}
2764
2765 /*
2766  * To determine the file type for nlink/inode_item repair
2767  *
2768  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2769  * Return -ENOENT if file type is not found.
2770  */
2771 static int find_file_type(struct inode_record *rec, u8 *type)
2772 {
2773         struct inode_backref *backref;
2774
2775         /* For inode item recovered case */
2776         if (rec->found_inode_item) {
2777                 *type = imode_to_type(rec->imode);
2778                 return 0;
2779         }
2780
2781         list_for_each_entry(backref, &rec->backrefs, list) {
2782                 if (backref->found_dir_index || backref->found_dir_item) {
2783                         *type = backref->filetype;
2784                         return 0;
2785                 }
2786         }
2787         return -ENOENT;
2788 }
2789
2790 /*
2791  * To determine the file name for nlink repair
2792  *
2793  * Return 0 if file name is found, set name and namelen.
2794  * Return -ENOENT if file name is not found.
2795  */
2796 static int find_file_name(struct inode_record *rec,
2797                           char *name, int *namelen)
2798 {
2799         struct inode_backref *backref;
2800
2801         list_for_each_entry(backref, &rec->backrefs, list) {
2802                 if (backref->found_dir_index || backref->found_dir_item ||
2803                     backref->found_inode_ref) {
2804                         memcpy(name, backref->name, backref->namelen);
2805                         *namelen = backref->namelen;
2806                         return 0;
2807                 }
2808         }
2809         return -ENOENT;
2810 }
2811
2812 /* Reset the nlink of the inode to the correct one */
2813 static int reset_nlink(struct btrfs_trans_handle *trans,
2814                        struct btrfs_root *root,
2815                        struct btrfs_path *path,
2816                        struct inode_record *rec)
2817 {
2818         struct inode_backref *backref;
2819         struct inode_backref *tmp;
2820         struct btrfs_key key;
2821         struct btrfs_inode_item *inode_item;
2822         int ret = 0;
2823
2824         /* We don't believe this either, reset it and iterate backref */
2825         rec->found_link = 0;
2826
2827         /* Remove all backref including the valid ones */
2828         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2829                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2830                                    backref->index, backref->name,
2831                                    backref->namelen, 0);
2832                 if (ret < 0)
2833                         goto out;
2834
2835                 /* remove invalid backref, so it won't be added back */
2836                 if (!(backref->found_dir_index &&
2837                       backref->found_dir_item &&
2838                       backref->found_inode_ref)) {
2839                         list_del(&backref->list);
2840                         free(backref);
2841                 } else {
2842                         rec->found_link++;
2843                 }
2844         }
2845
2846         /* Set nlink to 0 */
2847         key.objectid = rec->ino;
2848         key.type = BTRFS_INODE_ITEM_KEY;
2849         key.offset = 0;
2850         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2851         if (ret < 0)
2852                 goto out;
2853         if (ret > 0) {
2854                 ret = -ENOENT;
2855                 goto out;
2856         }
2857         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2858                                     struct btrfs_inode_item);
2859         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2860         btrfs_mark_buffer_dirty(path->nodes[0]);
2861         btrfs_release_path(path);
2862
2863         /*
2864          * Add back valid inode_ref/dir_item/dir_index,
2865          * add_link() will handle the nlink inc, so new nlink must be correct
2866          */
2867         list_for_each_entry(backref, &rec->backrefs, list) {
2868                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2869                                      backref->name, backref->namelen,
2870                                      backref->filetype, &backref->index, 1);
2871                 if (ret < 0)
2872                         goto out;
2873         }
2874 out:
2875         btrfs_release_path(path);
2876         return ret;
2877 }
2878
2879 static int get_highest_inode(struct btrfs_trans_handle *trans,
2880                                 struct btrfs_root *root,
2881                                 struct btrfs_path *path,
2882                                 u64 *highest_ino)
2883 {
2884         struct btrfs_key key, found_key;
2885         int ret;
2886
2887         btrfs_init_path(path);
2888         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2889         key.offset = -1;
2890         key.type = BTRFS_INODE_ITEM_KEY;
2891         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2892         if (ret == 1) {
2893                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2894                                 path->slots[0] - 1);
2895                 *highest_ino = found_key.objectid;
2896                 ret = 0;
2897         }
2898         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2899                 ret = -EOVERFLOW;
2900         btrfs_release_path(path);
2901         return ret;
2902 }
2903
2904 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2905                                struct btrfs_root *root,
2906                                struct btrfs_path *path,
2907                                struct inode_record *rec)
2908 {
2909         char *dir_name = "lost+found";
2910         char namebuf[BTRFS_NAME_LEN] = {0};
2911         u64 lost_found_ino;
2912         u32 mode = 0700;
2913         u8 type = 0;
2914         int namelen = 0;
2915         int name_recovered = 0;
2916         int type_recovered = 0;
2917         int ret = 0;
2918
2919         /*
2920          * Get file name and type first before these invalid inode ref
2921          * are deleted by remove_all_invalid_backref()
2922          */
2923         name_recovered = !find_file_name(rec, namebuf, &namelen);
2924         type_recovered = !find_file_type(rec, &type);
2925
2926         if (!name_recovered) {
2927                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2928                        rec->ino, rec->ino);
2929                 namelen = count_digits(rec->ino);
2930                 sprintf(namebuf, "%llu", rec->ino);
2931                 name_recovered = 1;
2932         }
2933         if (!type_recovered) {
2934                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2935                        rec->ino);
2936                 type = BTRFS_FT_REG_FILE;
2937                 type_recovered = 1;
2938         }
2939
2940         ret = reset_nlink(trans, root, path, rec);
2941         if (ret < 0) {
2942                 fprintf(stderr,
2943                         "Failed to reset nlink for inode %llu: %s\n",
2944                         rec->ino, strerror(-ret));
2945                 goto out;
2946         }
2947
2948         if (rec->found_link == 0) {
2949                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2950                 if (ret < 0)
2951                         goto out;
2952                 lost_found_ino++;
2953                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2954                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2955                                   mode);
2956                 if (ret < 0) {
2957                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2958                                 dir_name, strerror(-ret));
2959                         goto out;
2960                 }
2961                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2962                                      namebuf, namelen, type, NULL, 1);
2963                 /*
2964                  * Add ".INO" suffix several times to handle case where
2965                  * "FILENAME.INO" is already taken by another file.
2966                  */
2967                 while (ret == -EEXIST) {
2968                         /*
2969                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2970                          */
2971                         if (namelen + count_digits(rec->ino) + 1 >
2972                             BTRFS_NAME_LEN) {
2973                                 ret = -EFBIG;
2974                                 goto out;
2975                         }
2976                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2977                                  ".%llu", rec->ino);
2978                         namelen += count_digits(rec->ino) + 1;
2979                         ret = btrfs_add_link(trans, root, rec->ino,
2980                                              lost_found_ino, namebuf,
2981                                              namelen, type, NULL, 1);
2982                 }
2983                 if (ret < 0) {
2984                         fprintf(stderr,
2985                                 "Failed to link the inode %llu to %s dir: %s\n",
2986                                 rec->ino, dir_name, strerror(-ret));
2987                         goto out;
2988                 }
2989                 /*
2990                  * Just increase the found_link, don't actually add the
2991                  * backref. This will make things easier and this inode
2992                  * record will be freed after the repair is done.
2993                  * So fsck will not report problem about this inode.
2994                  */
2995                 rec->found_link++;
2996                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2997                        namelen, namebuf, dir_name);
2998         }
2999         printf("Fixed the nlink of inode %llu\n", rec->ino);
3000 out:
3001         /*
3002          * Clear the flag anyway, or we will loop forever for the same inode
3003          * as it will not be removed from the bad inode list and the dead loop
3004          * happens.
3005          */
3006         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3007         btrfs_release_path(path);
3008         return ret;
3009 }
3010
3011 /*
3012  * Check if there is any normal(reg or prealloc) file extent for given
3013  * ino.
3014  * This is used to determine the file type when neither its dir_index/item or
3015  * inode_item exists.
3016  *
3017  * This will *NOT* report error, if any error happens, just consider it does
3018  * not have any normal file extent.
3019  */
3020 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3021 {
3022         struct btrfs_path path;
3023         struct btrfs_key key;
3024         struct btrfs_key found_key;
3025         struct btrfs_file_extent_item *fi;
3026         u8 type;
3027         int ret = 0;
3028
3029         btrfs_init_path(&path);
3030         key.objectid = ino;
3031         key.type = BTRFS_EXTENT_DATA_KEY;
3032         key.offset = 0;
3033
3034         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3035         if (ret < 0) {
3036                 ret = 0;
3037                 goto out;
3038         }
3039         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3040                 ret = btrfs_next_leaf(root, &path);
3041                 if (ret) {
3042                         ret = 0;
3043                         goto out;
3044                 }
3045         }
3046         while (1) {
3047                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3048                                       path.slots[0]);
3049                 if (found_key.objectid != ino ||
3050                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3051                         break;
3052                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3053                                     struct btrfs_file_extent_item);
3054                 type = btrfs_file_extent_type(path.nodes[0], fi);
3055                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3056                         ret = 1;
3057                         goto out;
3058                 }
3059         }
3060 out:
3061         btrfs_release_path(&path);
3062         return ret;
3063 }
3064
3065 static u32 btrfs_type_to_imode(u8 type)
3066 {
3067         static u32 imode_by_btrfs_type[] = {
3068                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3069                 [BTRFS_FT_DIR]          = S_IFDIR,
3070                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3071                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3072                 [BTRFS_FT_FIFO]         = S_IFIFO,
3073                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3074                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3075         };
3076
3077         return imode_by_btrfs_type[(type)];
3078 }
3079
3080 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3081                                 struct btrfs_root *root,
3082                                 struct btrfs_path *path,
3083                                 struct inode_record *rec)
3084 {
3085         u8 filetype;
3086         u32 mode = 0700;
3087         int type_recovered = 0;
3088         int ret = 0;
3089
3090         printf("Trying to rebuild inode:%llu\n", rec->ino);
3091
3092         type_recovered = !find_file_type(rec, &filetype);
3093
3094         /*
3095          * Try to determine inode type if type not found.
3096          *
3097          * For found regular file extent, it must be FILE.
3098          * For found dir_item/index, it must be DIR.
3099          *
3100          * For undetermined one, use FILE as fallback.
3101          *
3102          * TODO:
3103          * 1. If found backref(inode_index/item is already handled) to it,
3104          *    it must be DIR.
3105          *    Need new inode-inode ref structure to allow search for that.
3106          */
3107         if (!type_recovered) {
3108                 if (rec->found_file_extent &&
3109                     find_normal_file_extent(root, rec->ino)) {
3110                         type_recovered = 1;
3111                         filetype = BTRFS_FT_REG_FILE;
3112                 } else if (rec->found_dir_item) {
3113                         type_recovered = 1;
3114                         filetype = BTRFS_FT_DIR;
3115                 } else if (!list_empty(&rec->orphan_extents)) {
3116                         type_recovered = 1;
3117                         filetype = BTRFS_FT_REG_FILE;
3118                 } else{
3119                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3120                                rec->ino);
3121                         type_recovered = 1;
3122                         filetype = BTRFS_FT_REG_FILE;
3123                 }
3124         }
3125
3126         ret = btrfs_new_inode(trans, root, rec->ino,
3127                               mode | btrfs_type_to_imode(filetype));
3128         if (ret < 0)
3129                 goto out;
3130
3131         /*
3132          * Here inode rebuild is done, we only rebuild the inode item,
3133          * don't repair the nlink(like move to lost+found).
3134          * That is the job of nlink repair.
3135          *
3136          * We just fill the record and return
3137          */
3138         rec->found_dir_item = 1;
3139         rec->imode = mode | btrfs_type_to_imode(filetype);
3140         rec->nlink = 0;
3141         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3142         /* Ensure the inode_nlinks repair function will be called */
3143         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3144 out:
3145         return ret;
3146 }
3147
3148 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3149                                       struct btrfs_root *root,
3150                                       struct btrfs_path *path,
3151                                       struct inode_record *rec)
3152 {
3153         struct orphan_data_extent *orphan;
3154         struct orphan_data_extent *tmp;
3155         int ret = 0;
3156
3157         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3158                 /*
3159                  * Check for conflicting file extents
3160                  *
3161                  * Here we don't know whether the extents is compressed or not,
3162                  * so we can only assume it not compressed nor data offset,
3163                  * and use its disk_len as extent length.
3164                  */
3165                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3166                                        orphan->offset, orphan->disk_len, 0);
3167                 btrfs_release_path(path);
3168                 if (ret < 0)
3169                         goto out;
3170                 if (!ret) {
3171                         fprintf(stderr,
3172                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3173                                 orphan->disk_bytenr, orphan->disk_len);
3174                         ret = btrfs_free_extent(trans,
3175                                         root->fs_info->extent_root,
3176                                         orphan->disk_bytenr, orphan->disk_len,
3177                                         0, root->objectid, orphan->objectid,
3178                                         orphan->offset);
3179                         if (ret < 0)
3180                                 goto out;
3181                 }
3182                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3183                                 orphan->offset, orphan->disk_bytenr,
3184                                 orphan->disk_len, orphan->disk_len);
3185                 if (ret < 0)
3186                         goto out;
3187
3188                 /* Update file size info */
3189                 rec->found_size += orphan->disk_len;
3190                 if (rec->found_size == rec->nbytes)
3191                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3192
3193                 /* Update the file extent hole info too */
3194                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3195                                            orphan->disk_len);
3196                 if (ret < 0)
3197                         goto out;
3198                 if (RB_EMPTY_ROOT(&rec->holes))
3199                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3200
3201                 list_del(&orphan->list);
3202                 free(orphan);
3203         }
3204         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3205 out:
3206         return ret;
3207 }
3208
3209 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3210                                         struct btrfs_root *root,
3211                                         struct btrfs_path *path,
3212                                         struct inode_record *rec)
3213 {
3214         struct rb_node *node;
3215         struct file_extent_hole *hole;
3216         int found = 0;
3217         int ret = 0;
3218
3219         node = rb_first(&rec->holes);
3220
3221         while (node) {
3222                 found = 1;
3223                 hole = rb_entry(node, struct file_extent_hole, node);
3224                 ret = btrfs_punch_hole(trans, root, rec->ino,
3225                                        hole->start, hole->len);
3226                 if (ret < 0)
3227                         goto out;
3228                 ret = del_file_extent_hole(&rec->holes, hole->start,
3229                                            hole->len);
3230                 if (ret < 0)
3231                         goto out;
3232                 if (RB_EMPTY_ROOT(&rec->holes))
3233                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3234                 node = rb_first(&rec->holes);
3235         }
3236         /* special case for a file losing all its file extent */
3237         if (!found) {
3238                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3239                                        round_up(rec->isize,
3240                                                 root->fs_info->sectorsize));
3241                 if (ret < 0)
3242                         goto out;
3243         }
3244         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3245                rec->ino, root->objectid);
3246 out:
3247         return ret;
3248 }
3249
3250 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3251 {
3252         struct btrfs_trans_handle *trans;
3253         struct btrfs_path path;
3254         int ret = 0;
3255
3256         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3257                              I_ERR_NO_ORPHAN_ITEM |
3258                              I_ERR_LINK_COUNT_WRONG |
3259                              I_ERR_NO_INODE_ITEM |
3260                              I_ERR_FILE_EXTENT_ORPHAN |
3261                              I_ERR_FILE_EXTENT_DISCOUNT|
3262                              I_ERR_FILE_NBYTES_WRONG)))
3263                 return rec->errors;
3264
3265         /*
3266          * For nlink repair, it may create a dir and add link, so
3267          * 2 for parent(256)'s dir_index and dir_item
3268          * 2 for lost+found dir's inode_item and inode_ref
3269          * 1 for the new inode_ref of the file
3270          * 2 for lost+found dir's dir_index and dir_item for the file
3271          */
3272         trans = btrfs_start_transaction(root, 7);
3273         if (IS_ERR(trans))
3274                 return PTR_ERR(trans);
3275
3276         btrfs_init_path(&path);
3277         if (rec->errors & I_ERR_NO_INODE_ITEM)
3278                 ret = repair_inode_no_item(trans, root, &path, rec);
3279         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3280                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3281         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3282                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3283         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3284                 ret = repair_inode_isize(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3286                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3288                 ret = repair_inode_nlinks(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3290                 ret = repair_inode_nbytes(trans, root, &path, rec);
3291         btrfs_commit_transaction(trans, root);
3292         btrfs_release_path(&path);
3293         return ret;
3294 }
3295
3296 static int check_inode_recs(struct btrfs_root *root,
3297                             struct cache_tree *inode_cache)
3298 {
3299         struct cache_extent *cache;
3300         struct ptr_node *node;
3301         struct inode_record *rec;
3302         struct inode_backref *backref;
3303         int stage = 0;
3304         int ret = 0;
3305         int err = 0;
3306         u64 error = 0;
3307         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3308
3309         if (btrfs_root_refs(&root->root_item) == 0) {
3310                 if (!cache_tree_empty(inode_cache))
3311                         fprintf(stderr, "warning line %d\n", __LINE__);
3312                 return 0;
3313         }
3314
3315         /*
3316          * We need to repair backrefs first because we could change some of the
3317          * errors in the inode recs.
3318          *
3319          * We also need to go through and delete invalid backrefs first and then
3320          * add the correct ones second.  We do this because we may get EEXIST
3321          * when adding back the correct index because we hadn't yet deleted the
3322          * invalid index.
3323          *
3324          * For example, if we were missing a dir index then the directories
3325          * isize would be wrong, so if we fixed the isize to what we thought it
3326          * would be and then fixed the backref we'd still have a invalid fs, so
3327          * we need to add back the dir index and then check to see if the isize
3328          * is still wrong.
3329          */
3330         while (stage < 3) {
3331                 stage++;
3332                 if (stage == 3 && !err)
3333                         break;
3334
3335                 cache = search_cache_extent(inode_cache, 0);
3336                 while (repair && cache) {
3337                         node = container_of(cache, struct ptr_node, cache);
3338                         rec = node->data;
3339                         cache = next_cache_extent(cache);
3340
3341                         /* Need to free everything up and rescan */
3342                         if (stage == 3) {
3343                                 remove_cache_extent(inode_cache, &node->cache);
3344                                 free(node);
3345                                 free_inode_rec(rec);
3346                                 continue;
3347                         }
3348
3349                         if (list_empty(&rec->backrefs))
3350                                 continue;
3351
3352                         ret = repair_inode_backrefs(root, rec, inode_cache,
3353                                                     stage == 1);
3354                         if (ret < 0) {
3355                                 err = ret;
3356                                 stage = 2;
3357                                 break;
3358                         } if (ret > 0) {
3359                                 err = -EAGAIN;
3360                         }
3361                 }
3362         }
3363         if (err)
3364                 return err;
3365
3366         rec = get_inode_rec(inode_cache, root_dirid, 0);
3367         BUG_ON(IS_ERR(rec));
3368         if (rec) {
3369                 ret = check_root_dir(rec);
3370                 if (ret) {
3371                         fprintf(stderr, "root %llu root dir %llu error\n",
3372                                 (unsigned long long)root->root_key.objectid,
3373                                 (unsigned long long)root_dirid);
3374                         print_inode_error(root, rec);
3375                         error++;
3376                 }
3377         } else {
3378                 if (repair) {
3379                         struct btrfs_trans_handle *trans;
3380
3381                         trans = btrfs_start_transaction(root, 1);
3382                         if (IS_ERR(trans)) {
3383                                 err = PTR_ERR(trans);
3384                                 return err;
3385                         }
3386
3387                         fprintf(stderr,
3388                                 "root %llu missing its root dir, recreating\n",
3389                                 (unsigned long long)root->objectid);
3390
3391                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3392                         BUG_ON(ret);
3393
3394                         btrfs_commit_transaction(trans, root);
3395                         return -EAGAIN;
3396                 }
3397
3398                 fprintf(stderr, "root %llu root dir %llu not found\n",
3399                         (unsigned long long)root->root_key.objectid,
3400                         (unsigned long long)root_dirid);
3401         }
3402
3403         while (1) {
3404                 cache = search_cache_extent(inode_cache, 0);
3405                 if (!cache)
3406                         break;
3407                 node = container_of(cache, struct ptr_node, cache);
3408                 rec = node->data;
3409                 remove_cache_extent(inode_cache, &node->cache);
3410                 free(node);
3411                 if (rec->ino == root_dirid ||
3412                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3413                         free_inode_rec(rec);
3414                         continue;
3415                 }
3416
3417                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3418                         ret = check_orphan_item(root, rec->ino);
3419                         if (ret == 0)
3420                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3421                         if (can_free_inode_rec(rec)) {
3422                                 free_inode_rec(rec);
3423                                 continue;
3424                         }
3425                 }
3426
3427                 if (!rec->found_inode_item)
3428                         rec->errors |= I_ERR_NO_INODE_ITEM;
3429                 if (rec->found_link != rec->nlink)
3430                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3431                 if (repair) {
3432                         ret = try_repair_inode(root, rec);
3433                         if (ret == 0 && can_free_inode_rec(rec)) {
3434                                 free_inode_rec(rec);
3435                                 continue;
3436                         }
3437                         ret = 0;
3438                 }
3439
3440                 if (!(repair && ret == 0))
3441                         error++;
3442                 print_inode_error(root, rec);
3443                 list_for_each_entry(backref, &rec->backrefs, list) {
3444                         if (!backref->found_dir_item)
3445                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3446                         if (!backref->found_dir_index)
3447                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3448                         if (!backref->found_inode_ref)
3449                                 backref->errors |= REF_ERR_NO_INODE_REF;
3450                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3451                                 " namelen %u name %s filetype %d errors %x",
3452                                 (unsigned long long)backref->dir,
3453                                 (unsigned long long)backref->index,
3454                                 backref->namelen, backref->name,
3455                                 backref->filetype, backref->errors);
3456                         print_ref_error(backref->errors);
3457                 }
3458                 free_inode_rec(rec);
3459         }
3460         return (error > 0) ? -1 : 0;
3461 }
3462
3463 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3464                                         u64 objectid)
3465 {
3466         struct cache_extent *cache;
3467         struct root_record *rec = NULL;
3468         int ret;
3469
3470         cache = lookup_cache_extent(root_cache, objectid, 1);
3471         if (cache) {
3472                 rec = container_of(cache, struct root_record, cache);
3473         } else {
3474                 rec = calloc(1, sizeof(*rec));
3475                 if (!rec)
3476                         return ERR_PTR(-ENOMEM);
3477                 rec->objectid = objectid;
3478                 INIT_LIST_HEAD(&rec->backrefs);
3479                 rec->cache.start = objectid;
3480                 rec->cache.size = 1;
3481
3482                 ret = insert_cache_extent(root_cache, &rec->cache);
3483                 if (ret)
3484                         return ERR_PTR(-EEXIST);
3485         }
3486         return rec;
3487 }
3488
3489 static struct root_backref *get_root_backref(struct root_record *rec,
3490                                              u64 ref_root, u64 dir, u64 index,
3491                                              const char *name, int namelen)
3492 {
3493         struct root_backref *backref;
3494
3495         list_for_each_entry(backref, &rec->backrefs, list) {
3496                 if (backref->ref_root != ref_root || backref->dir != dir ||
3497                     backref->namelen != namelen)
3498                         continue;
3499                 if (memcmp(name, backref->name, namelen))
3500                         continue;
3501                 return backref;
3502         }
3503
3504         backref = calloc(1, sizeof(*backref) + namelen + 1);
3505         if (!backref)
3506                 return NULL;
3507         backref->ref_root = ref_root;
3508         backref->dir = dir;
3509         backref->index = index;
3510         backref->namelen = namelen;
3511         memcpy(backref->name, name, namelen);
3512         backref->name[namelen] = '\0';
3513         list_add_tail(&backref->list, &rec->backrefs);
3514         return backref;
3515 }
3516
3517 static void free_root_record(struct cache_extent *cache)
3518 {
3519         struct root_record *rec;
3520         struct root_backref *backref;
3521
3522         rec = container_of(cache, struct root_record, cache);
3523         while (!list_empty(&rec->backrefs)) {
3524                 backref = to_root_backref(rec->backrefs.next);
3525                 list_del(&backref->list);
3526                 free(backref);
3527         }
3528
3529         free(rec);
3530 }
3531
3532 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3533
3534 static int add_root_backref(struct cache_tree *root_cache,
3535                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3536                             const char *name, int namelen,
3537                             int item_type, int errors)
3538 {
3539         struct root_record *rec;
3540         struct root_backref *backref;
3541
3542         rec = get_root_rec(root_cache, root_id);
3543         BUG_ON(IS_ERR(rec));
3544         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3545         BUG_ON(!backref);
3546
3547         backref->errors |= errors;
3548
3549         if (item_type != BTRFS_DIR_ITEM_KEY) {
3550                 if (backref->found_dir_index || backref->found_back_ref ||
3551                     backref->found_forward_ref) {
3552                         if (backref->index != index)
3553                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3554                 } else {
3555                         backref->index = index;
3556                 }
3557         }
3558
3559         if (item_type == BTRFS_DIR_ITEM_KEY) {
3560                 if (backref->found_forward_ref)
3561                         rec->found_ref++;
3562                 backref->found_dir_item = 1;
3563         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3564                 backref->found_dir_index = 1;
3565         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3566                 if (backref->found_forward_ref)
3567                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3568                 else if (backref->found_dir_item)
3569                         rec->found_ref++;
3570                 backref->found_forward_ref = 1;
3571         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3572                 if (backref->found_back_ref)
3573                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3574                 backref->found_back_ref = 1;
3575         } else {
3576                 BUG_ON(1);
3577         }
3578
3579         if (backref->found_forward_ref && backref->found_dir_item)
3580                 backref->reachable = 1;
3581         return 0;
3582 }
3583
3584 static int merge_root_recs(struct btrfs_root *root,
3585                            struct cache_tree *src_cache,
3586                            struct cache_tree *dst_cache)
3587 {
3588         struct cache_extent *cache;
3589         struct ptr_node *node;
3590         struct inode_record *rec;
3591         struct inode_backref *backref;
3592         int ret = 0;
3593
3594         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3595                 free_inode_recs_tree(src_cache);
3596                 return 0;
3597         }
3598
3599         while (1) {
3600                 cache = search_cache_extent(src_cache, 0);
3601                 if (!cache)
3602                         break;
3603                 node = container_of(cache, struct ptr_node, cache);
3604                 rec = node->data;
3605                 remove_cache_extent(src_cache, &node->cache);
3606                 free(node);
3607
3608                 ret = is_child_root(root, root->objectid, rec->ino);
3609                 if (ret < 0)
3610                         break;
3611                 else if (ret == 0)
3612                         goto skip;
3613
3614                 list_for_each_entry(backref, &rec->backrefs, list) {
3615                         BUG_ON(backref->found_inode_ref);
3616                         if (backref->found_dir_item)
3617                                 add_root_backref(dst_cache, rec->ino,
3618                                         root->root_key.objectid, backref->dir,
3619                                         backref->index, backref->name,
3620                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3621                                         backref->errors);
3622                         if (backref->found_dir_index)
3623                                 add_root_backref(dst_cache, rec->ino,
3624                                         root->root_key.objectid, backref->dir,
3625                                         backref->index, backref->name,
3626                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3627                                         backref->errors);
3628                 }
3629 skip:
3630                 free_inode_rec(rec);
3631         }
3632         if (ret < 0)
3633                 return ret;
3634         return 0;
3635 }
3636
3637 static int check_root_refs(struct btrfs_root *root,
3638                            struct cache_tree *root_cache)
3639 {
3640         struct root_record *rec;
3641         struct root_record *ref_root;
3642         struct root_backref *backref;
3643         struct cache_extent *cache;
3644         int loop = 1;
3645         int ret;
3646         int error;
3647         int errors = 0;
3648
3649         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3650         BUG_ON(IS_ERR(rec));
3651         rec->found_ref = 1;
3652
3653         /* fixme: this can not detect circular references */
3654         while (loop) {
3655                 loop = 0;
3656                 cache = search_cache_extent(root_cache, 0);
3657                 while (1) {
3658                         if (!cache)
3659                                 break;
3660                         rec = container_of(cache, struct root_record, cache);
3661                         cache = next_cache_extent(cache);
3662
3663                         if (rec->found_ref == 0)
3664                                 continue;
3665
3666                         list_for_each_entry(backref, &rec->backrefs, list) {
3667                                 if (!backref->reachable)
3668                                         continue;
3669
3670                                 ref_root = get_root_rec(root_cache,
3671                                                         backref->ref_root);
3672                                 BUG_ON(IS_ERR(ref_root));
3673                                 if (ref_root->found_ref > 0)
3674                                         continue;
3675
3676                                 backref->reachable = 0;
3677                                 rec->found_ref--;
3678                                 if (rec->found_ref == 0)
3679                                         loop = 1;
3680                         }
3681                 }
3682         }
3683
3684         cache = search_cache_extent(root_cache, 0);
3685         while (1) {
3686                 if (!cache)
3687                         break;
3688                 rec = container_of(cache, struct root_record, cache);
3689                 cache = next_cache_extent(cache);
3690
3691                 if (rec->found_ref == 0 &&
3692                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3693                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3694                         ret = check_orphan_item(root->fs_info->tree_root,
3695                                                 rec->objectid);
3696                         if (ret == 0)
3697                                 continue;
3698
3699                         /*
3700                          * If we don't have a root item then we likely just have
3701                          * a dir item in a snapshot for this root but no actual
3702                          * ref key or anything so it's meaningless.
3703                          */
3704                         if (!rec->found_root_item)
3705                                 continue;
3706                         errors++;
3707                         fprintf(stderr, "fs tree %llu not referenced\n",
3708                                 (unsigned long long)rec->objectid);
3709                 }
3710
3711                 error = 0;
3712                 if (rec->found_ref > 0 && !rec->found_root_item)
3713                         error = 1;
3714                 list_for_each_entry(backref, &rec->backrefs, list) {
3715                         if (!backref->found_dir_item)
3716                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3717                         if (!backref->found_dir_index)
3718                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3719                         if (!backref->found_back_ref)
3720                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3721                         if (!backref->found_forward_ref)
3722                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3723                         if (backref->reachable && backref->errors)
3724                                 error = 1;
3725                 }
3726                 if (!error)
3727                         continue;
3728
3729                 errors++;
3730                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3731                         (unsigned long long)rec->objectid, rec->found_ref,
3732                          rec->found_root_item ? "" : "not found");
3733
3734                 list_for_each_entry(backref, &rec->backrefs, list) {
3735                         if (!backref->reachable)
3736                                 continue;
3737                         if (!backref->errors && rec->found_root_item)
3738                                 continue;
3739                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3740                                 " index %llu namelen %u name %s errors %x\n",
3741                                 (unsigned long long)backref->ref_root,
3742                                 (unsigned long long)backref->dir,
3743                                 (unsigned long long)backref->index,
3744                                 backref->namelen, backref->name,
3745                                 backref->errors);
3746                         print_ref_error(backref->errors);
3747                 }
3748         }
3749         return errors > 0 ? 1 : 0;
3750 }
3751
3752 static int process_root_ref(struct extent_buffer *eb, int slot,
3753                             struct btrfs_key *key,
3754                             struct cache_tree *root_cache)
3755 {
3756         u64 dirid;
3757         u64 index;
3758         u32 len;
3759         u32 name_len;
3760         struct btrfs_root_ref *ref;
3761         char namebuf[BTRFS_NAME_LEN];
3762         int error;
3763
3764         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3765
3766         dirid = btrfs_root_ref_dirid(eb, ref);
3767         index = btrfs_root_ref_sequence(eb, ref);
3768         name_len = btrfs_root_ref_name_len(eb, ref);
3769
3770         if (name_len <= BTRFS_NAME_LEN) {
3771                 len = name_len;
3772                 error = 0;
3773         } else {
3774                 len = BTRFS_NAME_LEN;
3775                 error = REF_ERR_NAME_TOO_LONG;
3776         }
3777         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3778
3779         if (key->type == BTRFS_ROOT_REF_KEY) {
3780                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3781                                  index, namebuf, len, key->type, error);
3782         } else {
3783                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3784                                  index, namebuf, len, key->type, error);
3785         }
3786         return 0;
3787 }
3788
3789 static void free_corrupt_block(struct cache_extent *cache)
3790 {
3791         struct btrfs_corrupt_block *corrupt;
3792
3793         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3794         free(corrupt);
3795 }
3796
3797 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3798
3799 /*
3800  * Repair the btree of the given root.
3801  *
3802  * The fix is to remove the node key in corrupt_blocks cache_tree.
3803  * and rebalance the tree.
3804  * After the fix, the btree should be writeable.
3805  */
3806 static int repair_btree(struct btrfs_root *root,
3807                         struct cache_tree *corrupt_blocks)
3808 {
3809         struct btrfs_trans_handle *trans;
3810         struct btrfs_path path;
3811         struct btrfs_corrupt_block *corrupt;
3812         struct cache_extent *cache;
3813         struct btrfs_key key;
3814         u64 offset;
3815         int level;
3816         int ret = 0;
3817
3818         if (cache_tree_empty(corrupt_blocks))
3819                 return 0;
3820
3821         trans = btrfs_start_transaction(root, 1);
3822         if (IS_ERR(trans)) {
3823                 ret = PTR_ERR(trans);
3824                 fprintf(stderr, "Error starting transaction: %s\n",
3825                         strerror(-ret));
3826                 return ret;
3827         }
3828         btrfs_init_path(&path);
3829         cache = first_cache_extent(corrupt_blocks);
3830         while (cache) {
3831                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3832                                        cache);
3833                 level = corrupt->level;
3834                 path.lowest_level = level;
3835                 key.objectid = corrupt->key.objectid;
3836                 key.type = corrupt->key.type;
3837                 key.offset = corrupt->key.offset;
3838
3839                 /*
3840                  * Here we don't want to do any tree balance, since it may
3841                  * cause a balance with corrupted brother leaf/node,
3842                  * so ins_len set to 0 here.
3843                  * Balance will be done after all corrupt node/leaf is deleted.
3844                  */
3845                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 offset = btrfs_node_blockptr(path.nodes[level],
3849                                              path.slots[level]);
3850
3851                 /* Remove the ptr */
3852                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3853                 if (ret < 0)
3854                         goto out;
3855                 /*
3856                  * Remove the corresponding extent
3857                  * return value is not concerned.
3858                  */
3859                 btrfs_release_path(&path);
3860                 ret = btrfs_free_extent(trans, root, offset,
3861                                 root->fs_info->nodesize, 0,
3862                                 root->root_key.objectid, level - 1, 0);
3863                 cache = next_cache_extent(cache);
3864         }
3865
3866         /* Balance the btree using btrfs_search_slot() */
3867         cache = first_cache_extent(corrupt_blocks);
3868         while (cache) {
3869                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3870                                        cache);
3871                 memcpy(&key, &corrupt->key, sizeof(key));
3872                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3873                 if (ret < 0)
3874                         goto out;
3875                 /* return will always >0 since it won't find the item */
3876                 ret = 0;
3877                 btrfs_release_path(&path);
3878                 cache = next_cache_extent(cache);
3879         }
3880 out:
3881         btrfs_commit_transaction(trans, root);
3882         btrfs_release_path(&path);
3883         return ret;
3884 }
3885
3886 static int check_fs_root(struct btrfs_root *root,
3887                          struct cache_tree *root_cache,
3888                          struct walk_control *wc)
3889 {
3890         int ret = 0;
3891         int err = 0;
3892         int wret;
3893         int level;
3894         struct btrfs_path path;
3895         struct shared_node root_node;
3896         struct root_record *rec;
3897         struct btrfs_root_item *root_item = &root->root_item;
3898         struct cache_tree corrupt_blocks;
3899         struct orphan_data_extent *orphan;
3900         struct orphan_data_extent *tmp;
3901         enum btrfs_tree_block_status status;
3902         struct node_refs nrefs;
3903
3904         /*
3905          * Reuse the corrupt_block cache tree to record corrupted tree block
3906          *
3907          * Unlike the usage in extent tree check, here we do it in a per
3908          * fs/subvol tree base.
3909          */
3910         cache_tree_init(&corrupt_blocks);
3911         root->fs_info->corrupt_blocks = &corrupt_blocks;
3912
3913         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3914                 rec = get_root_rec(root_cache, root->root_key.objectid);
3915                 BUG_ON(IS_ERR(rec));
3916                 if (btrfs_root_refs(root_item) > 0)
3917                         rec->found_root_item = 1;
3918         }
3919
3920         btrfs_init_path(&path);
3921         memset(&root_node, 0, sizeof(root_node));
3922         cache_tree_init(&root_node.root_cache);
3923         cache_tree_init(&root_node.inode_cache);
3924         memset(&nrefs, 0, sizeof(nrefs));
3925
3926         /* Move the orphan extent record to corresponding inode_record */
3927         list_for_each_entry_safe(orphan, tmp,
3928                                  &root->orphan_data_extents, list) {
3929                 struct inode_record *inode;
3930
3931                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3932                                       1);
3933                 BUG_ON(IS_ERR(inode));
3934                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3935                 list_move(&orphan->list, &inode->orphan_extents);
3936         }
3937
3938         level = btrfs_header_level(root->node);
3939         memset(wc->nodes, 0, sizeof(wc->nodes));
3940         wc->nodes[level] = &root_node;
3941         wc->active_node = level;
3942         wc->root_level = level;
3943
3944         /* We may not have checked the root block, lets do that now */
3945         if (btrfs_is_leaf(root->node))
3946                 status = btrfs_check_leaf(root, NULL, root->node);
3947         else
3948                 status = btrfs_check_node(root, NULL, root->node);
3949         if (status != BTRFS_TREE_BLOCK_CLEAN)
3950                 return -EIO;
3951
3952         if (btrfs_root_refs(root_item) > 0 ||
3953             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3954                 path.nodes[level] = root->node;
3955                 extent_buffer_get(root->node);
3956                 path.slots[level] = 0;
3957         } else {
3958                 struct btrfs_key key;
3959                 struct btrfs_disk_key found_key;
3960
3961                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3962                 level = root_item->drop_level;
3963                 path.lowest_level = level;
3964                 if (level > btrfs_header_level(root->node) ||
3965                     level >= BTRFS_MAX_LEVEL) {
3966                         error("ignoring invalid drop level: %u", level);
3967                         goto skip_walking;
3968                 }
3969                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3970                 if (wret < 0)
3971                         goto skip_walking;
3972                 btrfs_node_key(path.nodes[level], &found_key,
3973                                 path.slots[level]);
3974                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3975                                         sizeof(found_key)));
3976         }
3977
3978         while (1) {
3979                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3980                 if (wret < 0)
3981                         ret = wret;
3982                 if (wret != 0)
3983                         break;
3984
3985                 wret = walk_up_tree(root, &path, wc, &level);
3986                 if (wret < 0)
3987                         ret = wret;
3988                 if (wret != 0)
3989                         break;
3990         }
3991 skip_walking:
3992         btrfs_release_path(&path);
3993
3994         if (!cache_tree_empty(&corrupt_blocks)) {
3995                 struct cache_extent *cache;
3996                 struct btrfs_corrupt_block *corrupt;
3997
3998                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3999                        root->root_key.objectid);
4000                 cache = first_cache_extent(&corrupt_blocks);
4001                 while (cache) {
4002                         corrupt = container_of(cache,
4003                                                struct btrfs_corrupt_block,
4004                                                cache);
4005                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4006                                cache->start, corrupt->level,
4007                                corrupt->key.objectid, corrupt->key.type,
4008                                corrupt->key.offset);
4009                         cache = next_cache_extent(cache);
4010                 }
4011                 if (repair) {
4012                         printf("Try to repair the btree for root %llu\n",
4013                                root->root_key.objectid);
4014                         ret = repair_btree(root, &corrupt_blocks);
4015                         if (ret < 0)
4016                                 fprintf(stderr, "Failed to repair btree: %s\n",
4017                                         strerror(-ret));
4018                         if (!ret)
4019                                 printf("Btree for root %llu is fixed\n",
4020                                        root->root_key.objectid);
4021                 }
4022         }
4023
4024         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4025         if (err < 0)
4026                 ret = err;
4027
4028         if (root_node.current) {
4029                 root_node.current->checked = 1;
4030                 maybe_free_inode_rec(&root_node.inode_cache,
4031                                 root_node.current);
4032         }
4033
4034         err = check_inode_recs(root, &root_node.inode_cache);
4035         if (!ret)
4036                 ret = err;
4037
4038         free_corrupt_blocks_tree(&corrupt_blocks);
4039         root->fs_info->corrupt_blocks = NULL;
4040         free_orphan_data_extents(&root->orphan_data_extents);
4041         return ret;
4042 }
4043
4044 static int fs_root_objectid(u64 objectid)
4045 {
4046         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4047             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4048                 return 1;
4049         return is_fstree(objectid);
4050 }
4051
4052 static int check_fs_roots(struct btrfs_root *root,
4053                           struct cache_tree *root_cache)
4054 {
4055         struct btrfs_path path;
4056         struct btrfs_key key;
4057         struct walk_control wc;
4058         struct extent_buffer *leaf, *tree_node;
4059         struct btrfs_root *tmp_root;
4060         struct btrfs_root *tree_root = root->fs_info->tree_root;
4061         int ret;
4062         int err = 0;
4063
4064         if (ctx.progress_enabled) {
4065                 ctx.tp = TASK_FS_ROOTS;
4066                 task_start(ctx.info);
4067         }
4068
4069         /*
4070          * Just in case we made any changes to the extent tree that weren't
4071          * reflected into the free space cache yet.
4072          */
4073         if (repair)
4074                 reset_cached_block_groups(root->fs_info);
4075         memset(&wc, 0, sizeof(wc));
4076         cache_tree_init(&wc.shared);
4077         btrfs_init_path(&path);
4078
4079 again:
4080         key.offset = 0;
4081         key.objectid = 0;
4082         key.type = BTRFS_ROOT_ITEM_KEY;
4083         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4084         if (ret < 0) {
4085                 err = 1;
4086                 goto out;
4087         }
4088         tree_node = tree_root->node;
4089         while (1) {
4090                 if (tree_node != tree_root->node) {
4091                         free_root_recs_tree(root_cache);
4092                         btrfs_release_path(&path);
4093                         goto again;
4094                 }
4095                 leaf = path.nodes[0];
4096                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4097                         ret = btrfs_next_leaf(tree_root, &path);
4098                         if (ret) {
4099                                 if (ret < 0)
4100                                         err = 1;
4101                                 break;
4102                         }
4103                         leaf = path.nodes[0];
4104                 }
4105                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4106                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4107                     fs_root_objectid(key.objectid)) {
4108                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4109                                 tmp_root = btrfs_read_fs_root_no_cache(
4110                                                 root->fs_info, &key);
4111                         } else {
4112                                 key.offset = (u64)-1;
4113                                 tmp_root = btrfs_read_fs_root(
4114                                                 root->fs_info, &key);
4115                         }
4116                         if (IS_ERR(tmp_root)) {
4117                                 err = 1;
4118                                 goto next;
4119                         }
4120                         ret = check_fs_root(tmp_root, root_cache, &wc);
4121                         if (ret == -EAGAIN) {
4122                                 free_root_recs_tree(root_cache);
4123                                 btrfs_release_path(&path);
4124                                 goto again;
4125                         }
4126                         if (ret)
4127                                 err = 1;
4128                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4129                                 btrfs_free_fs_root(tmp_root);
4130                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4131                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4132                         process_root_ref(leaf, path.slots[0], &key,
4133                                          root_cache);
4134                 }
4135 next:
4136                 path.slots[0]++;
4137         }
4138 out:
4139         btrfs_release_path(&path);
4140         if (err)
4141                 free_extent_cache_tree(&wc.shared);
4142         if (!cache_tree_empty(&wc.shared))
4143                 fprintf(stderr, "warning line %d\n", __LINE__);
4144
4145         task_stop(ctx.info);
4146
4147         return err;
4148 }
4149
4150 /*
4151  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4152  * INODE_REF/INODE_EXTREF match.
4153  *
4154  * @root:       the root of the fs/file tree
4155  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4156  * @key:        the key of the DIR_ITEM/DIR_INDEX
4157  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4158  *              distinguish root_dir between normal dir/file
4159  * @name:       the name in the INODE_REF/INODE_EXTREF
4160  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4161  * @mode:       the st_mode of INODE_ITEM
4162  *
4163  * Return 0 if no error occurred.
4164  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4165  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4166  * dir/file.
4167  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4168  * not match for normal dir/file.
4169  */
4170 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4171                          struct btrfs_key *key, u64 index, char *name,
4172                          u32 namelen, u32 mode)
4173 {
4174         struct btrfs_path path;
4175         struct extent_buffer *node;
4176         struct btrfs_dir_item *di;
4177         struct btrfs_key location;
4178         char namebuf[BTRFS_NAME_LEN] = {0};
4179         u32 total;
4180         u32 cur = 0;
4181         u32 len;
4182         u32 name_len;
4183         u32 data_len;
4184         u8 filetype;
4185         int slot;
4186         int ret;
4187
4188         btrfs_init_path(&path);
4189         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4190         if (ret < 0) {
4191                 ret = DIR_ITEM_MISSING;
4192                 goto out;
4193         }
4194
4195         /* Process root dir and goto out*/
4196         if (index == 0) {
4197                 if (ret == 0) {
4198                         ret = ROOT_DIR_ERROR;
4199                         error(
4200                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4201                                 root->objectid,
4202                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4203                                         "REF" : "EXTREF",
4204                                 ref_key->objectid, ref_key->offset,
4205                                 key->type == BTRFS_DIR_ITEM_KEY ?
4206                                         "DIR_ITEM" : "DIR_INDEX");
4207                 } else {
4208                         ret = 0;
4209                 }
4210
4211                 goto out;
4212         }
4213
4214         /* Process normal file/dir */
4215         if (ret > 0) {
4216                 ret = DIR_ITEM_MISSING;
4217                 error(
4218                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4219                         root->objectid,
4220                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4221                         ref_key->objectid, ref_key->offset,
4222                         key->type == BTRFS_DIR_ITEM_KEY ?
4223                                 "DIR_ITEM" : "DIR_INDEX",
4224                         key->objectid, key->offset, namelen, name,
4225                         imode_to_type(mode));
4226                 goto out;
4227         }
4228
4229         /* Check whether inode_id/filetype/name match */
4230         node = path.nodes[0];
4231         slot = path.slots[0];
4232         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4233         total = btrfs_item_size_nr(node, slot);
4234         while (cur < total) {
4235                 ret = DIR_ITEM_MISMATCH;
4236                 name_len = btrfs_dir_name_len(node, di);
4237                 data_len = btrfs_dir_data_len(node, di);
4238
4239                 btrfs_dir_item_key_to_cpu(node, di, &location);
4240                 if (location.objectid != ref_key->objectid ||
4241                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4242                     location.offset != 0)
4243                         goto next;
4244
4245                 filetype = btrfs_dir_type(node, di);
4246                 if (imode_to_type(mode) != filetype)
4247                         goto next;
4248
4249                 if (cur + sizeof(*di) + name_len > total ||
4250                     name_len > BTRFS_NAME_LEN) {
4251                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4252                                 root->objectid,
4253                                 key->type == BTRFS_DIR_ITEM_KEY ?
4254                                 "DIR_ITEM" : "DIR_INDEX",
4255                                 key->objectid, key->offset, name_len);
4256
4257                         if (cur + sizeof(*di) > total)
4258                                 break;
4259                         len = min_t(u32, total - cur - sizeof(*di),
4260                                     BTRFS_NAME_LEN);
4261                 } else {
4262                         len = name_len;
4263                 }
4264
4265                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4266                 if (len != namelen || strncmp(namebuf, name, len))
4267                         goto next;
4268
4269                 ret = 0;
4270                 goto out;
4271 next:
4272                 len = sizeof(*di) + name_len + data_len;
4273                 di = (struct btrfs_dir_item *)((char *)di + len);
4274                 cur += len;
4275         }
4276         if (ret == DIR_ITEM_MISMATCH)
4277                 error(
4278                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4279                         root->objectid,
4280                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4281                         ref_key->objectid, ref_key->offset,
4282                         key->type == BTRFS_DIR_ITEM_KEY ?
4283                                 "DIR_ITEM" : "DIR_INDEX",
4284                         key->objectid, key->offset, namelen, name,
4285                         imode_to_type(mode));
4286 out:
4287         btrfs_release_path(&path);
4288         return ret;
4289 }
4290
4291 /*
4292  * Traverse the given INODE_REF and call find_dir_item() to find related
4293  * DIR_ITEM/DIR_INDEX.
4294  *
4295  * @root:       the root of the fs/file tree
4296  * @ref_key:    the key of the INODE_REF
4297  * @refs:       the count of INODE_REF
4298  * @mode:       the st_mode of INODE_ITEM
4299  *
4300  * Return 0 if no error occurred.
4301  */
4302 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4303                            struct extent_buffer *node, int slot, u64 *refs,
4304                            int mode)
4305 {
4306         struct btrfs_key key;
4307         struct btrfs_inode_ref *ref;
4308         char namebuf[BTRFS_NAME_LEN] = {0};
4309         u32 total;
4310         u32 cur = 0;
4311         u32 len;
4312         u32 name_len;
4313         u64 index;
4314         int ret, err = 0;
4315
4316         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4317         total = btrfs_item_size_nr(node, slot);
4318
4319 next:
4320         /* Update inode ref count */
4321         (*refs)++;
4322
4323         index = btrfs_inode_ref_index(node, ref);
4324         name_len = btrfs_inode_ref_name_len(node, ref);
4325         if (cur + sizeof(*ref) + name_len > total ||
4326             name_len > BTRFS_NAME_LEN) {
4327                 warning("root %llu INODE_REF[%llu %llu] name too long",
4328                         root->objectid, ref_key->objectid, ref_key->offset);
4329
4330                 if (total < cur + sizeof(*ref))
4331                         goto out;
4332                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4333         } else {
4334                 len = name_len;
4335         }
4336
4337         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4338
4339         /* Check root dir ref name */
4340         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4341                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4342                       root->objectid, ref_key->objectid, ref_key->offset,
4343                       namebuf);
4344                 err |= ROOT_DIR_ERROR;
4345         }
4346
4347         /* Find related DIR_INDEX */
4348         key.objectid = ref_key->offset;
4349         key.type = BTRFS_DIR_INDEX_KEY;
4350         key.offset = index;
4351         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4352         err |= ret;
4353
4354         /* Find related dir_item */
4355         key.objectid = ref_key->offset;
4356         key.type = BTRFS_DIR_ITEM_KEY;
4357         key.offset = btrfs_name_hash(namebuf, len);
4358         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4359         err |= ret;
4360
4361         len = sizeof(*ref) + name_len;
4362         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4363         cur += len;
4364         if (cur < total)
4365                 goto next;
4366
4367 out:
4368         return err;
4369 }
4370
4371 /*
4372  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4373  * DIR_ITEM/DIR_INDEX.
4374  *
4375  * @root:       the root of the fs/file tree
4376  * @ref_key:    the key of the INODE_EXTREF
4377  * @refs:       the count of INODE_EXTREF
4378  * @mode:       the st_mode of INODE_ITEM
4379  *
4380  * Return 0 if no error occurred.
4381  */
4382 static int check_inode_extref(struct btrfs_root *root,
4383                               struct btrfs_key *ref_key,
4384                               struct extent_buffer *node, int slot, u64 *refs,
4385                               int mode)
4386 {
4387         struct btrfs_key key;
4388         struct btrfs_inode_extref *extref;
4389         char namebuf[BTRFS_NAME_LEN] = {0};
4390         u32 total;
4391         u32 cur = 0;
4392         u32 len;
4393         u32 name_len;
4394         u64 index;
4395         u64 parent;
4396         int ret;
4397         int err = 0;
4398
4399         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4400         total = btrfs_item_size_nr(node, slot);
4401
4402 next:
4403         /* update inode ref count */
4404         (*refs)++;
4405         name_len = btrfs_inode_extref_name_len(node, extref);
4406         index = btrfs_inode_extref_index(node, extref);
4407         parent = btrfs_inode_extref_parent(node, extref);
4408         if (name_len <= BTRFS_NAME_LEN) {
4409                 len = name_len;
4410         } else {
4411                 len = BTRFS_NAME_LEN;
4412                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4413                         root->objectid, ref_key->objectid, ref_key->offset);
4414         }
4415         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4416
4417         /* Check root dir ref name */
4418         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4419                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4420                       root->objectid, ref_key->objectid, ref_key->offset,
4421                       namebuf);
4422                 err |= ROOT_DIR_ERROR;
4423         }
4424
4425         /* find related dir_index */
4426         key.objectid = parent;
4427         key.type = BTRFS_DIR_INDEX_KEY;
4428         key.offset = index;
4429         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4430         err |= ret;
4431
4432         /* find related dir_item */
4433         key.objectid = parent;
4434         key.type = BTRFS_DIR_ITEM_KEY;
4435         key.offset = btrfs_name_hash(namebuf, len);
4436         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4437         err |= ret;
4438
4439         len = sizeof(*extref) + name_len;
4440         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4441         cur += len;
4442
4443         if (cur < total)
4444                 goto next;
4445
4446         return err;
4447 }
4448
4449 /*
4450  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4451  * DIR_ITEM/DIR_INDEX match.
4452  *
4453  * @root:       the root of the fs/file tree
4454  * @key:        the key of the INODE_REF/INODE_EXTREF
4455  * @name:       the name in the INODE_REF/INODE_EXTREF
4456  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4457  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4458  * to (u64)-1
4459  * @ext_ref:    the EXTENDED_IREF feature
4460  *
4461  * Return 0 if no error occurred.
4462  * Return >0 for error bitmap
4463  */
4464 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4465                           char *name, int namelen, u64 index,
4466                           unsigned int ext_ref)
4467 {
4468         struct btrfs_path path;
4469         struct btrfs_inode_ref *ref;
4470         struct btrfs_inode_extref *extref;
4471         struct extent_buffer *node;
4472         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4473         u32 total;
4474         u32 cur = 0;
4475         u32 len;
4476         u32 ref_namelen;
4477         u64 ref_index;
4478         u64 parent;
4479         u64 dir_id;
4480         int slot;
4481         int ret;
4482
4483         btrfs_init_path(&path);
4484         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4485         if (ret) {
4486                 ret = INODE_REF_MISSING;
4487                 goto extref;
4488         }
4489
4490         node = path.nodes[0];
4491         slot = path.slots[0];
4492
4493         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4494         total = btrfs_item_size_nr(node, slot);
4495
4496         /* Iterate all entry of INODE_REF */
4497         while (cur < total) {
4498                 ret = INODE_REF_MISSING;
4499
4500                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4501                 ref_index = btrfs_inode_ref_index(node, ref);
4502                 if (index != (u64)-1 && index != ref_index)
4503                         goto next_ref;
4504
4505                 if (cur + sizeof(*ref) + ref_namelen > total ||
4506                     ref_namelen > BTRFS_NAME_LEN) {
4507                         warning("root %llu INODE %s[%llu %llu] name too long",
4508                                 root->objectid,
4509                                 key->type == BTRFS_INODE_REF_KEY ?
4510                                         "REF" : "EXTREF",
4511                                 key->objectid, key->offset);
4512
4513                         if (cur + sizeof(*ref) > total)
4514                                 break;
4515                         len = min_t(u32, total - cur - sizeof(*ref),
4516                                     BTRFS_NAME_LEN);
4517                 } else {
4518                         len = ref_namelen;
4519                 }
4520
4521                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4522                                    len);
4523
4524                 if (len != namelen || strncmp(ref_namebuf, name, len))
4525                         goto next_ref;
4526
4527                 ret = 0;
4528                 goto out;
4529 next_ref:
4530                 len = sizeof(*ref) + ref_namelen;
4531                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4532                 cur += len;
4533         }
4534
4535 extref:
4536         /* Skip if not support EXTENDED_IREF feature */
4537         if (!ext_ref)
4538                 goto out;
4539
4540         btrfs_release_path(&path);
4541         btrfs_init_path(&path);
4542
4543         dir_id = key->offset;
4544         key->type = BTRFS_INODE_EXTREF_KEY;
4545         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4546
4547         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4548         if (ret) {
4549                 ret = INODE_REF_MISSING;
4550                 goto out;
4551         }
4552
4553         node = path.nodes[0];
4554         slot = path.slots[0];
4555
4556         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4557         cur = 0;
4558         total = btrfs_item_size_nr(node, slot);
4559
4560         /* Iterate all entry of INODE_EXTREF */
4561         while (cur < total) {
4562                 ret = INODE_REF_MISSING;
4563
4564                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4565                 ref_index = btrfs_inode_extref_index(node, extref);
4566                 parent = btrfs_inode_extref_parent(node, extref);
4567                 if (index != (u64)-1 && index != ref_index)
4568                         goto next_extref;
4569
4570                 if (parent != dir_id)
4571                         goto next_extref;
4572
4573                 if (ref_namelen <= BTRFS_NAME_LEN) {
4574                         len = ref_namelen;
4575                 } else {
4576                         len = BTRFS_NAME_LEN;
4577                         warning("root %llu INODE %s[%llu %llu] name too long",
4578                                 root->objectid,
4579                                 key->type == BTRFS_INODE_REF_KEY ?
4580                                         "REF" : "EXTREF",
4581                                 key->objectid, key->offset);
4582                 }
4583                 read_extent_buffer(node, ref_namebuf,
4584                                    (unsigned long)(extref + 1), len);
4585
4586                 if (len != namelen || strncmp(ref_namebuf, name, len))
4587                         goto next_extref;
4588
4589                 ret = 0;
4590                 goto out;
4591
4592 next_extref:
4593                 len = sizeof(*extref) + ref_namelen;
4594                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4595                 cur += len;
4596
4597         }
4598 out:
4599         btrfs_release_path(&path);
4600         return ret;
4601 }
4602
4603 /*
4604  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4605  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4606  *
4607  * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
4609  * @size:       the st_size of the INODE_ITEM
4610  * @ext_ref:    the EXTENDED_IREF feature
4611  *
4612  * Return 0 if no error occurred.
4613  */
4614 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4615                           struct extent_buffer *node, int slot, u64 *size,
4616                           unsigned int ext_ref)
4617 {
4618         struct btrfs_dir_item *di;
4619         struct btrfs_inode_item *ii;
4620         struct btrfs_path path;
4621         struct btrfs_key location;
4622         char namebuf[BTRFS_NAME_LEN] = {0};
4623         u32 total;
4624         u32 cur = 0;
4625         u32 len;
4626         u32 name_len;
4627         u32 data_len;
4628         u8 filetype;
4629         u32 mode;
4630         u64 index;
4631         int ret;
4632         int err = 0;
4633
4634         /*
4635          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4636          * ignore index check.
4637          */
4638         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4639
4640         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4641         total = btrfs_item_size_nr(node, slot);
4642
4643         while (cur < total) {
4644                 data_len = btrfs_dir_data_len(node, di);
4645                 if (data_len)
4646                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4647                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4648                               "DIR_ITEM" : "DIR_INDEX",
4649                               key->objectid, key->offset, data_len);
4650
4651                 name_len = btrfs_dir_name_len(node, di);
4652                 if (cur + sizeof(*di) + name_len > total ||
4653                     name_len > BTRFS_NAME_LEN) {
4654                         warning("root %llu %s[%llu %llu] name too long",
4655                                 root->objectid,
4656                                 key->type == BTRFS_DIR_ITEM_KEY ?
4657                                 "DIR_ITEM" : "DIR_INDEX",
4658                                 key->objectid, key->offset);
4659
4660                         if (cur + sizeof(*di) > total)
4661                                 break;
4662                         len = min_t(u32, total - cur - sizeof(*di),
4663                                     BTRFS_NAME_LEN);
4664                 } else {
4665                         len = name_len;
4666                 }
4667                 (*size) += name_len;
4668
4669                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4670                 filetype = btrfs_dir_type(node, di);
4671
4672                 btrfs_init_path(&path);
4673                 btrfs_dir_item_key_to_cpu(node, di, &location);
4674
4675                 /* Ignore related ROOT_ITEM check */
4676                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4677                         goto next;
4678
4679                 /* Check relative INODE_ITEM(existence/filetype) */
4680                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4681                 if (ret) {
4682                         err |= INODE_ITEM_MISSING;
4683                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4684                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4685                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4686                               key->offset, location.objectid, name_len,
4687                               namebuf, filetype);
4688                         goto next;
4689                 }
4690
4691                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4692                                     struct btrfs_inode_item);
4693                 mode = btrfs_inode_mode(path.nodes[0], ii);
4694
4695                 if (imode_to_type(mode) != filetype) {
4696                         err |= INODE_ITEM_MISMATCH;
4697                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4698                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4699                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4700                               key->offset, name_len, namebuf, filetype);
4701                 }
4702
4703                 /* Check relative INODE_REF/INODE_EXTREF */
4704                 location.type = BTRFS_INODE_REF_KEY;
4705                 location.offset = key->objectid;
4706                 ret = find_inode_ref(root, &location, namebuf, len,
4707                                        index, ext_ref);
4708                 err |= ret;
4709                 if (ret & INODE_REF_MISSING)
4710                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4711                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4712                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4713                               key->offset, name_len, namebuf, filetype);
4714
4715 next:
4716                 btrfs_release_path(&path);
4717                 len = sizeof(*di) + name_len + data_len;
4718                 di = (struct btrfs_dir_item *)((char *)di + len);
4719                 cur += len;
4720
4721                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4722                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4723                               root->objectid, key->objectid, key->offset);
4724                         break;
4725                 }
4726         }
4727
4728         return err;
4729 }
4730
4731 /*
4732  * Check file extent datasum/hole, update the size of the file extents,
4733  * check and update the last offset of the file extent.
4734  *
4735  * @root:       the root of fs/file tree.
4736  * @fkey:       the key of the file extent.
4737  * @nodatasum:  INODE_NODATASUM feature.
4738  * @size:       the sum of all EXTENT_DATA items size for this inode.
4739  * @end:        the offset of the last extent.
4740  *
4741  * Return 0 if no error occurred.
4742  */
4743 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4744                              struct extent_buffer *node, int slot,
4745                              unsigned int nodatasum, u64 *size, u64 *end)
4746 {
4747         struct btrfs_file_extent_item *fi;
4748         u64 disk_bytenr;
4749         u64 disk_num_bytes;
4750         u64 extent_num_bytes;
4751         u64 extent_offset;
4752         u64 csum_found;         /* In byte size, sectorsize aligned */
4753         u64 search_start;       /* Logical range start we search for csum */
4754         u64 search_len;         /* Logical range len we search for csum */
4755         unsigned int extent_type;
4756         unsigned int is_hole;
4757         int compressed = 0;
4758         int ret;
4759         int err = 0;
4760
4761         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4762
4763         /* Check inline extent */
4764         extent_type = btrfs_file_extent_type(node, fi);
4765         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4766                 struct btrfs_item *e = btrfs_item_nr(slot);
4767                 u32 item_inline_len;
4768
4769                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4770                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4771                 compressed = btrfs_file_extent_compression(node, fi);
4772                 if (extent_num_bytes == 0) {
4773                         error(
4774                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4775                                 root->objectid, fkey->objectid, fkey->offset);
4776                         err |= FILE_EXTENT_ERROR;
4777                 }
4778                 if (!compressed && extent_num_bytes != item_inline_len) {
4779                         error(
4780                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4781                                 root->objectid, fkey->objectid, fkey->offset,
4782                                 extent_num_bytes, item_inline_len);
4783                         err |= FILE_EXTENT_ERROR;
4784                 }
4785                 *end += extent_num_bytes;
4786                 *size += extent_num_bytes;
4787                 return err;
4788         }
4789
4790         /* Check extent type */
4791         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4792                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4793                 err |= FILE_EXTENT_ERROR;
4794                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4795                       root->objectid, fkey->objectid, fkey->offset);
4796                 return err;
4797         }
4798
4799         /* Check REG_EXTENT/PREALLOC_EXTENT */
4800         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4801         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4802         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4803         extent_offset = btrfs_file_extent_offset(node, fi);
4804         compressed = btrfs_file_extent_compression(node, fi);
4805         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4806
4807         /*
4808          * Check EXTENT_DATA csum
4809          *
4810          * For plain (uncompressed) extent, we should only check the range
4811          * we're referring to, as it's possible that part of prealloc extent
4812          * has been written, and has csum:
4813          *
4814          * |<--- Original large preallocated extent A ---->|
4815          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4816          *      No csum                         Has csum
4817          *
4818          * For compressed extent, we should check the whole range.
4819          */
4820         if (!compressed) {
4821                 search_start = disk_bytenr + extent_offset;
4822                 search_len = extent_num_bytes;
4823         } else {
4824                 search_start = disk_bytenr;
4825                 search_len = disk_num_bytes;
4826         }
4827         ret = count_csum_range(root, search_start, search_len, &csum_found);
4828         if (csum_found > 0 && nodatasum) {
4829                 err |= ODD_CSUM_ITEM;
4830                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4831                       root->objectid, fkey->objectid, fkey->offset);
4832         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4833                    !is_hole && (ret < 0 || csum_found < search_len)) {
4834                 err |= CSUM_ITEM_MISSING;
4835                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4836                       root->objectid, fkey->objectid, fkey->offset,
4837                       csum_found, search_len);
4838         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4839                 err |= ODD_CSUM_ITEM;
4840                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4841                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4842         }
4843
4844         /* Check EXTENT_DATA hole */
4845         if (!no_holes && *end != fkey->offset) {
4846                 err |= FILE_EXTENT_ERROR;
4847                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4848                       root->objectid, fkey->objectid, fkey->offset);
4849         }
4850
4851         *end += extent_num_bytes;
4852         if (!is_hole)
4853                 *size += extent_num_bytes;
4854
4855         return err;
4856 }
4857
4858 /*
4859  * Check INODE_ITEM and related ITEMs (the same inode number)
4860  * 1. check link count
4861  * 2. check inode ref/extref
4862  * 3. check dir item/index
4863  *
4864  * @ext_ref:    the EXTENDED_IREF feature
4865  *
4866  * Return 0 if no error occurred.
4867  * Return >0 for error or hit the traversal is done(by error bitmap)
4868  */
4869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4870                             unsigned int ext_ref)
4871 {
4872         struct extent_buffer *node;
4873         struct btrfs_inode_item *ii;
4874         struct btrfs_key key;
4875         u64 inode_id;
4876         u32 mode;
4877         u64 nlink;
4878         u64 nbytes;
4879         u64 isize;
4880         u64 size = 0;
4881         u64 refs = 0;
4882         u64 extent_end = 0;
4883         u64 extent_size = 0;
4884         unsigned int dir;
4885         unsigned int nodatasum;
4886         int slot;
4887         int ret;
4888         int err = 0;
4889
4890         node = path->nodes[0];
4891         slot = path->slots[0];
4892
4893         btrfs_item_key_to_cpu(node, &key, slot);
4894         inode_id = key.objectid;
4895
4896         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4897                 ret = btrfs_next_item(root, path);
4898                 if (ret > 0)
4899                         err |= LAST_ITEM;
4900                 return err;
4901         }
4902
4903         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4904         isize = btrfs_inode_size(node, ii);
4905         nbytes = btrfs_inode_nbytes(node, ii);
4906         mode = btrfs_inode_mode(node, ii);
4907         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4908         nlink = btrfs_inode_nlink(node, ii);
4909         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4910
4911         while (1) {
4912                 ret = btrfs_next_item(root, path);
4913                 if (ret < 0) {
4914                         /* out will fill 'err' rusing current statistics */
4915                         goto out;
4916                 } else if (ret > 0) {
4917                         err |= LAST_ITEM;
4918                         goto out;
4919                 }
4920
4921                 node = path->nodes[0];
4922                 slot = path->slots[0];
4923                 btrfs_item_key_to_cpu(node, &key, slot);
4924                 if (key.objectid != inode_id)
4925                         goto out;
4926
4927                 switch (key.type) {
4928                 case BTRFS_INODE_REF_KEY:
4929                         ret = check_inode_ref(root, &key, node, slot, &refs,
4930                                               mode);
4931                         err |= ret;
4932                         break;
4933                 case BTRFS_INODE_EXTREF_KEY:
4934                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4935                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4936                                         root->objectid, key.objectid,
4937                                         key.offset);
4938                         ret = check_inode_extref(root, &key, node, slot, &refs,
4939                                                  mode);
4940                         err |= ret;
4941                         break;
4942                 case BTRFS_DIR_ITEM_KEY:
4943                 case BTRFS_DIR_INDEX_KEY:
4944                         if (!dir) {
4945                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4946                                         root->objectid, inode_id,
4947                                         imode_to_type(mode), key.objectid,
4948                                         key.offset);
4949                         }
4950                         ret = check_dir_item(root, &key, node, slot, &size,
4951                                              ext_ref);
4952                         err |= ret;
4953                         break;
4954                 case BTRFS_EXTENT_DATA_KEY:
4955                         if (dir) {
4956                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4957                                         root->objectid, inode_id, key.objectid,
4958                                         key.offset);
4959                         }
4960                         ret = check_file_extent(root, &key, node, slot,
4961                                                 nodatasum, &extent_size,
4962                                                 &extent_end);
4963                         err |= ret;
4964                         break;
4965                 case BTRFS_XATTR_ITEM_KEY:
4966                         break;
4967                 default:
4968                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4969                               key.objectid, key.type, key.offset);
4970                 }
4971         }
4972
4973 out:
4974         /* verify INODE_ITEM nlink/isize/nbytes */
4975         if (dir) {
4976                 if (nlink != 1) {
4977                         err |= LINK_COUNT_ERROR;
4978                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4979                               root->objectid, inode_id, nlink);
4980                 }
4981
4982                 /*
4983                  * Just a warning, as dir inode nbytes is just an
4984                  * instructive value.
4985                  */
4986                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4987                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4988                                 root->objectid, inode_id,
4989                                 root->fs_info->nodesize);
4990                 }
4991
4992                 if (isize != size) {
4993                         err |= ISIZE_ERROR;
4994                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4995                               root->objectid, inode_id, isize, size);
4996                 }
4997         } else {
4998                 if (nlink != refs) {
4999                         err |= LINK_COUNT_ERROR;
5000                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5001                               root->objectid, inode_id, nlink, refs);
5002                 } else if (!nlink) {
5003                         err |= ORPHAN_ITEM;
5004                 }
5005
5006                 if (!nbytes && !no_holes && extent_end < isize) {
5007                         err |= NBYTES_ERROR;
5008                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5009                               root->objectid, inode_id, isize);
5010                 }
5011
5012                 if (nbytes != extent_size) {
5013                         err |= NBYTES_ERROR;
5014                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5015                               root->objectid, inode_id, nbytes, extent_size);
5016                 }
5017         }
5018
5019         return err;
5020 }
5021
5022 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5023 {
5024         struct btrfs_path path;
5025         struct btrfs_key key;
5026         int err = 0;
5027         int ret;
5028
5029         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5030         key.type = BTRFS_INODE_ITEM_KEY;
5031         key.offset = 0;
5032
5033         /* For root being dropped, we don't need to check first inode */
5034         if (btrfs_root_refs(&root->root_item) == 0 &&
5035             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5036             key.objectid)
5037                 return 0;
5038
5039         btrfs_init_path(&path);
5040
5041         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5042         if (ret < 0)
5043                 goto out;
5044         if (ret > 0) {
5045                 ret = 0;
5046                 err |= INODE_ITEM_MISSING;
5047                 error("first inode item of root %llu is missing",
5048                       root->objectid);
5049         }
5050
5051         err |= check_inode_item(root, &path, ext_ref);
5052         err &= ~LAST_ITEM;
5053         if (err && !ret)
5054                 ret = -EIO;
5055 out:
5056         btrfs_release_path(&path);
5057         return ret;
5058 }
5059
5060 /*
5061  * Iterate all item on the tree and call check_inode_item() to check.
5062  *
5063  * @root:       the root of the tree to be checked.
5064  * @ext_ref:    the EXTENDED_IREF feature
5065  *
5066  * Return 0 if no error found.
5067  * Return <0 for error.
5068  */
5069 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5070 {
5071         struct btrfs_path path;
5072         struct node_refs nrefs;
5073         struct btrfs_root_item *root_item = &root->root_item;
5074         int ret;
5075         int level;
5076         int err = 0;
5077
5078         /*
5079          * We need to manually check the first inode item(256)
5080          * As the following traversal function will only start from
5081          * the first inode item in the leaf, if inode item(256) is missing
5082          * we will just skip it forever.
5083          */
5084         ret = check_fs_first_inode(root, ext_ref);
5085         if (ret < 0)
5086                 return ret;
5087
5088         memset(&nrefs, 0, sizeof(nrefs));
5089         level = btrfs_header_level(root->node);
5090         btrfs_init_path(&path);
5091
5092         if (btrfs_root_refs(root_item) > 0 ||
5093             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5094                 path.nodes[level] = root->node;
5095                 path.slots[level] = 0;
5096                 extent_buffer_get(root->node);
5097         } else {
5098                 struct btrfs_key key;
5099
5100                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5101                 level = root_item->drop_level;
5102                 path.lowest_level = level;
5103                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5104                 if (ret < 0)
5105                         goto out;
5106                 ret = 0;
5107         }
5108
5109         while (1) {
5110                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5111                 err |= !!ret;
5112
5113                 /* if ret is negative, walk shall stop */
5114                 if (ret < 0) {
5115                         ret = err;
5116                         break;
5117                 }
5118
5119                 ret = walk_up_tree_v2(root, &path, &level);
5120                 if (ret != 0) {
5121                         /* Normal exit, reset ret to err */
5122                         ret = err;
5123                         break;
5124                 }
5125         }
5126
5127 out:
5128         btrfs_release_path(&path);
5129         return ret;
5130 }
5131
5132 /*
5133  * Find the relative ref for root_ref and root_backref.
5134  *
5135  * @root:       the root of the root tree.
5136  * @ref_key:    the key of the root ref.
5137  *
5138  * Return 0 if no error occurred.
5139  */
5140 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5141                           struct extent_buffer *node, int slot)
5142 {
5143         struct btrfs_path path;
5144         struct btrfs_key key;
5145         struct btrfs_root_ref *ref;
5146         struct btrfs_root_ref *backref;
5147         char ref_name[BTRFS_NAME_LEN] = {0};
5148         char backref_name[BTRFS_NAME_LEN] = {0};
5149         u64 ref_dirid;
5150         u64 ref_seq;
5151         u32 ref_namelen;
5152         u64 backref_dirid;
5153         u64 backref_seq;
5154         u32 backref_namelen;
5155         u32 len;
5156         int ret;
5157         int err = 0;
5158
5159         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5160         ref_dirid = btrfs_root_ref_dirid(node, ref);
5161         ref_seq = btrfs_root_ref_sequence(node, ref);
5162         ref_namelen = btrfs_root_ref_name_len(node, ref);
5163
5164         if (ref_namelen <= BTRFS_NAME_LEN) {
5165                 len = ref_namelen;
5166         } else {
5167                 len = BTRFS_NAME_LEN;
5168                 warning("%s[%llu %llu] ref_name too long",
5169                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5170                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5171                         ref_key->offset);
5172         }
5173         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5174
5175         /* Find relative root_ref */
5176         key.objectid = ref_key->offset;
5177         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5178         key.offset = ref_key->objectid;
5179
5180         btrfs_init_path(&path);
5181         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5182         if (ret) {
5183                 err |= ROOT_REF_MISSING;
5184                 error("%s[%llu %llu] couldn't find relative ref",
5185                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5186                       "ROOT_REF" : "ROOT_BACKREF",
5187                       ref_key->objectid, ref_key->offset);
5188                 goto out;
5189         }
5190
5191         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5192                                  struct btrfs_root_ref);
5193         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5194         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5195         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5196
5197         if (backref_namelen <= BTRFS_NAME_LEN) {
5198                 len = backref_namelen;
5199         } else {
5200                 len = BTRFS_NAME_LEN;
5201                 warning("%s[%llu %llu] ref_name too long",
5202                         key.type == BTRFS_ROOT_REF_KEY ?
5203                         "ROOT_REF" : "ROOT_BACKREF",
5204                         key.objectid, key.offset);
5205         }
5206         read_extent_buffer(path.nodes[0], backref_name,
5207                            (unsigned long)(backref + 1), len);
5208
5209         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5210             ref_namelen != backref_namelen ||
5211             strncmp(ref_name, backref_name, len)) {
5212                 err |= ROOT_REF_MISMATCH;
5213                 error("%s[%llu %llu] mismatch relative ref",
5214                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5215                       "ROOT_REF" : "ROOT_BACKREF",
5216                       ref_key->objectid, ref_key->offset);
5217         }
5218 out:
5219         btrfs_release_path(&path);
5220         return err;
5221 }
5222
5223 /*
5224  * Check all fs/file tree in low_memory mode.
5225  *
5226  * 1. for fs tree root item, call check_fs_root_v2()
5227  * 2. for fs tree root ref/backref, call check_root_ref()
5228  *
5229  * Return 0 if no error occurred.
5230  */
5231 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5232 {
5233         struct btrfs_root *tree_root = fs_info->tree_root;
5234         struct btrfs_root *cur_root = NULL;
5235         struct btrfs_path path;
5236         struct btrfs_key key;
5237         struct extent_buffer *node;
5238         unsigned int ext_ref;
5239         int slot;
5240         int ret;
5241         int err = 0;
5242
5243         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5244
5245         btrfs_init_path(&path);
5246         key.objectid = BTRFS_FS_TREE_OBJECTID;
5247         key.offset = 0;
5248         key.type = BTRFS_ROOT_ITEM_KEY;
5249
5250         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5251         if (ret < 0) {
5252                 err = ret;
5253                 goto out;
5254         } else if (ret > 0) {
5255                 err = -ENOENT;
5256                 goto out;
5257         }
5258
5259         while (1) {
5260                 node = path.nodes[0];
5261                 slot = path.slots[0];
5262                 btrfs_item_key_to_cpu(node, &key, slot);
5263                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5264                         goto out;
5265                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5266                     fs_root_objectid(key.objectid)) {
5267                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5268                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5269                                                                        &key);
5270                         } else {
5271                                 key.offset = (u64)-1;
5272                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5273                         }
5274
5275                         if (IS_ERR(cur_root)) {
5276                                 error("Fail to read fs/subvol tree: %lld",
5277                                       key.objectid);
5278                                 err = -EIO;
5279                                 goto next;
5280                         }
5281
5282                         ret = check_fs_root_v2(cur_root, ext_ref);
5283                         err |= ret;
5284
5285                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5286                                 btrfs_free_fs_root(cur_root);
5287                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5288                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5289                         ret = check_root_ref(tree_root, &key, node, slot);
5290                         err |= ret;
5291                 }
5292 next:
5293                 ret = btrfs_next_item(tree_root, &path);
5294                 if (ret > 0)
5295                         goto out;
5296                 if (ret < 0) {
5297                         err = ret;
5298                         goto out;
5299                 }
5300         }
5301
5302 out:
5303         btrfs_release_path(&path);
5304         return err;
5305 }
5306
5307 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5308 {
5309         struct list_head *cur = rec->backrefs.next;
5310         struct extent_backref *back;
5311         struct tree_backref *tback;
5312         struct data_backref *dback;
5313         u64 found = 0;
5314         int err = 0;
5315
5316         while(cur != &rec->backrefs) {
5317                 back = to_extent_backref(cur);
5318                 cur = cur->next;
5319                 if (!back->found_extent_tree) {
5320                         err = 1;
5321                         if (!print_errs)
5322                                 goto out;
5323                         if (back->is_data) {
5324                                 dback = to_data_backref(back);
5325                                 fprintf(stderr, "Backref %llu %s %llu"
5326                                         " owner %llu offset %llu num_refs %lu"
5327                                         " not found in extent tree\n",
5328                                         (unsigned long long)rec->start,
5329                                         back->full_backref ?
5330                                         "parent" : "root",
5331                                         back->full_backref ?
5332                                         (unsigned long long)dback->parent:
5333                                         (unsigned long long)dback->root,
5334                                         (unsigned long long)dback->owner,
5335                                         (unsigned long long)dback->offset,
5336                                         (unsigned long)dback->num_refs);
5337                         } else {
5338                                 tback = to_tree_backref(back);
5339                                 fprintf(stderr, "Backref %llu parent %llu"
5340                                         " root %llu not found in extent tree\n",
5341                                         (unsigned long long)rec->start,
5342                                         (unsigned long long)tback->parent,
5343                                         (unsigned long long)tback->root);
5344                         }
5345                 }
5346                 if (!back->is_data && !back->found_ref) {
5347                         err = 1;
5348                         if (!print_errs)
5349                                 goto out;
5350                         tback = to_tree_backref(back);
5351                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5352                                 (unsigned long long)rec->start,
5353                                 back->full_backref ? "parent" : "root",
5354                                 back->full_backref ?
5355                                 (unsigned long long)tback->parent :
5356                                 (unsigned long long)tback->root, back);
5357                 }
5358                 if (back->is_data) {
5359                         dback = to_data_backref(back);
5360                         if (dback->found_ref != dback->num_refs) {
5361                                 err = 1;
5362                                 if (!print_errs)
5363                                         goto out;
5364                                 fprintf(stderr, "Incorrect local backref count"
5365                                         " on %llu %s %llu owner %llu"
5366                                         " offset %llu found %u wanted %u back %p\n",
5367                                         (unsigned long long)rec->start,
5368                                         back->full_backref ?
5369                                         "parent" : "root",
5370                                         back->full_backref ?
5371                                         (unsigned long long)dback->parent:
5372                                         (unsigned long long)dback->root,
5373                                         (unsigned long long)dback->owner,
5374                                         (unsigned long long)dback->offset,
5375                                         dback->found_ref, dback->num_refs, back);
5376                         }
5377                         if (dback->disk_bytenr != rec->start) {
5378                                 err = 1;
5379                                 if (!print_errs)
5380                                         goto out;
5381                                 fprintf(stderr, "Backref disk bytenr does not"
5382                                         " match extent record, bytenr=%llu, "
5383                                         "ref bytenr=%llu\n",
5384                                         (unsigned long long)rec->start,
5385                                         (unsigned long long)dback->disk_bytenr);
5386                         }
5387
5388                         if (dback->bytes != rec->nr) {
5389                                 err = 1;
5390                                 if (!print_errs)
5391                                         goto out;
5392                                 fprintf(stderr, "Backref bytes do not match "
5393                                         "extent backref, bytenr=%llu, ref "
5394                                         "bytes=%llu, backref bytes=%llu\n",
5395                                         (unsigned long long)rec->start,
5396                                         (unsigned long long)rec->nr,
5397                                         (unsigned long long)dback->bytes);
5398                         }
5399                 }
5400                 if (!back->is_data) {
5401                         found += 1;
5402                 } else {
5403                         dback = to_data_backref(back);
5404                         found += dback->found_ref;
5405                 }
5406         }
5407         if (found != rec->refs) {
5408                 err = 1;
5409                 if (!print_errs)
5410                         goto out;
5411                 fprintf(stderr, "Incorrect global backref count "
5412                         "on %llu found %llu wanted %llu\n",
5413                         (unsigned long long)rec->start,
5414                         (unsigned long long)found,
5415                         (unsigned long long)rec->refs);
5416         }
5417 out:
5418         return err;
5419 }
5420
5421 static int free_all_extent_backrefs(struct extent_record *rec)
5422 {
5423         struct extent_backref *back;
5424         struct list_head *cur;
5425         while (!list_empty(&rec->backrefs)) {
5426                 cur = rec->backrefs.next;
5427                 back = to_extent_backref(cur);
5428                 list_del(cur);
5429                 free(back);
5430         }
5431         return 0;
5432 }
5433
5434 static void free_extent_record_cache(struct cache_tree *extent_cache)
5435 {
5436         struct cache_extent *cache;
5437         struct extent_record *rec;
5438
5439         while (1) {
5440                 cache = first_cache_extent(extent_cache);
5441                 if (!cache)
5442                         break;
5443                 rec = container_of(cache, struct extent_record, cache);
5444                 remove_cache_extent(extent_cache, cache);
5445                 free_all_extent_backrefs(rec);
5446                 free(rec);
5447         }
5448 }
5449
5450 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5451                                  struct extent_record *rec)
5452 {
5453         if (rec->content_checked && rec->owner_ref_checked &&
5454             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5455             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5456             !rec->bad_full_backref && !rec->crossing_stripes &&
5457             !rec->wrong_chunk_type) {
5458                 remove_cache_extent(extent_cache, &rec->cache);
5459                 free_all_extent_backrefs(rec);
5460                 list_del_init(&rec->list);
5461                 free(rec);
5462         }
5463         return 0;
5464 }
5465
5466 static int check_owner_ref(struct btrfs_root *root,
5467                             struct extent_record *rec,
5468                             struct extent_buffer *buf)
5469 {
5470         struct extent_backref *node;
5471         struct tree_backref *back;
5472         struct btrfs_root *ref_root;
5473         struct btrfs_key key;
5474         struct btrfs_path path;
5475         struct extent_buffer *parent;
5476         int level;
5477         int found = 0;
5478         int ret;
5479
5480         list_for_each_entry(node, &rec->backrefs, list) {
5481                 if (node->is_data)
5482                         continue;
5483                 if (!node->found_ref)
5484                         continue;
5485                 if (node->full_backref)
5486                         continue;
5487                 back = to_tree_backref(node);
5488                 if (btrfs_header_owner(buf) == back->root)
5489                         return 0;
5490         }
5491         BUG_ON(rec->is_root);
5492
5493         /* try to find the block by search corresponding fs tree */
5494         key.objectid = btrfs_header_owner(buf);
5495         key.type = BTRFS_ROOT_ITEM_KEY;
5496         key.offset = (u64)-1;
5497
5498         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5499         if (IS_ERR(ref_root))
5500                 return 1;
5501
5502         level = btrfs_header_level(buf);
5503         if (level == 0)
5504                 btrfs_item_key_to_cpu(buf, &key, 0);
5505         else
5506                 btrfs_node_key_to_cpu(buf, &key, 0);
5507
5508         btrfs_init_path(&path);
5509         path.lowest_level = level + 1;
5510         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5511         if (ret < 0)
5512                 return 0;
5513
5514         parent = path.nodes[level + 1];
5515         if (parent && buf->start == btrfs_node_blockptr(parent,
5516                                                         path.slots[level + 1]))
5517                 found = 1;
5518
5519         btrfs_release_path(&path);
5520         return found ? 0 : 1;
5521 }
5522
5523 static int is_extent_tree_record(struct extent_record *rec)
5524 {
5525         struct list_head *cur = rec->backrefs.next;
5526         struct extent_backref *node;
5527         struct tree_backref *back;
5528         int is_extent = 0;
5529
5530         while(cur != &rec->backrefs) {
5531                 node = to_extent_backref(cur);
5532                 cur = cur->next;
5533                 if (node->is_data)
5534                         return 0;
5535                 back = to_tree_backref(node);
5536                 if (node->full_backref)
5537                         return 0;
5538                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5539                         is_extent = 1;
5540         }
5541         return is_extent;
5542 }
5543
5544
5545 static int record_bad_block_io(struct btrfs_fs_info *info,
5546                                struct cache_tree *extent_cache,
5547                                u64 start, u64 len)
5548 {
5549         struct extent_record *rec;
5550         struct cache_extent *cache;
5551         struct btrfs_key key;
5552
5553         cache = lookup_cache_extent(extent_cache, start, len);
5554         if (!cache)
5555                 return 0;
5556
5557         rec = container_of(cache, struct extent_record, cache);
5558         if (!is_extent_tree_record(rec))
5559                 return 0;
5560
5561         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5562         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5563 }
5564
5565 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5566                        struct extent_buffer *buf, int slot)
5567 {
5568         if (btrfs_header_level(buf)) {
5569                 struct btrfs_key_ptr ptr1, ptr2;
5570
5571                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5572                                    sizeof(struct btrfs_key_ptr));
5573                 read_extent_buffer(buf, &ptr2,
5574                                    btrfs_node_key_ptr_offset(slot + 1),
5575                                    sizeof(struct btrfs_key_ptr));
5576                 write_extent_buffer(buf, &ptr1,
5577                                     btrfs_node_key_ptr_offset(slot + 1),
5578                                     sizeof(struct btrfs_key_ptr));
5579                 write_extent_buffer(buf, &ptr2,
5580                                     btrfs_node_key_ptr_offset(slot),
5581                                     sizeof(struct btrfs_key_ptr));
5582                 if (slot == 0) {
5583                         struct btrfs_disk_key key;
5584                         btrfs_node_key(buf, &key, 0);
5585                         btrfs_fixup_low_keys(root, path, &key,
5586                                              btrfs_header_level(buf) + 1);
5587                 }
5588         } else {
5589                 struct btrfs_item *item1, *item2;
5590                 struct btrfs_key k1, k2;
5591                 char *item1_data, *item2_data;
5592                 u32 item1_offset, item2_offset, item1_size, item2_size;
5593
5594                 item1 = btrfs_item_nr(slot);
5595                 item2 = btrfs_item_nr(slot + 1);
5596                 btrfs_item_key_to_cpu(buf, &k1, slot);
5597                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5598                 item1_offset = btrfs_item_offset(buf, item1);
5599                 item2_offset = btrfs_item_offset(buf, item2);
5600                 item1_size = btrfs_item_size(buf, item1);
5601                 item2_size = btrfs_item_size(buf, item2);
5602
5603                 item1_data = malloc(item1_size);
5604                 if (!item1_data)
5605                         return -ENOMEM;
5606                 item2_data = malloc(item2_size);
5607                 if (!item2_data) {
5608                         free(item1_data);
5609                         return -ENOMEM;
5610                 }
5611
5612                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5613                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5614
5615                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5616                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5617                 free(item1_data);
5618                 free(item2_data);
5619
5620                 btrfs_set_item_offset(buf, item1, item2_offset);
5621                 btrfs_set_item_offset(buf, item2, item1_offset);
5622                 btrfs_set_item_size(buf, item1, item2_size);
5623                 btrfs_set_item_size(buf, item2, item1_size);
5624
5625                 path->slots[0] = slot;
5626                 btrfs_set_item_key_unsafe(root, path, &k2);
5627                 path->slots[0] = slot + 1;
5628                 btrfs_set_item_key_unsafe(root, path, &k1);
5629         }
5630         return 0;
5631 }
5632
5633 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5634 {
5635         struct extent_buffer *buf;
5636         struct btrfs_key k1, k2;
5637         int i;
5638         int level = path->lowest_level;
5639         int ret = -EIO;
5640
5641         buf = path->nodes[level];
5642         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5643                 if (level) {
5644                         btrfs_node_key_to_cpu(buf, &k1, i);
5645                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5646                 } else {
5647                         btrfs_item_key_to_cpu(buf, &k1, i);
5648                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5649                 }
5650                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5651                         continue;
5652                 ret = swap_values(root, path, buf, i);
5653                 if (ret)
5654                         break;
5655                 btrfs_mark_buffer_dirty(buf);
5656                 i = 0;
5657         }
5658         return ret;
5659 }
5660
5661 static int delete_bogus_item(struct btrfs_root *root,
5662                              struct btrfs_path *path,
5663                              struct extent_buffer *buf, int slot)
5664 {
5665         struct btrfs_key key;
5666         int nritems = btrfs_header_nritems(buf);
5667
5668         btrfs_item_key_to_cpu(buf, &key, slot);
5669
5670         /* These are all the keys we can deal with missing. */
5671         if (key.type != BTRFS_DIR_INDEX_KEY &&
5672             key.type != BTRFS_EXTENT_ITEM_KEY &&
5673             key.type != BTRFS_METADATA_ITEM_KEY &&
5674             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5675             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5676                 return -1;
5677
5678         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5679                (unsigned long long)key.objectid, key.type,
5680                (unsigned long long)key.offset, slot, buf->start);
5681         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5682                               btrfs_item_nr_offset(slot + 1),
5683                               sizeof(struct btrfs_item) *
5684                               (nritems - slot - 1));
5685         btrfs_set_header_nritems(buf, nritems - 1);
5686         if (slot == 0) {
5687                 struct btrfs_disk_key disk_key;
5688
5689                 btrfs_item_key(buf, &disk_key, 0);
5690                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5691         }
5692         btrfs_mark_buffer_dirty(buf);
5693         return 0;
5694 }
5695
5696 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5697 {
5698         struct extent_buffer *buf;
5699         int i;
5700         int ret = 0;
5701
5702         /* We should only get this for leaves */
5703         BUG_ON(path->lowest_level);
5704         buf = path->nodes[0];
5705 again:
5706         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5707                 unsigned int shift = 0, offset;
5708
5709                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5710                     BTRFS_LEAF_DATA_SIZE(root)) {
5711                         if (btrfs_item_end_nr(buf, i) >
5712                             BTRFS_LEAF_DATA_SIZE(root)) {
5713                                 ret = delete_bogus_item(root, path, buf, i);
5714                                 if (!ret)
5715                                         goto again;
5716                                 fprintf(stderr, "item is off the end of the "
5717                                         "leaf, can't fix\n");
5718                                 ret = -EIO;
5719                                 break;
5720                         }
5721                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5722                                 btrfs_item_end_nr(buf, i);
5723                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5724                            btrfs_item_offset_nr(buf, i - 1)) {
5725                         if (btrfs_item_end_nr(buf, i) >
5726                             btrfs_item_offset_nr(buf, i - 1)) {
5727                                 ret = delete_bogus_item(root, path, buf, i);
5728                                 if (!ret)
5729                                         goto again;
5730                                 fprintf(stderr, "items overlap, can't fix\n");
5731                                 ret = -EIO;
5732                                 break;
5733                         }
5734                         shift = btrfs_item_offset_nr(buf, i - 1) -
5735                                 btrfs_item_end_nr(buf, i);
5736                 }
5737                 if (!shift)
5738                         continue;
5739
5740                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5741                        i, shift, (unsigned long long)buf->start);
5742                 offset = btrfs_item_offset_nr(buf, i);
5743                 memmove_extent_buffer(buf,
5744                                       btrfs_leaf_data(buf) + offset + shift,
5745                                       btrfs_leaf_data(buf) + offset,
5746                                       btrfs_item_size_nr(buf, i));
5747                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5748                                       offset + shift);
5749                 btrfs_mark_buffer_dirty(buf);
5750         }
5751
5752         /*
5753          * We may have moved things, in which case we want to exit so we don't
5754          * write those changes out.  Once we have proper abort functionality in
5755          * progs this can be changed to something nicer.
5756          */
5757         BUG_ON(ret);
5758         return ret;
5759 }
5760
5761 /*
5762  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5763  * then just return -EIO.
5764  */
5765 static int try_to_fix_bad_block(struct btrfs_root *root,
5766                                 struct extent_buffer *buf,
5767                                 enum btrfs_tree_block_status status)
5768 {
5769         struct btrfs_trans_handle *trans;
5770         struct ulist *roots;
5771         struct ulist_node *node;
5772         struct btrfs_root *search_root;
5773         struct btrfs_path path;
5774         struct ulist_iterator iter;
5775         struct btrfs_key root_key, key;
5776         int ret;
5777
5778         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5779             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780                 return -EIO;
5781
5782         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5783         if (ret)
5784                 return -EIO;
5785
5786         btrfs_init_path(&path);
5787         ULIST_ITER_INIT(&iter);
5788         while ((node = ulist_next(roots, &iter))) {
5789                 root_key.objectid = node->val;
5790                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5791                 root_key.offset = (u64)-1;
5792
5793                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5794                 if (IS_ERR(root)) {
5795                         ret = -EIO;
5796                         break;
5797                 }
5798
5799
5800                 trans = btrfs_start_transaction(search_root, 0);
5801                 if (IS_ERR(trans)) {
5802                         ret = PTR_ERR(trans);
5803                         break;
5804                 }
5805
5806                 path.lowest_level = btrfs_header_level(buf);
5807                 path.skip_check_block = 1;
5808                 if (path.lowest_level)
5809                         btrfs_node_key_to_cpu(buf, &key, 0);
5810                 else
5811                         btrfs_item_key_to_cpu(buf, &key, 0);
5812                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5813                 if (ret) {
5814                         ret = -EIO;
5815                         btrfs_commit_transaction(trans, search_root);
5816                         break;
5817                 }
5818                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5819                         ret = fix_key_order(search_root, &path);
5820                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5821                         ret = fix_item_offset(search_root, &path);
5822                 if (ret) {
5823                         btrfs_commit_transaction(trans, search_root);
5824                         break;
5825                 }
5826                 btrfs_release_path(&path);
5827                 btrfs_commit_transaction(trans, search_root);
5828         }
5829         ulist_free(roots);
5830         btrfs_release_path(&path);
5831         return ret;
5832 }
5833
5834 static int check_block(struct btrfs_root *root,
5835                        struct cache_tree *extent_cache,
5836                        struct extent_buffer *buf, u64 flags)
5837 {
5838         struct extent_record *rec;
5839         struct cache_extent *cache;
5840         struct btrfs_key key;
5841         enum btrfs_tree_block_status status;
5842         int ret = 0;
5843         int level;
5844
5845         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5846         if (!cache)
5847                 return 1;
5848         rec = container_of(cache, struct extent_record, cache);
5849         rec->generation = btrfs_header_generation(buf);
5850
5851         level = btrfs_header_level(buf);
5852         if (btrfs_header_nritems(buf) > 0) {
5853
5854                 if (level == 0)
5855                         btrfs_item_key_to_cpu(buf, &key, 0);
5856                 else
5857                         btrfs_node_key_to_cpu(buf, &key, 0);
5858
5859                 rec->info_objectid = key.objectid;
5860         }
5861         rec->info_level = level;
5862
5863         if (btrfs_is_leaf(buf))
5864                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5865         else
5866                 status = btrfs_check_node(root, &rec->parent_key, buf);
5867
5868         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5869                 if (repair)
5870                         status = try_to_fix_bad_block(root, buf, status);
5871                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5872                         ret = -EIO;
5873                         fprintf(stderr, "bad block %llu\n",
5874                                 (unsigned long long)buf->start);
5875                 } else {
5876                         /*
5877                          * Signal to callers we need to start the scan over
5878                          * again since we'll have cowed blocks.
5879                          */
5880                         ret = -EAGAIN;
5881                 }
5882         } else {
5883                 rec->content_checked = 1;
5884                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5885                         rec->owner_ref_checked = 1;
5886                 else {
5887                         ret = check_owner_ref(root, rec, buf);
5888                         if (!ret)
5889                                 rec->owner_ref_checked = 1;
5890                 }
5891         }
5892         if (!ret)
5893                 maybe_free_extent_rec(extent_cache, rec);
5894         return ret;
5895 }
5896
5897 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5898                                                 u64 parent, u64 root)
5899 {
5900         struct list_head *cur = rec->backrefs.next;
5901         struct extent_backref *node;
5902         struct tree_backref *back;
5903
5904         while(cur != &rec->backrefs) {
5905                 node = to_extent_backref(cur);
5906                 cur = cur->next;
5907                 if (node->is_data)
5908                         continue;
5909                 back = to_tree_backref(node);
5910                 if (parent > 0) {
5911                         if (!node->full_backref)
5912                                 continue;
5913                         if (parent == back->parent)
5914                                 return back;
5915                 } else {
5916                         if (node->full_backref)
5917                                 continue;
5918                         if (back->root == root)
5919                                 return back;
5920                 }
5921         }
5922         return NULL;
5923 }
5924
5925 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5926                                                 u64 parent, u64 root)
5927 {
5928         struct tree_backref *ref = malloc(sizeof(*ref));
5929
5930         if (!ref)
5931                 return NULL;
5932         memset(&ref->node, 0, sizeof(ref->node));
5933         if (parent > 0) {
5934                 ref->parent = parent;
5935                 ref->node.full_backref = 1;
5936         } else {
5937                 ref->root = root;
5938                 ref->node.full_backref = 0;
5939         }
5940         list_add_tail(&ref->node.list, &rec->backrefs);
5941
5942         return ref;
5943 }
5944
5945 static struct data_backref *find_data_backref(struct extent_record *rec,
5946                                                 u64 parent, u64 root,
5947                                                 u64 owner, u64 offset,
5948                                                 int found_ref,
5949                                                 u64 disk_bytenr, u64 bytes)
5950 {
5951         struct list_head *cur = rec->backrefs.next;
5952         struct extent_backref *node;
5953         struct data_backref *back;
5954
5955         while(cur != &rec->backrefs) {
5956                 node = to_extent_backref(cur);
5957                 cur = cur->next;
5958                 if (!node->is_data)
5959                         continue;
5960                 back = to_data_backref(node);
5961                 if (parent > 0) {
5962                         if (!node->full_backref)
5963                                 continue;
5964                         if (parent == back->parent)
5965                                 return back;
5966                 } else {
5967                         if (node->full_backref)
5968                                 continue;
5969                         if (back->root == root && back->owner == owner &&
5970                             back->offset == offset) {
5971                                 if (found_ref && node->found_ref &&
5972                                     (back->bytes != bytes ||
5973                                     back->disk_bytenr != disk_bytenr))
5974                                         continue;
5975                                 return back;
5976                         }
5977                 }
5978         }
5979         return NULL;
5980 }
5981
5982 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5983                                                 u64 parent, u64 root,
5984                                                 u64 owner, u64 offset,
5985                                                 u64 max_size)
5986 {
5987         struct data_backref *ref = malloc(sizeof(*ref));
5988
5989         if (!ref)
5990                 return NULL;
5991         memset(&ref->node, 0, sizeof(ref->node));
5992         ref->node.is_data = 1;
5993
5994         if (parent > 0) {
5995                 ref->parent = parent;
5996                 ref->owner = 0;
5997                 ref->offset = 0;
5998                 ref->node.full_backref = 1;
5999         } else {
6000                 ref->root = root;
6001                 ref->owner = owner;
6002                 ref->offset = offset;
6003                 ref->node.full_backref = 0;
6004         }
6005         ref->bytes = max_size;
6006         ref->found_ref = 0;
6007         ref->num_refs = 0;
6008         list_add_tail(&ref->node.list, &rec->backrefs);
6009         if (max_size > rec->max_size)
6010                 rec->max_size = max_size;
6011         return ref;
6012 }
6013
6014 /* Check if the type of extent matches with its chunk */
6015 static void check_extent_type(struct extent_record *rec)
6016 {
6017         struct btrfs_block_group_cache *bg_cache;
6018
6019         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6020         if (!bg_cache)
6021                 return;
6022
6023         /* data extent, check chunk directly*/
6024         if (!rec->metadata) {
6025                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6026                         rec->wrong_chunk_type = 1;
6027                 return;
6028         }
6029
6030         /* metadata extent, check the obvious case first */
6031         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6032                                  BTRFS_BLOCK_GROUP_METADATA))) {
6033                 rec->wrong_chunk_type = 1;
6034                 return;
6035         }
6036
6037         /*
6038          * Check SYSTEM extent, as it's also marked as metadata, we can only
6039          * make sure it's a SYSTEM extent by its backref
6040          */
6041         if (!list_empty(&rec->backrefs)) {
6042                 struct extent_backref *node;
6043                 struct tree_backref *tback;
6044                 u64 bg_type;
6045
6046                 node = to_extent_backref(rec->backrefs.next);
6047                 if (node->is_data) {
6048                         /* tree block shouldn't have data backref */
6049                         rec->wrong_chunk_type = 1;
6050                         return;
6051                 }
6052                 tback = container_of(node, struct tree_backref, node);
6053
6054                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6055                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6056                 else
6057                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6058                 if (!(bg_cache->flags & bg_type))
6059                         rec->wrong_chunk_type = 1;
6060         }
6061 }
6062
6063 /*
6064  * Allocate a new extent record, fill default values from @tmpl and insert int
6065  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6066  * the cache, otherwise it fails.
6067  */
6068 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6069                 struct extent_record *tmpl)
6070 {
6071         struct extent_record *rec;
6072         int ret = 0;
6073
6074         BUG_ON(tmpl->max_size == 0);
6075         rec = malloc(sizeof(*rec));
6076         if (!rec)
6077                 return -ENOMEM;
6078         rec->start = tmpl->start;
6079         rec->max_size = tmpl->max_size;
6080         rec->nr = max(tmpl->nr, tmpl->max_size);
6081         rec->found_rec = tmpl->found_rec;
6082         rec->content_checked = tmpl->content_checked;
6083         rec->owner_ref_checked = tmpl->owner_ref_checked;
6084         rec->num_duplicates = 0;
6085         rec->metadata = tmpl->metadata;
6086         rec->flag_block_full_backref = FLAG_UNSET;
6087         rec->bad_full_backref = 0;
6088         rec->crossing_stripes = 0;
6089         rec->wrong_chunk_type = 0;
6090         rec->is_root = tmpl->is_root;
6091         rec->refs = tmpl->refs;
6092         rec->extent_item_refs = tmpl->extent_item_refs;
6093         rec->parent_generation = tmpl->parent_generation;
6094         INIT_LIST_HEAD(&rec->backrefs);
6095         INIT_LIST_HEAD(&rec->dups);
6096         INIT_LIST_HEAD(&rec->list);
6097         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6098         rec->cache.start = tmpl->start;
6099         rec->cache.size = tmpl->nr;
6100         ret = insert_cache_extent(extent_cache, &rec->cache);
6101         if (ret) {
6102                 free(rec);
6103                 return ret;
6104         }
6105         bytes_used += rec->nr;
6106
6107         if (tmpl->metadata)
6108                 rec->crossing_stripes = check_crossing_stripes(global_info,
6109                                 rec->start, global_info->nodesize);
6110         check_extent_type(rec);
6111         return ret;
6112 }
6113
6114 /*
6115  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6116  * some are hints:
6117  * - refs              - if found, increase refs
6118  * - is_root           - if found, set
6119  * - content_checked   - if found, set
6120  * - owner_ref_checked - if found, set
6121  *
6122  * If not found, create a new one, initialize and insert.
6123  */
6124 static int add_extent_rec(struct cache_tree *extent_cache,
6125                 struct extent_record *tmpl)
6126 {
6127         struct extent_record *rec;
6128         struct cache_extent *cache;
6129         int ret = 0;
6130         int dup = 0;
6131
6132         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6133         if (cache) {
6134                 rec = container_of(cache, struct extent_record, cache);
6135                 if (tmpl->refs)
6136                         rec->refs++;
6137                 if (rec->nr == 1)
6138                         rec->nr = max(tmpl->nr, tmpl->max_size);
6139
6140                 /*
6141                  * We need to make sure to reset nr to whatever the extent
6142                  * record says was the real size, this way we can compare it to
6143                  * the backrefs.
6144                  */
6145                 if (tmpl->found_rec) {
6146                         if (tmpl->start != rec->start || rec->found_rec) {
6147                                 struct extent_record *tmp;
6148
6149                                 dup = 1;
6150                                 if (list_empty(&rec->list))
6151                                         list_add_tail(&rec->list,
6152                                                       &duplicate_extents);
6153
6154                                 /*
6155                                  * We have to do this song and dance in case we
6156                                  * find an extent record that falls inside of
6157                                  * our current extent record but does not have
6158                                  * the same objectid.
6159                                  */
6160                                 tmp = malloc(sizeof(*tmp));
6161                                 if (!tmp)
6162                                         return -ENOMEM;
6163                                 tmp->start = tmpl->start;
6164                                 tmp->max_size = tmpl->max_size;
6165                                 tmp->nr = tmpl->nr;
6166                                 tmp->found_rec = 1;
6167                                 tmp->metadata = tmpl->metadata;
6168                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6169                                 INIT_LIST_HEAD(&tmp->list);
6170                                 list_add_tail(&tmp->list, &rec->dups);
6171                                 rec->num_duplicates++;
6172                         } else {
6173                                 rec->nr = tmpl->nr;
6174                                 rec->found_rec = 1;
6175                         }
6176                 }
6177
6178                 if (tmpl->extent_item_refs && !dup) {
6179                         if (rec->extent_item_refs) {
6180                                 fprintf(stderr, "block %llu rec "
6181                                         "extent_item_refs %llu, passed %llu\n",
6182                                         (unsigned long long)tmpl->start,
6183                                         (unsigned long long)
6184                                                         rec->extent_item_refs,
6185                                         (unsigned long long)tmpl->extent_item_refs);
6186                         }
6187                         rec->extent_item_refs = tmpl->extent_item_refs;
6188                 }
6189                 if (tmpl->is_root)
6190                         rec->is_root = 1;
6191                 if (tmpl->content_checked)
6192                         rec->content_checked = 1;
6193                 if (tmpl->owner_ref_checked)
6194                         rec->owner_ref_checked = 1;
6195                 memcpy(&rec->parent_key, &tmpl->parent_key,
6196                                 sizeof(tmpl->parent_key));
6197                 if (tmpl->parent_generation)
6198                         rec->parent_generation = tmpl->parent_generation;
6199                 if (rec->max_size < tmpl->max_size)
6200                         rec->max_size = tmpl->max_size;
6201
6202                 /*
6203                  * A metadata extent can't cross stripe_len boundary, otherwise
6204                  * kernel scrub won't be able to handle it.
6205                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6206                  * it.
6207                  */
6208                 if (tmpl->metadata)
6209                         rec->crossing_stripes = check_crossing_stripes(
6210                                         global_info, rec->start,
6211                                         global_info->nodesize);
6212                 check_extent_type(rec);
6213                 maybe_free_extent_rec(extent_cache, rec);
6214                 return ret;
6215         }
6216
6217         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6218
6219         return ret;
6220 }
6221
6222 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6223                             u64 parent, u64 root, int found_ref)
6224 {
6225         struct extent_record *rec;
6226         struct tree_backref *back;
6227         struct cache_extent *cache;
6228         int ret;
6229
6230         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6231         if (!cache) {
6232                 struct extent_record tmpl;
6233
6234                 memset(&tmpl, 0, sizeof(tmpl));
6235                 tmpl.start = bytenr;
6236                 tmpl.nr = 1;
6237                 tmpl.metadata = 1;
6238                 tmpl.max_size = 1;
6239
6240                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6241                 if (ret)
6242                         return ret;
6243
6244                 /* really a bug in cache_extent implement now */
6245                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6246                 if (!cache)
6247                         return -ENOENT;
6248         }
6249
6250         rec = container_of(cache, struct extent_record, cache);
6251         if (rec->start != bytenr) {
6252                 /*
6253                  * Several cause, from unaligned bytenr to over lapping extents
6254                  */
6255                 return -EEXIST;
6256         }
6257
6258         back = find_tree_backref(rec, parent, root);
6259         if (!back) {
6260                 back = alloc_tree_backref(rec, parent, root);
6261                 if (!back)
6262                         return -ENOMEM;
6263         }
6264
6265         if (found_ref) {
6266                 if (back->node.found_ref) {
6267                         fprintf(stderr, "Extent back ref already exists "
6268                                 "for %llu parent %llu root %llu \n",
6269                                 (unsigned long long)bytenr,
6270                                 (unsigned long long)parent,
6271                                 (unsigned long long)root);
6272                 }
6273                 back->node.found_ref = 1;
6274         } else {
6275                 if (back->node.found_extent_tree) {
6276                         fprintf(stderr, "Extent back ref already exists "
6277                                 "for %llu parent %llu root %llu \n",
6278                                 (unsigned long long)bytenr,
6279                                 (unsigned long long)parent,
6280                                 (unsigned long long)root);
6281                 }
6282                 back->node.found_extent_tree = 1;
6283         }
6284         check_extent_type(rec);
6285         maybe_free_extent_rec(extent_cache, rec);
6286         return 0;
6287 }
6288
6289 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6290                             u64 parent, u64 root, u64 owner, u64 offset,
6291                             u32 num_refs, int found_ref, u64 max_size)
6292 {
6293         struct extent_record *rec;
6294         struct data_backref *back;
6295         struct cache_extent *cache;
6296         int ret;
6297
6298         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6299         if (!cache) {
6300                 struct extent_record tmpl;
6301
6302                 memset(&tmpl, 0, sizeof(tmpl));
6303                 tmpl.start = bytenr;
6304                 tmpl.nr = 1;
6305                 tmpl.max_size = max_size;
6306
6307                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6308                 if (ret)
6309                         return ret;
6310
6311                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6312                 if (!cache)
6313                         abort();
6314         }
6315
6316         rec = container_of(cache, struct extent_record, cache);
6317         if (rec->max_size < max_size)
6318                 rec->max_size = max_size;
6319
6320         /*
6321          * If found_ref is set then max_size is the real size and must match the
6322          * existing refs.  So if we have already found a ref then we need to
6323          * make sure that this ref matches the existing one, otherwise we need
6324          * to add a new backref so we can notice that the backrefs don't match
6325          * and we need to figure out who is telling the truth.  This is to
6326          * account for that awful fsync bug I introduced where we'd end up with
6327          * a btrfs_file_extent_item that would have its length include multiple
6328          * prealloc extents or point inside of a prealloc extent.
6329          */
6330         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6331                                  bytenr, max_size);
6332         if (!back) {
6333                 back = alloc_data_backref(rec, parent, root, owner, offset,
6334                                           max_size);
6335                 BUG_ON(!back);
6336         }
6337
6338         if (found_ref) {
6339                 BUG_ON(num_refs != 1);
6340                 if (back->node.found_ref)
6341                         BUG_ON(back->bytes != max_size);
6342                 back->node.found_ref = 1;
6343                 back->found_ref += 1;
6344                 back->bytes = max_size;
6345                 back->disk_bytenr = bytenr;
6346                 rec->refs += 1;
6347                 rec->content_checked = 1;
6348                 rec->owner_ref_checked = 1;
6349         } else {
6350                 if (back->node.found_extent_tree) {
6351                         fprintf(stderr, "Extent back ref already exists "
6352                                 "for %llu parent %llu root %llu "
6353                                 "owner %llu offset %llu num_refs %lu\n",
6354                                 (unsigned long long)bytenr,
6355                                 (unsigned long long)parent,
6356                                 (unsigned long long)root,
6357                                 (unsigned long long)owner,
6358                                 (unsigned long long)offset,
6359                                 (unsigned long)num_refs);
6360                 }
6361                 back->num_refs = num_refs;
6362                 back->node.found_extent_tree = 1;
6363         }
6364         maybe_free_extent_rec(extent_cache, rec);
6365         return 0;
6366 }
6367
6368 static int add_pending(struct cache_tree *pending,
6369                        struct cache_tree *seen, u64 bytenr, u32 size)
6370 {
6371         int ret;
6372         ret = add_cache_extent(seen, bytenr, size);
6373         if (ret)
6374                 return ret;
6375         add_cache_extent(pending, bytenr, size);
6376         return 0;
6377 }
6378
6379 static int pick_next_pending(struct cache_tree *pending,
6380                         struct cache_tree *reada,
6381                         struct cache_tree *nodes,
6382                         u64 last, struct block_info *bits, int bits_nr,
6383                         int *reada_bits)
6384 {
6385         unsigned long node_start = last;
6386         struct cache_extent *cache;
6387         int ret;
6388
6389         cache = search_cache_extent(reada, 0);
6390         if (cache) {
6391                 bits[0].start = cache->start;
6392                 bits[0].size = cache->size;
6393                 *reada_bits = 1;
6394                 return 1;
6395         }
6396         *reada_bits = 0;
6397         if (node_start > 32768)
6398                 node_start -= 32768;
6399
6400         cache = search_cache_extent(nodes, node_start);
6401         if (!cache)
6402                 cache = search_cache_extent(nodes, 0);
6403
6404         if (!cache) {
6405                  cache = search_cache_extent(pending, 0);
6406                  if (!cache)
6407                          return 0;
6408                  ret = 0;
6409                  do {
6410                          bits[ret].start = cache->start;
6411                          bits[ret].size = cache->size;
6412                          cache = next_cache_extent(cache);
6413                          ret++;
6414                  } while (cache && ret < bits_nr);
6415                  return ret;
6416         }
6417
6418         ret = 0;
6419         do {
6420                 bits[ret].start = cache->start;
6421                 bits[ret].size = cache->size;
6422                 cache = next_cache_extent(cache);
6423                 ret++;
6424         } while (cache && ret < bits_nr);
6425
6426         if (bits_nr - ret > 8) {
6427                 u64 lookup = bits[0].start + bits[0].size;
6428                 struct cache_extent *next;
6429                 next = search_cache_extent(pending, lookup);
6430                 while(next) {
6431                         if (next->start - lookup > 32768)
6432                                 break;
6433                         bits[ret].start = next->start;
6434                         bits[ret].size = next->size;
6435                         lookup = next->start + next->size;
6436                         ret++;
6437                         if (ret == bits_nr)
6438                                 break;
6439                         next = next_cache_extent(next);
6440                         if (!next)
6441                                 break;
6442                 }
6443         }
6444         return ret;
6445 }
6446
6447 static void free_chunk_record(struct cache_extent *cache)
6448 {
6449         struct chunk_record *rec;
6450
6451         rec = container_of(cache, struct chunk_record, cache);
6452         list_del_init(&rec->list);
6453         list_del_init(&rec->dextents);
6454         free(rec);
6455 }
6456
6457 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6458 {
6459         cache_tree_free_extents(chunk_cache, free_chunk_record);
6460 }
6461
6462 static void free_device_record(struct rb_node *node)
6463 {
6464         struct device_record *rec;
6465
6466         rec = container_of(node, struct device_record, node);
6467         free(rec);
6468 }
6469
6470 FREE_RB_BASED_TREE(device_cache, free_device_record);
6471
6472 int insert_block_group_record(struct block_group_tree *tree,
6473                               struct block_group_record *bg_rec)
6474 {
6475         int ret;
6476
6477         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6478         if (ret)
6479                 return ret;
6480
6481         list_add_tail(&bg_rec->list, &tree->block_groups);
6482         return 0;
6483 }
6484
6485 static void free_block_group_record(struct cache_extent *cache)
6486 {
6487         struct block_group_record *rec;
6488
6489         rec = container_of(cache, struct block_group_record, cache);
6490         list_del_init(&rec->list);
6491         free(rec);
6492 }
6493
6494 void free_block_group_tree(struct block_group_tree *tree)
6495 {
6496         cache_tree_free_extents(&tree->tree, free_block_group_record);
6497 }
6498
6499 int insert_device_extent_record(struct device_extent_tree *tree,
6500                                 struct device_extent_record *de_rec)
6501 {
6502         int ret;
6503
6504         /*
6505          * Device extent is a bit different from the other extents, because
6506          * the extents which belong to the different devices may have the
6507          * same start and size, so we need use the special extent cache
6508          * search/insert functions.
6509          */
6510         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6511         if (ret)
6512                 return ret;
6513
6514         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6515         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6516         return 0;
6517 }
6518
6519 static void free_device_extent_record(struct cache_extent *cache)
6520 {
6521         struct device_extent_record *rec;
6522
6523         rec = container_of(cache, struct device_extent_record, cache);
6524         if (!list_empty(&rec->chunk_list))
6525                 list_del_init(&rec->chunk_list);
6526         if (!list_empty(&rec->device_list))
6527                 list_del_init(&rec->device_list);
6528         free(rec);
6529 }
6530
6531 void free_device_extent_tree(struct device_extent_tree *tree)
6532 {
6533         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6534 }
6535
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache. A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref, anything else as a data backref carrying the
 * ref count of the item. The key's objectid is used as the extent start and
 * the key's offset as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
				key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6556
6557 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6558                                             struct btrfs_key *key,
6559                                             int slot)
6560 {
6561         struct btrfs_chunk *ptr;
6562         struct chunk_record *rec;
6563         int num_stripes, i;
6564
6565         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6566         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6567
6568         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6569         if (!rec) {
6570                 fprintf(stderr, "memory allocation failed\n");
6571                 exit(-1);
6572         }
6573
6574         INIT_LIST_HEAD(&rec->list);
6575         INIT_LIST_HEAD(&rec->dextents);
6576         rec->bg_rec = NULL;
6577
6578         rec->cache.start = key->offset;
6579         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6580
6581         rec->generation = btrfs_header_generation(leaf);
6582
6583         rec->objectid = key->objectid;
6584         rec->type = key->type;
6585         rec->offset = key->offset;
6586
6587         rec->length = rec->cache.size;
6588         rec->owner = btrfs_chunk_owner(leaf, ptr);
6589         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6590         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6591         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6592         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6593         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6594         rec->num_stripes = num_stripes;
6595         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6596
6597         for (i = 0; i < rec->num_stripes; ++i) {
6598                 rec->stripes[i].devid =
6599                         btrfs_stripe_devid_nr(leaf, ptr, i);
6600                 rec->stripes[i].offset =
6601                         btrfs_stripe_offset_nr(leaf, ptr, i);
6602                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6603                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6604                                 BTRFS_UUID_SIZE);
6605         }
6606
6607         return rec;
6608 }
6609
6610 static int process_chunk_item(struct cache_tree *chunk_cache,
6611                               struct btrfs_key *key, struct extent_buffer *eb,
6612                               int slot)
6613 {
6614         struct chunk_record *rec;
6615         struct btrfs_chunk *chunk;
6616         int ret = 0;
6617
6618         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6619         /*
6620          * Do extra check for this chunk item,
6621          *
6622          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6623          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6624          * and owner<->key_type check.
6625          */
6626         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6627                                       key->offset);
6628         if (ret < 0) {
6629                 error("chunk(%llu, %llu) is not valid, ignore it",
6630                       key->offset, btrfs_chunk_length(eb, chunk));
6631                 return 0;
6632         }
6633         rec = btrfs_new_chunk_record(eb, key, slot);
6634         ret = insert_cache_extent(chunk_cache, &rec->cache);
6635         if (ret) {
6636                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6637                         rec->offset, rec->length);
6638                 free(rec);
6639         }
6640
6641         return ret;
6642 }
6643
6644 static int process_device_item(struct rb_root *dev_cache,
6645                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6646 {
6647         struct btrfs_dev_item *ptr;
6648         struct device_record *rec;
6649         int ret = 0;
6650
6651         ptr = btrfs_item_ptr(eb,
6652                 slot, struct btrfs_dev_item);
6653
6654         rec = malloc(sizeof(*rec));
6655         if (!rec) {
6656                 fprintf(stderr, "memory allocation failed\n");
6657                 return -ENOMEM;
6658         }
6659
6660         rec->devid = key->offset;
6661         rec->generation = btrfs_header_generation(eb);
6662
6663         rec->objectid = key->objectid;
6664         rec->type = key->type;
6665         rec->offset = key->offset;
6666
6667         rec->devid = btrfs_device_id(eb, ptr);
6668         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6669         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6670
6671         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6672         if (ret) {
6673                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6674                 free(rec);
6675         }
6676
6677         return ret;
6678 }
6679
6680 struct block_group_record *
6681 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6682                              int slot)
6683 {
6684         struct btrfs_block_group_item *ptr;
6685         struct block_group_record *rec;
6686
6687         rec = calloc(1, sizeof(*rec));
6688         if (!rec) {
6689                 fprintf(stderr, "memory allocation failed\n");
6690                 exit(-1);
6691         }
6692
6693         rec->cache.start = key->objectid;
6694         rec->cache.size = key->offset;
6695
6696         rec->generation = btrfs_header_generation(leaf);
6697
6698         rec->objectid = key->objectid;
6699         rec->type = key->type;
6700         rec->offset = key->offset;
6701
6702         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6703         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6704
6705         INIT_LIST_HEAD(&rec->list);
6706
6707         return rec;
6708 }
6709
6710 static int process_block_group_item(struct block_group_tree *block_group_cache,
6711                                     struct btrfs_key *key,
6712                                     struct extent_buffer *eb, int slot)
6713 {
6714         struct block_group_record *rec;
6715         int ret = 0;
6716
6717         rec = btrfs_new_block_group_record(eb, key, slot);
6718         ret = insert_block_group_record(block_group_cache, rec);
6719         if (ret) {
6720                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6721                         rec->objectid, rec->offset);
6722                 free(rec);
6723         }
6724
6725         return ret;
6726 }
6727
6728 struct device_extent_record *
6729 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6730                                struct btrfs_key *key, int slot)
6731 {
6732         struct device_extent_record *rec;
6733         struct btrfs_dev_extent *ptr;
6734
6735         rec = calloc(1, sizeof(*rec));
6736         if (!rec) {
6737                 fprintf(stderr, "memory allocation failed\n");
6738                 exit(-1);
6739         }
6740
6741         rec->cache.objectid = key->objectid;
6742         rec->cache.start = key->offset;
6743
6744         rec->generation = btrfs_header_generation(leaf);
6745
6746         rec->objectid = key->objectid;
6747         rec->type = key->type;
6748         rec->offset = key->offset;
6749
6750         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6751         rec->chunk_objecteid =
6752                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6753         rec->chunk_offset =
6754                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6755         rec->length = btrfs_dev_extent_length(leaf, ptr);
6756         rec->cache.size = rec->length;
6757
6758         INIT_LIST_HEAD(&rec->chunk_list);
6759         INIT_LIST_HEAD(&rec->device_list);
6760
6761         return rec;
6762 }
6763
6764 static int
6765 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6766                            struct btrfs_key *key, struct extent_buffer *eb,
6767                            int slot)
6768 {
6769         struct device_extent_record *rec;
6770         int ret;
6771
6772         rec = btrfs_new_device_extent_record(eb, key, slot);
6773         ret = insert_device_extent_record(dev_extent_cache, rec);
6774         if (ret) {
6775                 fprintf(stderr,
6776                         "Device extent[%llu, %llu, %llu] existed.\n",
6777                         rec->objectid, rec->offset, rec->length);
6778                 free(rec);
6779         }
6780
6781         return ret;
6782 }
6783
6784 static int process_extent_item(struct btrfs_root *root,
6785                                struct cache_tree *extent_cache,
6786                                struct extent_buffer *eb, int slot)
6787 {
6788         struct btrfs_extent_item *ei;
6789         struct btrfs_extent_inline_ref *iref;
6790         struct btrfs_extent_data_ref *dref;
6791         struct btrfs_shared_data_ref *sref;
6792         struct btrfs_key key;
6793         struct extent_record tmpl;
6794         unsigned long end;
6795         unsigned long ptr;
6796         int ret;
6797         int type;
6798         u32 item_size = btrfs_item_size_nr(eb, slot);
6799         u64 refs = 0;
6800         u64 offset;
6801         u64 num_bytes;
6802         int metadata = 0;
6803
6804         btrfs_item_key_to_cpu(eb, &key, slot);
6805
6806         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6807                 metadata = 1;
6808                 num_bytes = root->fs_info->nodesize;
6809         } else {
6810                 num_bytes = key.offset;
6811         }
6812
6813         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6814                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6815                       key.objectid, root->fs_info->sectorsize);
6816                 return -EIO;
6817         }
6818         if (item_size < sizeof(*ei)) {
6819 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6820                 struct btrfs_extent_item_v0 *ei0;
6821                 BUG_ON(item_size != sizeof(*ei0));
6822                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6823                 refs = btrfs_extent_refs_v0(eb, ei0);
6824 #else
6825                 BUG();
6826 #endif
6827                 memset(&tmpl, 0, sizeof(tmpl));
6828                 tmpl.start = key.objectid;
6829                 tmpl.nr = num_bytes;
6830                 tmpl.extent_item_refs = refs;
6831                 tmpl.metadata = metadata;
6832                 tmpl.found_rec = 1;
6833                 tmpl.max_size = num_bytes;
6834
6835                 return add_extent_rec(extent_cache, &tmpl);
6836         }
6837
6838         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6839         refs = btrfs_extent_refs(eb, ei);
6840         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6841                 metadata = 1;
6842         else
6843                 metadata = 0;
6844         if (metadata && num_bytes != root->fs_info->nodesize) {
6845                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6846                       num_bytes, root->fs_info->nodesize);
6847                 return -EIO;
6848         }
6849         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6850                 error("ignore invalid data extent, length %llu is not aligned to %u",
6851                       num_bytes, root->fs_info->sectorsize);
6852                 return -EIO;
6853         }
6854
6855         memset(&tmpl, 0, sizeof(tmpl));
6856         tmpl.start = key.objectid;
6857         tmpl.nr = num_bytes;
6858         tmpl.extent_item_refs = refs;
6859         tmpl.metadata = metadata;
6860         tmpl.found_rec = 1;
6861         tmpl.max_size = num_bytes;
6862         add_extent_rec(extent_cache, &tmpl);
6863
6864         ptr = (unsigned long)(ei + 1);
6865         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6866             key.type == BTRFS_EXTENT_ITEM_KEY)
6867                 ptr += sizeof(struct btrfs_tree_block_info);
6868
6869         end = (unsigned long)ei + item_size;
6870         while (ptr < end) {
6871                 iref = (struct btrfs_extent_inline_ref *)ptr;
6872                 type = btrfs_extent_inline_ref_type(eb, iref);
6873                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6874                 switch (type) {
6875                 case BTRFS_TREE_BLOCK_REF_KEY:
6876                         ret = add_tree_backref(extent_cache, key.objectid,
6877                                         0, offset, 0);
6878                         if (ret < 0)
6879                                 error(
6880                         "add_tree_backref failed (extent items tree block): %s",
6881                                       strerror(-ret));
6882                         break;
6883                 case BTRFS_SHARED_BLOCK_REF_KEY:
6884                         ret = add_tree_backref(extent_cache, key.objectid,
6885                                         offset, 0, 0);
6886                         if (ret < 0)
6887                                 error(
6888                         "add_tree_backref failed (extent items shared block): %s",
6889                                       strerror(-ret));
6890                         break;
6891                 case BTRFS_EXTENT_DATA_REF_KEY:
6892                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6893                         add_data_backref(extent_cache, key.objectid, 0,
6894                                         btrfs_extent_data_ref_root(eb, dref),
6895                                         btrfs_extent_data_ref_objectid(eb,
6896                                                                        dref),
6897                                         btrfs_extent_data_ref_offset(eb, dref),
6898                                         btrfs_extent_data_ref_count(eb, dref),
6899                                         0, num_bytes);
6900                         break;
6901                 case BTRFS_SHARED_DATA_REF_KEY:
6902                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6903                         add_data_backref(extent_cache, key.objectid, offset,
6904                                         0, 0, 0,
6905                                         btrfs_shared_data_ref_count(eb, sref),
6906                                         0, num_bytes);
6907                         break;
6908                 default:
6909                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6910                                 key.objectid, key.type, num_bytes);
6911                         goto out;
6912                 }
6913                 ptr += btrfs_extent_inline_ref_size(type);
6914         }
6915         WARN_ON(ptr > end);
6916 out:
6917         return 0;
6918 }
6919
6920 static int check_cache_range(struct btrfs_root *root,
6921                              struct btrfs_block_group_cache *cache,
6922                              u64 offset, u64 bytes)
6923 {
6924         struct btrfs_free_space *entry;
6925         u64 *logical;
6926         u64 bytenr;
6927         int stripe_len;
6928         int i, nr, ret;
6929
6930         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6931                 bytenr = btrfs_sb_offset(i);
6932                 ret = btrfs_rmap_block(root->fs_info,
6933                                        cache->key.objectid, bytenr, 0,
6934                                        &logical, &nr, &stripe_len);
6935                 if (ret)
6936                         return ret;
6937
6938                 while (nr--) {
6939                         if (logical[nr] + stripe_len <= offset)
6940                                 continue;
6941                         if (offset + bytes <= logical[nr])
6942                                 continue;
6943                         if (logical[nr] == offset) {
6944                                 if (stripe_len >= bytes) {
6945                                         free(logical);
6946                                         return 0;
6947                                 }
6948                                 bytes -= stripe_len;
6949                                 offset += stripe_len;
6950                         } else if (logical[nr] < offset) {
6951                                 if (logical[nr] + stripe_len >=
6952                                     offset + bytes) {
6953                                         free(logical);
6954                                         return 0;
6955                                 }
6956                                 bytes = (offset + bytes) -
6957                                         (logical[nr] + stripe_len);
6958                                 offset = logical[nr] + stripe_len;
6959                         } else {
6960                                 /*
6961                                  * Could be tricky, the super may land in the
6962                                  * middle of the area we're checking.  First
6963                                  * check the easiest case, it's at the end.
6964                                  */
6965                                 if (logical[nr] + stripe_len >=
6966                                     bytes + offset) {
6967                                         bytes = logical[nr] - offset;
6968                                         continue;
6969                                 }
6970
6971                                 /* Check the left side */
6972                                 ret = check_cache_range(root, cache,
6973                                                         offset,
6974                                                         logical[nr] - offset);
6975                                 if (ret) {
6976                                         free(logical);
6977                                         return ret;
6978                                 }
6979
6980                                 /* Now we continue with the right side */
6981                                 bytes = (offset + bytes) -
6982                                         (logical[nr] + stripe_len);
6983                                 offset = logical[nr] + stripe_len;
6984                         }
6985                 }
6986
6987                 free(logical);
6988         }
6989
6990         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6991         if (!entry) {
6992                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6993                         offset, offset+bytes);
6994                 return -EINVAL;
6995         }
6996
6997         if (entry->offset != offset) {
6998                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6999                         entry->offset);
7000                 return -EINVAL;
7001         }
7002
7003         if (entry->bytes != bytes) {
7004                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7005                         bytes, entry->bytes, offset);
7006                 return -EINVAL;
7007         }
7008
7009         unlink_free_space(cache->free_space_ctl, entry);
7010         free(entry);
7011         return 0;
7012 }
7013
7014 static int verify_space_cache(struct btrfs_root *root,
7015                               struct btrfs_block_group_cache *cache)
7016 {
7017         struct btrfs_path path;
7018         struct extent_buffer *leaf;
7019         struct btrfs_key key;
7020         u64 last;
7021         int ret = 0;
7022
7023         root = root->fs_info->extent_root;
7024
7025         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7026
7027         btrfs_init_path(&path);
7028         key.objectid = last;
7029         key.offset = 0;
7030         key.type = BTRFS_EXTENT_ITEM_KEY;
7031         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7032         if (ret < 0)
7033                 goto out;
7034         ret = 0;
7035         while (1) {
7036                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7037                         ret = btrfs_next_leaf(root, &path);
7038                         if (ret < 0)
7039                                 goto out;
7040                         if (ret > 0) {
7041                                 ret = 0;
7042                                 break;
7043                         }
7044                 }
7045                 leaf = path.nodes[0];
7046                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7047                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7048                         break;
7049                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7050                     key.type != BTRFS_METADATA_ITEM_KEY) {
7051                         path.slots[0]++;
7052                         continue;
7053                 }
7054
7055                 if (last == key.objectid) {
7056                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7057                                 last = key.objectid + key.offset;
7058                         else
7059                                 last = key.objectid + root->fs_info->nodesize;
7060                         path.slots[0]++;
7061                         continue;
7062                 }
7063
7064                 ret = check_cache_range(root, cache, last,
7065                                         key.objectid - last);
7066                 if (ret)
7067                         break;
7068                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7069                         last = key.objectid + key.offset;
7070                 else
7071                         last = key.objectid + root->fs_info->nodesize;
7072                 path.slots[0]++;
7073         }
7074
7075         if (last < cache->key.objectid + cache->key.offset)
7076                 ret = check_cache_range(root, cache, last,
7077                                         cache->key.objectid +
7078                                         cache->key.offset - last);
7079
7080 out:
7081         btrfs_release_path(&path);
7082
7083         if (!ret &&
7084             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7085                 fprintf(stderr, "There are still entries left in the space "
7086                         "cache\n");
7087                 ret = -EINVAL;
7088         }
7089
7090         return ret;
7091 }
7092
7093 static int check_space_cache(struct btrfs_root *root)
7094 {
7095         struct btrfs_block_group_cache *cache;
7096         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7097         int ret;
7098         int error = 0;
7099
7100         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7101             btrfs_super_generation(root->fs_info->super_copy) !=
7102             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7103                 printf("cache and super generation don't match, space cache "
7104                        "will be invalidated\n");
7105                 return 0;
7106         }
7107
7108         if (ctx.progress_enabled) {
7109                 ctx.tp = TASK_FREE_SPACE;
7110                 task_start(ctx.info);
7111         }
7112
7113         while (1) {
7114                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7115                 if (!cache)
7116                         break;
7117
7118                 start = cache->key.objectid + cache->key.offset;
7119                 if (!cache->free_space_ctl) {
7120                         if (btrfs_init_free_space_ctl(cache,
7121                                                 root->fs_info->sectorsize)) {
7122                                 ret = -ENOMEM;
7123                                 break;
7124                         }
7125                 } else {
7126                         btrfs_remove_free_space_cache(cache);
7127                 }
7128
7129                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7130                         ret = exclude_super_stripes(root, cache);
7131                         if (ret) {
7132                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7133                                         strerror(-ret));
7134                                 error++;
7135                                 continue;
7136                         }
7137                         ret = load_free_space_tree(root->fs_info, cache);
7138                         free_excluded_extents(root, cache);
7139                         if (ret < 0) {
7140                                 fprintf(stderr, "could not load free space tree: %s\n",
7141                                         strerror(-ret));
7142                                 error++;
7143                                 continue;
7144                         }
7145                         error += ret;
7146                 } else {
7147                         ret = load_free_space_cache(root->fs_info, cache);
7148                         if (!ret)
7149                                 continue;
7150                 }
7151
7152                 ret = verify_space_cache(root, cache);
7153                 if (ret) {
7154                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7155                                 cache->key.objectid);
7156                         error++;
7157                 }
7158         }
7159
7160         task_stop(ctx.info);
7161
7162         return error ? -EINVAL : 0;
7163 }
7164
7165 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7166                         u64 num_bytes, unsigned long leaf_offset,
7167                         struct extent_buffer *eb) {
7168
7169         struct btrfs_fs_info *fs_info = root->fs_info;
7170         u64 offset = 0;
7171         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7172         char *data;
7173         unsigned long csum_offset;
7174         u32 csum;
7175         u32 csum_expected;
7176         u64 read_len;
7177         u64 data_checked = 0;
7178         u64 tmp;
7179         int ret = 0;
7180         int mirror;
7181         int num_copies;
7182
7183         if (num_bytes % fs_info->sectorsize)
7184                 return -EINVAL;
7185
7186         data = malloc(num_bytes);
7187         if (!data)
7188                 return -ENOMEM;
7189
7190         while (offset < num_bytes) {
7191                 mirror = 0;
7192 again:
7193                 read_len = num_bytes - offset;
7194                 /* read as much space once a time */
7195                 ret = read_extent_data(fs_info, data + offset,
7196                                 bytenr + offset, &read_len, mirror);
7197                 if (ret)
7198                         goto out;
7199                 data_checked = 0;
7200                 /* verify every 4k data's checksum */
7201                 while (data_checked < read_len) {
7202                         csum = ~(u32)0;
7203                         tmp = offset + data_checked;
7204
7205                         csum = btrfs_csum_data((char *)data + tmp,
7206                                                csum, fs_info->sectorsize);
7207                         btrfs_csum_final(csum, (u8 *)&csum);
7208
7209                         csum_offset = leaf_offset +
7210                                  tmp / fs_info->sectorsize * csum_size;
7211                         read_extent_buffer(eb, (char *)&csum_expected,
7212                                            csum_offset, csum_size);
7213                         /* try another mirror */
7214                         if (csum != csum_expected) {
7215                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7216                                                 mirror, bytenr + tmp,
7217                                                 csum, csum_expected);
7218                                 num_copies = btrfs_num_copies(root->fs_info,
7219                                                 bytenr, num_bytes);
7220                                 if (mirror < num_copies - 1) {
7221                                         mirror += 1;
7222                                         goto again;
7223                                 }
7224                         }
7225                         data_checked += fs_info->sectorsize;
7226                 }
7227                 offset += read_len;
7228         }
7229 out:
7230         free(data);
7231         return ret;
7232 }
7233
7234 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7235                                u64 num_bytes)
7236 {
7237         struct btrfs_path path;
7238         struct extent_buffer *leaf;
7239         struct btrfs_key key;
7240         int ret;
7241
7242         btrfs_init_path(&path);
7243         key.objectid = bytenr;
7244         key.type = BTRFS_EXTENT_ITEM_KEY;
7245         key.offset = (u64)-1;
7246
7247 again:
7248         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7249                                 0, 0);
7250         if (ret < 0) {
7251                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7252                 btrfs_release_path(&path);
7253                 return ret;
7254         } else if (ret) {
7255                 if (path.slots[0] > 0) {
7256                         path.slots[0]--;
7257                 } else {
7258                         ret = btrfs_prev_leaf(root, &path);
7259                         if (ret < 0) {
7260                                 goto out;
7261                         } else if (ret > 0) {
7262                                 ret = 0;
7263                                 goto out;
7264                         }
7265                 }
7266         }
7267
7268         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7269
7270         /*
7271          * Block group items come before extent items if they have the same
7272          * bytenr, so walk back one more just in case.  Dear future traveller,
7273          * first congrats on mastering time travel.  Now if it's not too much
7274          * trouble could you go back to 2006 and tell Chris to make the
7275          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7276          * EXTENT_ITEM_KEY please?
7277          */
7278         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7279                 if (path.slots[0] > 0) {
7280                         path.slots[0]--;
7281                 } else {
7282                         ret = btrfs_prev_leaf(root, &path);
7283                         if (ret < 0) {
7284                                 goto out;
7285                         } else if (ret > 0) {
7286                                 ret = 0;
7287                                 goto out;
7288                         }
7289                 }
7290                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7291         }
7292
7293         while (num_bytes) {
7294                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7295                         ret = btrfs_next_leaf(root, &path);
7296                         if (ret < 0) {
7297                                 fprintf(stderr, "Error going to next leaf "
7298                                         "%d\n", ret);
7299                                 btrfs_release_path(&path);
7300                                 return ret;
7301                         } else if (ret) {
7302                                 break;
7303                         }
7304                 }
7305                 leaf = path.nodes[0];
7306                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7307                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7308                         path.slots[0]++;
7309                         continue;
7310                 }
7311                 if (key.objectid + key.offset < bytenr) {
7312                         path.slots[0]++;
7313                         continue;
7314                 }
7315                 if (key.objectid > bytenr + num_bytes)
7316                         break;
7317
7318                 if (key.objectid == bytenr) {
7319                         if (key.offset >= num_bytes) {
7320                                 num_bytes = 0;
7321                                 break;
7322                         }
7323                         num_bytes -= key.offset;
7324                         bytenr += key.offset;
7325                 } else if (key.objectid < bytenr) {
7326                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7327                                 num_bytes = 0;
7328                                 break;
7329                         }
7330                         num_bytes = (bytenr + num_bytes) -
7331                                 (key.objectid + key.offset);
7332                         bytenr = key.objectid + key.offset;
7333                 } else {
7334                         if (key.objectid + key.offset < bytenr + num_bytes) {
7335                                 u64 new_start = key.objectid + key.offset;
7336                                 u64 new_bytes = bytenr + num_bytes - new_start;
7337
7338                                 /*
7339                                  * Weird case, the extent is in the middle of
7340                                  * our range, we'll have to search one side
7341                                  * and then the other.  Not sure if this happens
7342                                  * in real life, but no harm in coding it up
7343                                  * anyway just in case.
7344                                  */
7345                                 btrfs_release_path(&path);
7346                                 ret = check_extent_exists(root, new_start,
7347                                                           new_bytes);
7348                                 if (ret) {
7349                                         fprintf(stderr, "Right section didn't "
7350                                                 "have a record\n");
7351                                         break;
7352                                 }
7353                                 num_bytes = key.objectid - bytenr;
7354                                 goto again;
7355                         }
7356                         num_bytes = key.objectid - bytenr;
7357                 }
7358                 path.slots[0]++;
7359         }
7360         ret = 0;
7361
7362 out:
7363         if (num_bytes && !ret) {
7364                 fprintf(stderr, "There are no extents for csum range "
7365                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7366                 ret = 1;
7367         }
7368
7369         btrfs_release_path(&path);
7370         return ret;
7371 }
7372
7373 static int check_csums(struct btrfs_root *root)
7374 {
7375         struct btrfs_path path;
7376         struct extent_buffer *leaf;
7377         struct btrfs_key key;
7378         u64 offset = 0, num_bytes = 0;
7379         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7380         int errors = 0;
7381         int ret;
7382         u64 data_len;
7383         unsigned long leaf_offset;
7384
7385         root = root->fs_info->csum_root;
7386         if (!extent_buffer_uptodate(root->node)) {
7387                 fprintf(stderr, "No valid csum tree found\n");
7388                 return -ENOENT;
7389         }
7390
7391         btrfs_init_path(&path);
7392         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7393         key.type = BTRFS_EXTENT_CSUM_KEY;
7394         key.offset = 0;
7395         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7396         if (ret < 0) {
7397                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7398                 btrfs_release_path(&path);
7399                 return ret;
7400         }
7401
7402         if (ret > 0 && path.slots[0])
7403                 path.slots[0]--;
7404         ret = 0;
7405
7406         while (1) {
7407                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7408                         ret = btrfs_next_leaf(root, &path);
7409                         if (ret < 0) {
7410                                 fprintf(stderr, "Error going to next leaf "
7411                                         "%d\n", ret);
7412                                 break;
7413                         }
7414                         if (ret)
7415                                 break;
7416                 }
7417                 leaf = path.nodes[0];
7418
7419                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7420                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7421                         path.slots[0]++;
7422                         continue;
7423                 }
7424
7425                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7426                               csum_size) * root->fs_info->sectorsize;
7427                 if (!check_data_csum)
7428                         goto skip_csum_check;
7429                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7430                 ret = check_extent_csums(root, key.offset, data_len,
7431                                          leaf_offset, leaf);
7432                 if (ret)
7433                         break;
7434 skip_csum_check:
7435                 if (!num_bytes) {
7436                         offset = key.offset;
7437                 } else if (key.offset != offset + num_bytes) {
7438                         ret = check_extent_exists(root, offset, num_bytes);
7439                         if (ret) {
7440                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7441                                         "there is no extent record\n",
7442                                         offset, offset+num_bytes);
7443                                 errors++;
7444                         }
7445                         offset = key.offset;
7446                         num_bytes = 0;
7447                 }
7448                 num_bytes += data_len;
7449                 path.slots[0]++;
7450         }
7451
7452         btrfs_release_path(&path);
7453         return errors;
7454 }
7455
7456 static int is_dropped_key(struct btrfs_key *key,
7457                           struct btrfs_key *drop_key) {
7458         if (key->objectid < drop_key->objectid)
7459                 return 1;
7460         else if (key->objectid == drop_key->objectid) {
7461                 if (key->type < drop_key->type)
7462                         return 1;
7463                 else if (key->type == drop_key->type) {
7464                         if (key->offset < drop_key->offset)
7465                                 return 1;
7466                 }
7467         }
7468         return 0;
7469 }
7470
7471 /*
7472  * Here are the rules for FULL_BACKREF.
7473  *
7474  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7475  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7476  *      FULL_BACKREF set.
7477  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7478  *    if it happened after the relocation occurred since we'll have dropped the
7479  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7480  *    have no real way to know for sure.
7481  *
7482  * We process the blocks one root at a time, and we start from the lowest root
7483  * objectid and go to the highest.  So we can just lookup the owner backref for
7484  * the record and if we don't find it then we know it doesn't exist and we have
7485  * a FULL BACKREF.
7486  *
7487  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7488  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7489  * be set or not and then we can check later once we've gathered all the refs.
7490  */
7491 static int calc_extent_flag(struct cache_tree *extent_cache,
7492                            struct extent_buffer *buf,
7493                            struct root_item_record *ri,
7494                            u64 *flags)
7495 {
7496         struct extent_record *rec;
7497         struct cache_extent *cache;
7498         struct tree_backref *tback;
7499         u64 owner = 0;
7500
7501         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7502         /* we have added this extent before */
7503         if (!cache)
7504                 return -ENOENT;
7505
7506         rec = container_of(cache, struct extent_record, cache);
7507
7508         /*
7509          * Except file/reloc tree, we can not have
7510          * FULL BACKREF MODE
7511          */
7512         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7513                 goto normal;
7514         /*
7515          * root node
7516          */
7517         if (buf->start == ri->bytenr)
7518                 goto normal;
7519
7520         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7521                 goto full_backref;
7522
7523         owner = btrfs_header_owner(buf);
7524         if (owner == ri->objectid)
7525                 goto normal;
7526
7527         tback = find_tree_backref(rec, 0, owner);
7528         if (!tback)
7529                 goto full_backref;
7530 normal:
7531         *flags = 0;
7532         if (rec->flag_block_full_backref != FLAG_UNSET &&
7533             rec->flag_block_full_backref != 0)
7534                 rec->bad_full_backref = 1;
7535         return 0;
7536 full_backref:
7537         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7538         if (rec->flag_block_full_backref != FLAG_UNSET &&
7539             rec->flag_block_full_backref != 1)
7540                 rec->bad_full_backref = 1;
7541         return 0;
7542 }
7543
7544 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7545 {
7546         fprintf(stderr, "Invalid key type(");
7547         print_key_type(stderr, 0, key_type);
7548         fprintf(stderr, ") found in root(");
7549         print_objectid(stderr, rootid, 0);
7550         fprintf(stderr, ")\n");
7551 }
7552
7553 /*
7554  * Check if the key is valid with its extent buffer.
7555  *
7556  * This is a early check in case invalid key exists in a extent buffer
7557  * This is not comprehensive yet, but should prevent wrong key/item passed
7558  * further
7559  */
7560 static int check_type_with_root(u64 rootid, u8 key_type)
7561 {
7562         switch (key_type) {
7563         /* Only valid in chunk tree */
7564         case BTRFS_DEV_ITEM_KEY:
7565         case BTRFS_CHUNK_ITEM_KEY:
7566                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7567                         goto err;
7568                 break;
7569         /* valid in csum and log tree */
7570         case BTRFS_CSUM_TREE_OBJECTID:
7571                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7572                       is_fstree(rootid)))
7573                         goto err;
7574                 break;
7575         case BTRFS_EXTENT_ITEM_KEY:
7576         case BTRFS_METADATA_ITEM_KEY:
7577         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7578                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7579                         goto err;
7580                 break;
7581         case BTRFS_ROOT_ITEM_KEY:
7582                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7583                         goto err;
7584                 break;
7585         case BTRFS_DEV_EXTENT_KEY:
7586                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7587                         goto err;
7588                 break;
7589         }
7590         return 0;
7591 err:
7592         report_mismatch_key_root(key_type, rootid);
7593         return -EINVAL;
7594 }
7595
7596 static int run_next_block(struct btrfs_root *root,
7597                           struct block_info *bits,
7598                           int bits_nr,
7599                           u64 *last,
7600                           struct cache_tree *pending,
7601                           struct cache_tree *seen,
7602                           struct cache_tree *reada,
7603                           struct cache_tree *nodes,
7604                           struct cache_tree *extent_cache,
7605                           struct cache_tree *chunk_cache,
7606                           struct rb_root *dev_cache,
7607                           struct block_group_tree *block_group_cache,
7608                           struct device_extent_tree *dev_extent_cache,
7609                           struct root_item_record *ri)
7610 {
7611         struct extent_buffer *buf;
7612         struct extent_record *rec = NULL;
7613         u64 bytenr;
7614         u32 size;
7615         u64 parent;
7616         u64 owner;
7617         u64 flags;
7618         u64 ptr;
7619         u64 gen = 0;
7620         int ret = 0;
7621         int i;
7622         int nritems;
7623         struct btrfs_key key;
7624         struct cache_extent *cache;
7625         int reada_bits;
7626
7627         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7628                                     bits_nr, &reada_bits);
7629         if (nritems == 0)
7630                 return 1;
7631
7632         if (!reada_bits) {
7633                 for(i = 0; i < nritems; i++) {
7634                         ret = add_cache_extent(reada, bits[i].start,
7635                                                bits[i].size);
7636                         if (ret == -EEXIST)
7637                                 continue;
7638
7639                         /* fixme, get the parent transid */
7640                         readahead_tree_block(root, bits[i].start,
7641                                              bits[i].size, 0);
7642                 }
7643         }
7644         *last = bits[0].start;
7645         bytenr = bits[0].start;
7646         size = bits[0].size;
7647
7648         cache = lookup_cache_extent(pending, bytenr, size);
7649         if (cache) {
7650                 remove_cache_extent(pending, cache);
7651                 free(cache);
7652         }
7653         cache = lookup_cache_extent(reada, bytenr, size);
7654         if (cache) {
7655                 remove_cache_extent(reada, cache);
7656                 free(cache);
7657         }
7658         cache = lookup_cache_extent(nodes, bytenr, size);
7659         if (cache) {
7660                 remove_cache_extent(nodes, cache);
7661                 free(cache);
7662         }
7663         cache = lookup_cache_extent(extent_cache, bytenr, size);
7664         if (cache) {
7665                 rec = container_of(cache, struct extent_record, cache);
7666                 gen = rec->parent_generation;
7667         }
7668
7669         /* fixme, get the real parent transid */
7670         buf = read_tree_block(root->fs_info, bytenr, size, gen);
7671         if (!extent_buffer_uptodate(buf)) {
7672                 record_bad_block_io(root->fs_info,
7673                                     extent_cache, bytenr, size);
7674                 goto out;
7675         }
7676
7677         nritems = btrfs_header_nritems(buf);
7678
7679         flags = 0;
7680         if (!init_extent_tree) {
7681                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7682                                        btrfs_header_level(buf), 1, NULL,
7683                                        &flags);
7684                 if (ret < 0) {
7685                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7686                         if (ret < 0) {
7687                                 fprintf(stderr, "Couldn't calc extent flags\n");
7688                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7689                         }
7690                 }
7691         } else {
7692                 flags = 0;
7693                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7694                 if (ret < 0) {
7695                         fprintf(stderr, "Couldn't calc extent flags\n");
7696                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7697                 }
7698         }
7699
7700         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7701                 if (ri != NULL &&
7702                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7703                     ri->objectid == btrfs_header_owner(buf)) {
7704                         /*
7705                          * Ok we got to this block from it's original owner and
7706                          * we have FULL_BACKREF set.  Relocation can leave
7707                          * converted blocks over so this is altogether possible,
7708                          * however it's not possible if the generation > the
7709                          * last snapshot, so check for this case.
7710                          */
7711                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7712                             btrfs_header_generation(buf) > ri->last_snapshot) {
7713                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7714                                 rec->bad_full_backref = 1;
7715                         }
7716                 }
7717         } else {
7718                 if (ri != NULL &&
7719                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7720                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7721                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7722                         rec->bad_full_backref = 1;
7723                 }
7724         }
7725
7726         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7727                 rec->flag_block_full_backref = 1;
7728                 parent = bytenr;
7729                 owner = 0;
7730         } else {
7731                 rec->flag_block_full_backref = 0;
7732                 parent = 0;
7733                 owner = btrfs_header_owner(buf);
7734         }
7735
7736         ret = check_block(root, extent_cache, buf, flags);
7737         if (ret)
7738                 goto out;
7739
7740         if (btrfs_is_leaf(buf)) {
7741                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7742                 for (i = 0; i < nritems; i++) {
7743                         struct btrfs_file_extent_item *fi;
7744                         btrfs_item_key_to_cpu(buf, &key, i);
7745                         /*
7746                          * Check key type against the leaf owner.
7747                          * Could filter quite a lot of early error if
7748                          * owner is correct
7749                          */
7750                         if (check_type_with_root(btrfs_header_owner(buf),
7751                                                  key.type)) {
7752                                 fprintf(stderr, "ignoring invalid key\n");
7753                                 continue;
7754                         }
7755                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7756                                 process_extent_item(root, extent_cache, buf,
7757                                                     i);
7758                                 continue;
7759                         }
7760                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7761                                 process_extent_item(root, extent_cache, buf,
7762                                                     i);
7763                                 continue;
7764                         }
7765                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7766                                 total_csum_bytes +=
7767                                         btrfs_item_size_nr(buf, i);
7768                                 continue;
7769                         }
7770                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7771                                 process_chunk_item(chunk_cache, &key, buf, i);
7772                                 continue;
7773                         }
7774                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7775                                 process_device_item(dev_cache, &key, buf, i);
7776                                 continue;
7777                         }
7778                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7779                                 process_block_group_item(block_group_cache,
7780                                         &key, buf, i);
7781                                 continue;
7782                         }
7783                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7784                                 process_device_extent_item(dev_extent_cache,
7785                                         &key, buf, i);
7786                                 continue;
7787
7788                         }
7789                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7790 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7791                                 process_extent_ref_v0(extent_cache, buf, i);
7792 #else
7793                                 BUG();
7794 #endif
7795                                 continue;
7796                         }
7797
7798                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7799                                 ret = add_tree_backref(extent_cache,
7800                                                 key.objectid, 0, key.offset, 0);
7801                                 if (ret < 0)
7802                                         error(
7803                                 "add_tree_backref failed (leaf tree block): %s",
7804                                               strerror(-ret));
7805                                 continue;
7806                         }
7807                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7808                                 ret = add_tree_backref(extent_cache,
7809                                                 key.objectid, key.offset, 0, 0);
7810                                 if (ret < 0)
7811                                         error(
7812                                 "add_tree_backref failed (leaf shared block): %s",
7813                                               strerror(-ret));
7814                                 continue;
7815                         }
7816                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7817                                 struct btrfs_extent_data_ref *ref;
7818                                 ref = btrfs_item_ptr(buf, i,
7819                                                 struct btrfs_extent_data_ref);
7820                                 add_data_backref(extent_cache,
7821                                         key.objectid, 0,
7822                                         btrfs_extent_data_ref_root(buf, ref),
7823                                         btrfs_extent_data_ref_objectid(buf,
7824                                                                        ref),
7825                                         btrfs_extent_data_ref_offset(buf, ref),
7826                                         btrfs_extent_data_ref_count(buf, ref),
7827                                         0, root->fs_info->sectorsize);
7828                                 continue;
7829                         }
7830                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7831                                 struct btrfs_shared_data_ref *ref;
7832                                 ref = btrfs_item_ptr(buf, i,
7833                                                 struct btrfs_shared_data_ref);
7834                                 add_data_backref(extent_cache,
7835                                         key.objectid, key.offset, 0, 0, 0,
7836                                         btrfs_shared_data_ref_count(buf, ref),
7837                                         0, root->fs_info->sectorsize);
7838                                 continue;
7839                         }
7840                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7841                                 struct bad_item *bad;
7842
7843                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7844                                         continue;
7845                                 if (!owner)
7846                                         continue;
7847                                 bad = malloc(sizeof(struct bad_item));
7848                                 if (!bad)
7849                                         continue;
7850                                 INIT_LIST_HEAD(&bad->list);
7851                                 memcpy(&bad->key, &key,
7852                                        sizeof(struct btrfs_key));
7853                                 bad->root_id = owner;
7854                                 list_add_tail(&bad->list, &delete_items);
7855                                 continue;
7856                         }
7857                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7858                                 continue;
7859                         fi = btrfs_item_ptr(buf, i,
7860                                             struct btrfs_file_extent_item);
7861                         if (btrfs_file_extent_type(buf, fi) ==
7862                             BTRFS_FILE_EXTENT_INLINE)
7863                                 continue;
7864                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7865                                 continue;
7866
7867                         data_bytes_allocated +=
7868                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7869                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7870                                 abort();
7871                         }
7872                         data_bytes_referenced +=
7873                                 btrfs_file_extent_num_bytes(buf, fi);
7874                         add_data_backref(extent_cache,
7875                                 btrfs_file_extent_disk_bytenr(buf, fi),
7876                                 parent, owner, key.objectid, key.offset -
7877                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7878                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7879                 }
7880         } else {
7881                 int level;
7882                 struct btrfs_key first_key;
7883
7884                 first_key.objectid = 0;
7885
7886                 if (nritems > 0)
7887                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7888                 level = btrfs_header_level(buf);
7889                 for (i = 0; i < nritems; i++) {
7890                         struct extent_record tmpl;
7891
7892                         ptr = btrfs_node_blockptr(buf, i);
7893                         size = root->fs_info->nodesize;
7894                         btrfs_node_key_to_cpu(buf, &key, i);
7895                         if (ri != NULL) {
7896                                 if ((level == ri->drop_level)
7897                                     && is_dropped_key(&key, &ri->drop_key)) {
7898                                         continue;
7899                                 }
7900                         }
7901
7902                         memset(&tmpl, 0, sizeof(tmpl));
7903                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7904                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7905                         tmpl.start = ptr;
7906                         tmpl.nr = size;
7907                         tmpl.refs = 1;
7908                         tmpl.metadata = 1;
7909                         tmpl.max_size = size;
7910                         ret = add_extent_rec(extent_cache, &tmpl);
7911                         if (ret < 0)
7912                                 goto out;
7913
7914                         ret = add_tree_backref(extent_cache, ptr, parent,
7915                                         owner, 1);
7916                         if (ret < 0) {
7917                                 error(
7918                                 "add_tree_backref failed (non-leaf block): %s",
7919                                       strerror(-ret));
7920                                 continue;
7921                         }
7922
7923                         if (level > 1) {
7924                                 add_pending(nodes, seen, ptr, size);
7925                         } else {
7926                                 add_pending(pending, seen, ptr, size);
7927                         }
7928                 }
7929                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7930                                       nritems) * sizeof(struct btrfs_key_ptr);
7931         }
7932         total_btree_bytes += buf->len;
7933         if (fs_root_objectid(btrfs_header_owner(buf)))
7934                 total_fs_tree_bytes += buf->len;
7935         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7936                 total_extent_tree_bytes += buf->len;
7937         if (!found_old_backref &&
7938             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7939             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7940             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7941                 found_old_backref = 1;
7942 out:
7943         free_extent_buffer(buf);
7944         return ret;
7945 }
7946
7947 static int add_root_to_pending(struct extent_buffer *buf,
7948                                struct cache_tree *extent_cache,
7949                                struct cache_tree *pending,
7950                                struct cache_tree *seen,
7951                                struct cache_tree *nodes,
7952                                u64 objectid)
7953 {
7954         struct extent_record tmpl;
7955         int ret;
7956
7957         if (btrfs_header_level(buf) > 0)
7958                 add_pending(nodes, seen, buf->start, buf->len);
7959         else
7960                 add_pending(pending, seen, buf->start, buf->len);
7961
7962         memset(&tmpl, 0, sizeof(tmpl));
7963         tmpl.start = buf->start;
7964         tmpl.nr = buf->len;
7965         tmpl.is_root = 1;
7966         tmpl.refs = 1;
7967         tmpl.metadata = 1;
7968         tmpl.max_size = buf->len;
7969         add_extent_rec(extent_cache, &tmpl);
7970
7971         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7972             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7973                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7974                                 0, 1);
7975         else
7976                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7977                                 1);
7978         return ret;
7979 }
7980
7981 /* as we fix the tree, we might be deleting blocks that
7982  * we're tracking for repair.  This hook makes sure we
7983  * remove any backrefs for blocks as we are fixing them.
7984  */
7985 static int free_extent_hook(struct btrfs_trans_handle *trans,
7986                             struct btrfs_root *root,
7987                             u64 bytenr, u64 num_bytes, u64 parent,
7988                             u64 root_objectid, u64 owner, u64 offset,
7989                             int refs_to_drop)
7990 {
7991         struct extent_record *rec;
7992         struct cache_extent *cache;
7993         int is_data;
7994         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7995
7996         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7997         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7998         if (!cache)
7999                 return 0;
8000
8001         rec = container_of(cache, struct extent_record, cache);
8002         if (is_data) {
8003                 struct data_backref *back;
8004                 back = find_data_backref(rec, parent, root_objectid, owner,
8005                                          offset, 1, bytenr, num_bytes);
8006                 if (!back)
8007                         goto out;
8008                 if (back->node.found_ref) {
8009                         back->found_ref -= refs_to_drop;
8010                         if (rec->refs)
8011                                 rec->refs -= refs_to_drop;
8012                 }
8013                 if (back->node.found_extent_tree) {
8014                         back->num_refs -= refs_to_drop;
8015                         if (rec->extent_item_refs)
8016                                 rec->extent_item_refs -= refs_to_drop;
8017                 }
8018                 if (back->found_ref == 0)
8019                         back->node.found_ref = 0;
8020                 if (back->num_refs == 0)
8021                         back->node.found_extent_tree = 0;
8022
8023                 if (!back->node.found_extent_tree && back->node.found_ref) {
8024                         list_del(&back->node.list);
8025                         free(back);
8026                 }
8027         } else {
8028                 struct tree_backref *back;
8029                 back = find_tree_backref(rec, parent, root_objectid);
8030                 if (!back)
8031                         goto out;
8032                 if (back->node.found_ref) {
8033                         if (rec->refs)
8034                                 rec->refs--;
8035                         back->node.found_ref = 0;
8036                 }
8037                 if (back->node.found_extent_tree) {
8038                         if (rec->extent_item_refs)
8039                                 rec->extent_item_refs--;
8040                         back->node.found_extent_tree = 0;
8041                 }
8042                 if (!back->node.found_extent_tree && back->node.found_ref) {
8043                         list_del(&back->node.list);
8044                         free(back);
8045                 }
8046         }
8047         maybe_free_extent_rec(extent_cache, rec);
8048 out:
8049         return 0;
8050 }
8051
8052 static int delete_extent_records(struct btrfs_trans_handle *trans,
8053                                  struct btrfs_root *root,
8054                                  struct btrfs_path *path,
8055                                  u64 bytenr)
8056 {
8057         struct btrfs_key key;
8058         struct btrfs_key found_key;
8059         struct extent_buffer *leaf;
8060         int ret;
8061         int slot;
8062
8063
8064         key.objectid = bytenr;
8065         key.type = (u8)-1;
8066         key.offset = (u64)-1;
8067
8068         while(1) {
8069                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8070                                         &key, path, 0, 1);
8071                 if (ret < 0)
8072                         break;
8073
8074                 if (ret > 0) {
8075                         ret = 0;
8076                         if (path->slots[0] == 0)
8077                                 break;
8078                         path->slots[0]--;
8079                 }
8080                 ret = 0;
8081
8082                 leaf = path->nodes[0];
8083                 slot = path->slots[0];
8084
8085                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8086                 if (found_key.objectid != bytenr)
8087                         break;
8088
8089                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8090                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8091                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8092                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8093                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8094                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8095                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8096                         btrfs_release_path(path);
8097                         if (found_key.type == 0) {
8098                                 if (found_key.offset == 0)
8099                                         break;
8100                                 key.offset = found_key.offset - 1;
8101                                 key.type = found_key.type;
8102                         }
8103                         key.type = found_key.type - 1;
8104                         key.offset = (u64)-1;
8105                         continue;
8106                 }
8107
8108                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8109                         found_key.objectid, found_key.type, found_key.offset);
8110
8111                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8112                 if (ret)
8113                         break;
8114                 btrfs_release_path(path);
8115
8116                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8117                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8118                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8119                                 found_key.offset : root->fs_info->nodesize;
8120
8121                         ret = btrfs_update_block_group(trans, root, bytenr,
8122                                                        bytes, 0, 0);
8123                         if (ret)
8124                                 break;
8125                 }
8126         }
8127
8128         btrfs_release_path(path);
8129         return ret;
8130 }
8131
8132 /*
8133  * for a single backref, this will allocate a new extent
8134  * and add the backref to it.
8135  */
8136 static int record_extent(struct btrfs_trans_handle *trans,
8137                          struct btrfs_fs_info *info,
8138                          struct btrfs_path *path,
8139                          struct extent_record *rec,
8140                          struct extent_backref *back,
8141                          int allocated, u64 flags)
8142 {
8143         int ret = 0;
8144         struct btrfs_root *extent_root = info->extent_root;
8145         struct extent_buffer *leaf;
8146         struct btrfs_key ins_key;
8147         struct btrfs_extent_item *ei;
8148         struct data_backref *dback;
8149         struct btrfs_tree_block_info *bi;
8150
8151         if (!back->is_data)
8152                 rec->max_size = max_t(u64, rec->max_size,
8153                                     info->nodesize);
8154
8155         if (!allocated) {
8156                 u32 item_size = sizeof(*ei);
8157
8158                 if (!back->is_data)
8159                         item_size += sizeof(*bi);
8160
8161                 ins_key.objectid = rec->start;
8162                 ins_key.offset = rec->max_size;
8163                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8164
8165                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8166                                         &ins_key, item_size);
8167                 if (ret)
8168                         goto fail;
8169
8170                 leaf = path->nodes[0];
8171                 ei = btrfs_item_ptr(leaf, path->slots[0],
8172                                     struct btrfs_extent_item);
8173
8174                 btrfs_set_extent_refs(leaf, ei, 0);
8175                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8176
8177                 if (back->is_data) {
8178                         btrfs_set_extent_flags(leaf, ei,
8179                                                BTRFS_EXTENT_FLAG_DATA);
8180                 } else {
8181                         struct btrfs_disk_key copy_key;;
8182
8183                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8184                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8185                                              sizeof(*bi));
8186
8187                         btrfs_set_disk_key_objectid(&copy_key,
8188                                                     rec->info_objectid);
8189                         btrfs_set_disk_key_type(&copy_key, 0);
8190                         btrfs_set_disk_key_offset(&copy_key, 0);
8191
8192                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8193                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8194
8195                         btrfs_set_extent_flags(leaf, ei,
8196                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8197                 }
8198
8199                 btrfs_mark_buffer_dirty(leaf);
8200                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8201                                                rec->max_size, 1, 0);
8202                 if (ret)
8203                         goto fail;
8204                 btrfs_release_path(path);
8205         }
8206
8207         if (back->is_data) {
8208                 u64 parent;
8209                 int i;
8210
8211                 dback = to_data_backref(back);
8212                 if (back->full_backref)
8213                         parent = dback->parent;
8214                 else
8215                         parent = 0;
8216
8217                 for (i = 0; i < dback->found_ref; i++) {
8218                         /* if parent != 0, we're doing a full backref
8219                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8220                          * just makes the backref allocator create a data
8221                          * backref
8222                          */
8223                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8224                                                    rec->start, rec->max_size,
8225                                                    parent,
8226                                                    dback->root,
8227                                                    parent ?
8228                                                    BTRFS_FIRST_FREE_OBJECTID :
8229                                                    dback->owner,
8230                                                    dback->offset);
8231                         if (ret)
8232                                 break;
8233                 }
8234                 fprintf(stderr, "adding new data backref"
8235                                 " on %llu %s %llu owner %llu"
8236                                 " offset %llu found %d\n",
8237                                 (unsigned long long)rec->start,
8238                                 back->full_backref ?
8239                                 "parent" : "root",
8240                                 back->full_backref ?
8241                                 (unsigned long long)parent :
8242                                 (unsigned long long)dback->root,
8243                                 (unsigned long long)dback->owner,
8244                                 (unsigned long long)dback->offset,
8245                                 dback->found_ref);
8246         } else {
8247                 u64 parent;
8248                 struct tree_backref *tback;
8249
8250                 tback = to_tree_backref(back);
8251                 if (back->full_backref)
8252                         parent = tback->parent;
8253                 else
8254                         parent = 0;
8255
8256                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8257                                            rec->start, rec->max_size,
8258                                            parent, tback->root, 0, 0);
8259                 fprintf(stderr, "adding new tree backref on "
8260                         "start %llu len %llu parent %llu root %llu\n",
8261                         rec->start, rec->max_size, parent, tback->root);
8262         }
8263 fail:
8264         btrfs_release_path(path);
8265         return ret;
8266 }
8267
8268 static struct extent_entry *find_entry(struct list_head *entries,
8269                                        u64 bytenr, u64 bytes)
8270 {
8271         struct extent_entry *entry = NULL;
8272
8273         list_for_each_entry(entry, entries, list) {
8274                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8275                         return entry;
8276         }
8277
8278         return NULL;
8279 }
8280
8281 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8282 {
8283         struct extent_entry *entry, *best = NULL, *prev = NULL;
8284
8285         list_for_each_entry(entry, entries, list) {
8286                 /*
8287                  * If there are as many broken entries as entries then we know
8288                  * not to trust this particular entry.
8289                  */
8290                 if (entry->broken == entry->count)
8291                         continue;
8292
8293                 /*
8294                  * Special case, when there are only two entries and 'best' is
8295                  * the first one
8296                  */
8297                 if (!prev) {
8298                         best = entry;
8299                         prev = entry;
8300                         continue;
8301                 }
8302
8303                 /*
8304                  * If our current entry == best then we can't be sure our best
8305                  * is really the best, so we need to keep searching.
8306                  */
8307                 if (best && best->count == entry->count) {
8308                         prev = entry;
8309                         best = NULL;
8310                         continue;
8311                 }
8312
8313                 /* Prev == entry, not good enough, have to keep searching */
8314                 if (!prev->broken && prev->count == entry->count)
8315                         continue;
8316
8317                 if (!best)
8318                         best = (prev->count > entry->count) ? prev : entry;
8319                 else if (best->count < entry->count)
8320                         best = entry;
8321                 prev = entry;
8322         }
8323
8324         return best;
8325 }
8326
8327 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8328                       struct data_backref *dback, struct extent_entry *entry)
8329 {
8330         struct btrfs_trans_handle *trans;
8331         struct btrfs_root *root;
8332         struct btrfs_file_extent_item *fi;
8333         struct extent_buffer *leaf;
8334         struct btrfs_key key;
8335         u64 bytenr, bytes;
8336         int ret, err;
8337
8338         key.objectid = dback->root;
8339         key.type = BTRFS_ROOT_ITEM_KEY;
8340         key.offset = (u64)-1;
8341         root = btrfs_read_fs_root(info, &key);
8342         if (IS_ERR(root)) {
8343                 fprintf(stderr, "Couldn't find root for our ref\n");
8344                 return -EINVAL;
8345         }
8346
8347         /*
8348          * The backref points to the original offset of the extent if it was
8349          * split, so we need to search down to the offset we have and then walk
8350          * forward until we find the backref we're looking for.
8351          */
8352         key.objectid = dback->owner;
8353         key.type = BTRFS_EXTENT_DATA_KEY;
8354         key.offset = dback->offset;
8355         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8356         if (ret < 0) {
8357                 fprintf(stderr, "Error looking up ref %d\n", ret);
8358                 return ret;
8359         }
8360
8361         while (1) {
8362                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8363                         ret = btrfs_next_leaf(root, path);
8364                         if (ret) {
8365                                 fprintf(stderr, "Couldn't find our ref, next\n");
8366                                 return -EINVAL;
8367                         }
8368                 }
8369                 leaf = path->nodes[0];
8370                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8371                 if (key.objectid != dback->owner ||
8372                     key.type != BTRFS_EXTENT_DATA_KEY) {
8373                         fprintf(stderr, "Couldn't find our ref, search\n");
8374                         return -EINVAL;
8375                 }
8376                 fi = btrfs_item_ptr(leaf, path->slots[0],
8377                                     struct btrfs_file_extent_item);
8378                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8379                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8380
8381                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8382                         break;
8383                 path->slots[0]++;
8384         }
8385
8386         btrfs_release_path(path);
8387
8388         trans = btrfs_start_transaction(root, 1);
8389         if (IS_ERR(trans))
8390                 return PTR_ERR(trans);
8391
8392         /*
8393          * Ok we have the key of the file extent we want to fix, now we can cow
8394          * down to the thing and fix it.
8395          */
8396         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8397         if (ret < 0) {
8398                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8399                         key.objectid, key.type, key.offset, ret);
8400                 goto out;
8401         }
8402         if (ret > 0) {
8403                 fprintf(stderr, "Well that's odd, we just found this key "
8404                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8405                         key.offset);
8406                 ret = -EINVAL;
8407                 goto out;
8408         }
8409         leaf = path->nodes[0];
8410         fi = btrfs_item_ptr(leaf, path->slots[0],
8411                             struct btrfs_file_extent_item);
8412
8413         if (btrfs_file_extent_compression(leaf, fi) &&
8414             dback->disk_bytenr != entry->bytenr) {
8415                 fprintf(stderr, "Ref doesn't match the record start and is "
8416                         "compressed, please take a btrfs-image of this file "
8417                         "system and send it to a btrfs developer so they can "
8418                         "complete this functionality for bytenr %Lu\n",
8419                         dback->disk_bytenr);
8420                 ret = -EINVAL;
8421                 goto out;
8422         }
8423
8424         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8425                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8426         } else if (dback->disk_bytenr > entry->bytenr) {
8427                 u64 off_diff, offset;
8428
8429                 off_diff = dback->disk_bytenr - entry->bytenr;
8430                 offset = btrfs_file_extent_offset(leaf, fi);
8431                 if (dback->disk_bytenr + offset +
8432                     btrfs_file_extent_num_bytes(leaf, fi) >
8433                     entry->bytenr + entry->bytes) {
8434                         fprintf(stderr, "Ref is past the entry end, please "
8435                                 "take a btrfs-image of this file system and "
8436                                 "send it to a btrfs developer, ref %Lu\n",
8437                                 dback->disk_bytenr);
8438                         ret = -EINVAL;
8439                         goto out;
8440                 }
8441                 offset += off_diff;
8442                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8443                 btrfs_set_file_extent_offset(leaf, fi, offset);
8444         } else if (dback->disk_bytenr < entry->bytenr) {
8445                 u64 offset;
8446
8447                 offset = btrfs_file_extent_offset(leaf, fi);
8448                 if (dback->disk_bytenr + offset < entry->bytenr) {
8449                         fprintf(stderr, "Ref is before the entry start, please"
8450                                 " take a btrfs-image of this file system and "
8451                                 "send it to a btrfs developer, ref %Lu\n",
8452                                 dback->disk_bytenr);
8453                         ret = -EINVAL;
8454                         goto out;
8455                 }
8456
8457                 offset += dback->disk_bytenr;
8458                 offset -= entry->bytenr;
8459                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8460                 btrfs_set_file_extent_offset(leaf, fi, offset);
8461         }
8462
8463         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8464
8465         /*
8466          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8467          * only do this if we aren't using compression, otherwise it's a
8468          * trickier case.
8469          */
8470         if (!btrfs_file_extent_compression(leaf, fi))
8471                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8472         else
8473                 printf("ram bytes may be wrong?\n");
8474         btrfs_mark_buffer_dirty(leaf);
8475 out:
8476         err = btrfs_commit_transaction(trans, root);
8477         btrfs_release_path(path);
8478         return ret ? ret : err;
8479 }
8480
8481 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8482                            struct extent_record *rec)
8483 {
8484         struct extent_backref *back;
8485         struct data_backref *dback;
8486         struct extent_entry *entry, *best = NULL;
8487         LIST_HEAD(entries);
8488         int nr_entries = 0;
8489         int broken_entries = 0;
8490         int ret = 0;
8491         short mismatch = 0;
8492
8493         /*
8494          * Metadata is easy and the backrefs should always agree on bytenr and
8495          * size, if not we've got bigger issues.
8496          */
8497         if (rec->metadata)
8498                 return 0;
8499
8500         list_for_each_entry(back, &rec->backrefs, list) {
8501                 if (back->full_backref || !back->is_data)
8502                         continue;
8503
8504                 dback = to_data_backref(back);
8505
8506                 /*
8507                  * We only pay attention to backrefs that we found a real
8508                  * backref for.
8509                  */
8510                 if (dback->found_ref == 0)
8511                         continue;
8512
8513                 /*
8514                  * For now we only catch when the bytes don't match, not the
8515                  * bytenr.  We can easily do this at the same time, but I want
8516                  * to have a fs image to test on before we just add repair
8517                  * functionality willy-nilly so we know we won't screw up the
8518                  * repair.
8519                  */
8520
8521                 entry = find_entry(&entries, dback->disk_bytenr,
8522                                    dback->bytes);
8523                 if (!entry) {
8524                         entry = malloc(sizeof(struct extent_entry));
8525                         if (!entry) {
8526                                 ret = -ENOMEM;
8527                                 goto out;
8528                         }
8529                         memset(entry, 0, sizeof(*entry));
8530                         entry->bytenr = dback->disk_bytenr;
8531                         entry->bytes = dback->bytes;
8532                         list_add_tail(&entry->list, &entries);
8533                         nr_entries++;
8534                 }
8535
8536                 /*
8537                  * If we only have on entry we may think the entries agree when
8538                  * in reality they don't so we have to do some extra checking.
8539                  */
8540                 if (dback->disk_bytenr != rec->start ||
8541                     dback->bytes != rec->nr || back->broken)
8542                         mismatch = 1;
8543
8544                 if (back->broken) {
8545                         entry->broken++;
8546                         broken_entries++;
8547                 }
8548
8549                 entry->count++;
8550         }
8551
8552         /* Yay all the backrefs agree, carry on good sir */
8553         if (nr_entries <= 1 && !mismatch)
8554                 goto out;
8555
8556         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8557                 "%Lu\n", rec->start);
8558
8559         /*
8560          * First we want to see if the backrefs can agree amongst themselves who
8561          * is right, so figure out which one of the entries has the highest
8562          * count.
8563          */
8564         best = find_most_right_entry(&entries);
8565
8566         /*
8567          * Ok so we may have an even split between what the backrefs think, so
8568          * this is where we use the extent ref to see what it thinks.
8569          */
8570         if (!best) {
8571                 entry = find_entry(&entries, rec->start, rec->nr);
8572                 if (!entry && (!broken_entries || !rec->found_rec)) {
8573                         fprintf(stderr, "Backrefs don't agree with each other "
8574                                 "and extent record doesn't agree with anybody,"
8575                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8576                                 rec->start, rec->nr);
8577                         ret = -EINVAL;
8578                         goto out;
8579                 } else if (!entry) {
8580                         /*
8581                          * Ok our backrefs were broken, we'll assume this is the
8582                          * correct value and add an entry for this range.
8583                          */
8584                         entry = malloc(sizeof(struct extent_entry));
8585                         if (!entry) {
8586                                 ret = -ENOMEM;
8587                                 goto out;
8588                         }
8589                         memset(entry, 0, sizeof(*entry));
8590                         entry->bytenr = rec->start;
8591                         entry->bytes = rec->nr;
8592                         list_add_tail(&entry->list, &entries);
8593                         nr_entries++;
8594                 }
8595                 entry->count++;
8596                 best = find_most_right_entry(&entries);
8597                 if (!best) {
8598                         fprintf(stderr, "Backrefs and extent record evenly "
8599                                 "split on who is right, this is going to "
8600                                 "require user input to fix bytenr %Lu bytes "
8601                                 "%Lu\n", rec->start, rec->nr);
8602                         ret = -EINVAL;
8603                         goto out;
8604                 }
8605         }
8606
8607         /*
8608          * I don't think this can happen currently as we'll abort() if we catch
8609          * this case higher up, but in case somebody removes that we still can't
8610          * deal with it properly here yet, so just bail out of that's the case.
8611          */
8612         if (best->bytenr != rec->start) {
8613                 fprintf(stderr, "Extent start and backref starts don't match, "
8614                         "please use btrfs-image on this file system and send "
8615                         "it to a btrfs developer so they can make fsck fix "
8616                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8617                         rec->start, rec->nr);
8618                 ret = -EINVAL;
8619                 goto out;
8620         }
8621
8622         /*
8623          * Ok great we all agreed on an extent record, let's go find the real
8624          * references and fix up the ones that don't match.
8625          */
8626         list_for_each_entry(back, &rec->backrefs, list) {
8627                 if (back->full_backref || !back->is_data)
8628                         continue;
8629
8630                 dback = to_data_backref(back);
8631
8632                 /*
8633                  * Still ignoring backrefs that don't have a real ref attached
8634                  * to them.
8635                  */
8636                 if (dback->found_ref == 0)
8637                         continue;
8638
8639                 if (dback->bytes == best->bytes &&
8640                     dback->disk_bytenr == best->bytenr)
8641                         continue;
8642
8643                 ret = repair_ref(info, path, dback, best);
8644                 if (ret)
8645                         goto out;
8646         }
8647
8648         /*
8649          * Ok we messed with the actual refs, which means we need to drop our
8650          * entire cache and go back and rescan.  I know this is a huge pain and
8651          * adds a lot of extra work, but it's the only way to be safe.  Once all
8652          * the backrefs agree we may not need to do anything to the extent
8653          * record itself.
8654          */
8655         ret = -EAGAIN;
8656 out:
8657         while (!list_empty(&entries)) {
8658                 entry = list_entry(entries.next, struct extent_entry, list);
8659                 list_del_init(&entry->list);
8660                 free(entry);
8661         }
8662         return ret;
8663 }
8664
8665 static int process_duplicates(struct cache_tree *extent_cache,
8666                               struct extent_record *rec)
8667 {
8668         struct extent_record *good, *tmp;
8669         struct cache_extent *cache;
8670         int ret;
8671
8672         /*
8673          * If we found a extent record for this extent then return, or if we
8674          * have more than one duplicate we are likely going to need to delete
8675          * something.
8676          */
8677         if (rec->found_rec || rec->num_duplicates > 1)
8678                 return 0;
8679
8680         /* Shouldn't happen but just in case */
8681         BUG_ON(!rec->num_duplicates);
8682
8683         /*
8684          * So this happens if we end up with a backref that doesn't match the
8685          * actual extent entry.  So either the backref is bad or the extent
8686          * entry is bad.  Either way we want to have the extent_record actually
8687          * reflect what we found in the extent_tree, so we need to take the
8688          * duplicate out and use that as the extent_record since the only way we
8689          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8690          */
8691         remove_cache_extent(extent_cache, &rec->cache);
8692
8693         good = to_extent_record(rec->dups.next);
8694         list_del_init(&good->list);
8695         INIT_LIST_HEAD(&good->backrefs);
8696         INIT_LIST_HEAD(&good->dups);
8697         good->cache.start = good->start;
8698         good->cache.size = good->nr;
8699         good->content_checked = 0;
8700         good->owner_ref_checked = 0;
8701         good->num_duplicates = 0;
8702         good->refs = rec->refs;
8703         list_splice_init(&rec->backrefs, &good->backrefs);
8704         while (1) {
8705                 cache = lookup_cache_extent(extent_cache, good->start,
8706                                             good->nr);
8707                 if (!cache)
8708                         break;
8709                 tmp = container_of(cache, struct extent_record, cache);
8710
8711                 /*
8712                  * If we find another overlapping extent and it's found_rec is
8713                  * set then it's a duplicate and we need to try and delete
8714                  * something.
8715                  */
8716                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8717                         if (list_empty(&good->list))
8718                                 list_add_tail(&good->list,
8719                                               &duplicate_extents);
8720                         good->num_duplicates += tmp->num_duplicates + 1;
8721                         list_splice_init(&tmp->dups, &good->dups);
8722                         list_del_init(&tmp->list);
8723                         list_add_tail(&tmp->list, &good->dups);
8724                         remove_cache_extent(extent_cache, &tmp->cache);
8725                         continue;
8726                 }
8727
8728                 /*
8729                  * Ok we have another non extent item backed extent rec, so lets
8730                  * just add it to this extent and carry on like we did above.
8731                  */
8732                 good->refs += tmp->refs;
8733                 list_splice_init(&tmp->backrefs, &good->backrefs);
8734                 remove_cache_extent(extent_cache, &tmp->cache);
8735                 free(tmp);
8736         }
8737         ret = insert_cache_extent(extent_cache, &good->cache);
8738         BUG_ON(ret);
8739         free(rec);
8740         return good->num_duplicates ? 0 : 1;
8741 }
8742
8743 static int delete_duplicate_records(struct btrfs_root *root,
8744                                     struct extent_record *rec)
8745 {
8746         struct btrfs_trans_handle *trans;
8747         LIST_HEAD(delete_list);
8748         struct btrfs_path path;
8749         struct extent_record *tmp, *good, *n;
8750         int nr_del = 0;
8751         int ret = 0, err;
8752         struct btrfs_key key;
8753
8754         btrfs_init_path(&path);
8755
8756         good = rec;
8757         /* Find the record that covers all of the duplicates. */
8758         list_for_each_entry(tmp, &rec->dups, list) {
8759                 if (good->start < tmp->start)
8760                         continue;
8761                 if (good->nr > tmp->nr)
8762                         continue;
8763
8764                 if (tmp->start + tmp->nr < good->start + good->nr) {
8765                         fprintf(stderr, "Ok we have overlapping extents that "
8766                                 "aren't completely covered by each other, this "
8767                                 "is going to require more careful thought.  "
8768                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8769                                 tmp->start, tmp->nr, good->start, good->nr);
8770                         abort();
8771                 }
8772                 good = tmp;
8773         }
8774
8775         if (good != rec)
8776                 list_add_tail(&rec->list, &delete_list);
8777
8778         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8779                 if (tmp == good)
8780                         continue;
8781                 list_move_tail(&tmp->list, &delete_list);
8782         }
8783
8784         root = root->fs_info->extent_root;
8785         trans = btrfs_start_transaction(root, 1);
8786         if (IS_ERR(trans)) {
8787                 ret = PTR_ERR(trans);
8788                 goto out;
8789         }
8790
8791         list_for_each_entry(tmp, &delete_list, list) {
8792                 if (tmp->found_rec == 0)
8793                         continue;
8794                 key.objectid = tmp->start;
8795                 key.type = BTRFS_EXTENT_ITEM_KEY;
8796                 key.offset = tmp->nr;
8797
8798                 /* Shouldn't happen but just in case */
8799                 if (tmp->metadata) {
8800                         fprintf(stderr, "Well this shouldn't happen, extent "
8801                                 "record overlaps but is metadata? "
8802                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8803                         abort();
8804                 }
8805
8806                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8807                 if (ret) {
8808                         if (ret > 0)
8809                                 ret = -EINVAL;
8810                         break;
8811                 }
8812                 ret = btrfs_del_item(trans, root, &path);
8813                 if (ret)
8814                         break;
8815                 btrfs_release_path(&path);
8816                 nr_del++;
8817         }
8818         err = btrfs_commit_transaction(trans, root);
8819         if (err && !ret)
8820                 ret = err;
8821 out:
8822         while (!list_empty(&delete_list)) {
8823                 tmp = to_extent_record(delete_list.next);
8824                 list_del_init(&tmp->list);
8825                 if (tmp == rec)
8826                         continue;
8827                 free(tmp);
8828         }
8829
8830         while (!list_empty(&rec->dups)) {
8831                 tmp = to_extent_record(rec->dups.next);
8832                 list_del_init(&tmp->list);
8833                 free(tmp);
8834         }
8835
8836         btrfs_release_path(&path);
8837
8838         if (!ret && !nr_del)
8839                 rec->num_duplicates = 0;
8840
8841         return ret ? ret : nr_del;
8842 }
8843
8844 static int find_possible_backrefs(struct btrfs_fs_info *info,
8845                                   struct btrfs_path *path,
8846                                   struct cache_tree *extent_cache,
8847                                   struct extent_record *rec)
8848 {
8849         struct btrfs_root *root;
8850         struct extent_backref *back;
8851         struct data_backref *dback;
8852         struct cache_extent *cache;
8853         struct btrfs_file_extent_item *fi;
8854         struct btrfs_key key;
8855         u64 bytenr, bytes;
8856         int ret;
8857
8858         list_for_each_entry(back, &rec->backrefs, list) {
8859                 /* Don't care about full backrefs (poor unloved backrefs) */
8860                 if (back->full_backref || !back->is_data)
8861                         continue;
8862
8863                 dback = to_data_backref(back);
8864
8865                 /* We found this one, we don't need to do a lookup */
8866                 if (dback->found_ref)
8867                         continue;
8868
8869                 key.objectid = dback->root;
8870                 key.type = BTRFS_ROOT_ITEM_KEY;
8871                 key.offset = (u64)-1;
8872
8873                 root = btrfs_read_fs_root(info, &key);
8874
8875                 /* No root, definitely a bad ref, skip */
8876                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8877                         continue;
8878                 /* Other err, exit */
8879                 if (IS_ERR(root))
8880                         return PTR_ERR(root);
8881
8882                 key.objectid = dback->owner;
8883                 key.type = BTRFS_EXTENT_DATA_KEY;
8884                 key.offset = dback->offset;
8885                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8886                 if (ret) {
8887                         btrfs_release_path(path);
8888                         if (ret < 0)
8889                                 return ret;
8890                         /* Didn't find it, we can carry on */
8891                         ret = 0;
8892                         continue;
8893                 }
8894
8895                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8896                                     struct btrfs_file_extent_item);
8897                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8898                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8899                 btrfs_release_path(path);
8900                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8901                 if (cache) {
8902                         struct extent_record *tmp;
8903                         tmp = container_of(cache, struct extent_record, cache);
8904
8905                         /*
8906                          * If we found an extent record for the bytenr for this
8907                          * particular backref then we can't add it to our
8908                          * current extent record.  We only want to add backrefs
8909                          * that don't have a corresponding extent item in the
8910                          * extent tree since they likely belong to this record
8911                          * and we need to fix it if it doesn't match bytenrs.
8912                          */
8913                         if  (tmp->found_rec)
8914                                 continue;
8915                 }
8916
8917                 dback->found_ref += 1;
8918                 dback->disk_bytenr = bytenr;
8919                 dback->bytes = bytes;
8920
8921                 /*
8922                  * Set this so the verify backref code knows not to trust the
8923                  * values in this backref.
8924                  */
8925                 back->broken = 1;
8926         }
8927
8928         return 0;
8929 }
8930
8931 /*
8932  * Record orphan data ref into corresponding root.
8933  *
8934  * Return 0 if the extent item contains data ref and recorded.
8935  * Return 1 if the extent item contains no useful data ref
8936  *   On that case, it may contains only shared_dataref or metadata backref
8937  *   or the file extent exists(this should be handled by the extent bytenr
8938  *   recovery routine)
8939  * Return <0 if something goes wrong.
8940  */
8941 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8942                                       struct extent_record *rec)
8943 {
8944         struct btrfs_key key;
8945         struct btrfs_root *dest_root;
8946         struct extent_backref *back;
8947         struct data_backref *dback;
8948         struct orphan_data_extent *orphan;
8949         struct btrfs_path path;
8950         int recorded_data_ref = 0;
8951         int ret = 0;
8952
8953         if (rec->metadata)
8954                 return 1;
8955         btrfs_init_path(&path);
8956         list_for_each_entry(back, &rec->backrefs, list) {
8957                 if (back->full_backref || !back->is_data ||
8958                     !back->found_extent_tree)
8959                         continue;
8960                 dback = to_data_backref(back);
8961                 if (dback->found_ref)
8962                         continue;
8963                 key.objectid = dback->root;
8964                 key.type = BTRFS_ROOT_ITEM_KEY;
8965                 key.offset = (u64)-1;
8966
8967                 dest_root = btrfs_read_fs_root(fs_info, &key);
8968
8969                 /* For non-exist root we just skip it */
8970                 if (IS_ERR(dest_root) || !dest_root)
8971                         continue;
8972
8973                 key.objectid = dback->owner;
8974                 key.type = BTRFS_EXTENT_DATA_KEY;
8975                 key.offset = dback->offset;
8976
8977                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8978                 btrfs_release_path(&path);
8979                 /*
8980                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8981                  * we need to record it for inode/file extent rebuild.
8982                  * For ret > 0, we record it only for file extent rebuild.
8983                  * For ret == 0, the file extent exists but only bytenr
8984                  * mismatch, let the original bytenr fix routine to handle,
8985                  * don't record it.
8986                  */
8987                 if (ret == 0)
8988                         continue;
8989                 ret = 0;
8990                 orphan = malloc(sizeof(*orphan));
8991                 if (!orphan) {
8992                         ret = -ENOMEM;
8993                         goto out;
8994                 }
8995                 INIT_LIST_HEAD(&orphan->list);
8996                 orphan->root = dback->root;
8997                 orphan->objectid = dback->owner;
8998                 orphan->offset = dback->offset;
8999                 orphan->disk_bytenr = rec->cache.start;
9000                 orphan->disk_len = rec->cache.size;
9001                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9002                 recorded_data_ref = 1;
9003         }
9004 out:
9005         btrfs_release_path(&path);
9006         if (!ret)
9007                 return !recorded_data_ref;
9008         else
9009                 return ret;
9010 }
9011
9012 /*
9013  * when an incorrect extent item is found, this will delete
9014  * all of the existing entries for it and recreate them
9015  * based on what the tree scan found.
9016  */
9017 static int fixup_extent_refs(struct btrfs_fs_info *info,
9018                              struct cache_tree *extent_cache,
9019                              struct extent_record *rec)
9020 {
9021         struct btrfs_trans_handle *trans = NULL;
9022         int ret;
9023         struct btrfs_path path;
9024         struct list_head *cur = rec->backrefs.next;
9025         struct cache_extent *cache;
9026         struct extent_backref *back;
9027         int allocated = 0;
9028         u64 flags = 0;
9029
9030         if (rec->flag_block_full_backref)
9031                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9032
9033         btrfs_init_path(&path);
9034         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9035                 /*
9036                  * Sometimes the backrefs themselves are so broken they don't
9037                  * get attached to any meaningful rec, so first go back and
9038                  * check any of our backrefs that we couldn't find and throw
9039                  * them into the list if we find the backref so that
9040                  * verify_backrefs can figure out what to do.
9041                  */
9042                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9043                 if (ret < 0)
9044                         goto out;
9045         }
9046
9047         /* step one, make sure all of the backrefs agree */
9048         ret = verify_backrefs(info, &path, rec);
9049         if (ret < 0)
9050                 goto out;
9051
9052         trans = btrfs_start_transaction(info->extent_root, 1);
9053         if (IS_ERR(trans)) {
9054                 ret = PTR_ERR(trans);
9055                 goto out;
9056         }
9057
9058         /* step two, delete all the existing records */
9059         ret = delete_extent_records(trans, info->extent_root, &path,
9060                                     rec->start);
9061
9062         if (ret < 0)
9063                 goto out;
9064
9065         /* was this block corrupt?  If so, don't add references to it */
9066         cache = lookup_cache_extent(info->corrupt_blocks,
9067                                     rec->start, rec->max_size);
9068         if (cache) {
9069                 ret = 0;
9070                 goto out;
9071         }
9072
9073         /* step three, recreate all the refs we did find */
9074         while(cur != &rec->backrefs) {
9075                 back = to_extent_backref(cur);
9076                 cur = cur->next;
9077
9078                 /*
9079                  * if we didn't find any references, don't create a
9080                  * new extent record
9081                  */
9082                 if (!back->found_ref)
9083                         continue;
9084
9085                 rec->bad_full_backref = 0;
9086                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9087                 allocated = 1;
9088
9089                 if (ret)
9090                         goto out;
9091         }
9092 out:
9093         if (trans) {
9094                 int err = btrfs_commit_transaction(trans, info->extent_root);
9095                 if (!ret)
9096                         ret = err;
9097         }
9098
9099         if (!ret)
9100                 fprintf(stderr, "Repaired extent references for %llu\n",
9101                                 (unsigned long long)rec->start);
9102
9103         btrfs_release_path(&path);
9104         return ret;
9105 }
9106
9107 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9108                               struct extent_record *rec)
9109 {
9110         struct btrfs_trans_handle *trans;
9111         struct btrfs_root *root = fs_info->extent_root;
9112         struct btrfs_path path;
9113         struct btrfs_extent_item *ei;
9114         struct btrfs_key key;
9115         u64 flags;
9116         int ret = 0;
9117
9118         key.objectid = rec->start;
9119         if (rec->metadata) {
9120                 key.type = BTRFS_METADATA_ITEM_KEY;
9121                 key.offset = rec->info_level;
9122         } else {
9123                 key.type = BTRFS_EXTENT_ITEM_KEY;
9124                 key.offset = rec->max_size;
9125         }
9126
9127         trans = btrfs_start_transaction(root, 0);
9128         if (IS_ERR(trans))
9129                 return PTR_ERR(trans);
9130
9131         btrfs_init_path(&path);
9132         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9133         if (ret < 0) {
9134                 btrfs_release_path(&path);
9135                 btrfs_commit_transaction(trans, root);
9136                 return ret;
9137         } else if (ret) {
9138                 fprintf(stderr, "Didn't find extent for %llu\n",
9139                         (unsigned long long)rec->start);
9140                 btrfs_release_path(&path);
9141                 btrfs_commit_transaction(trans, root);
9142                 return -ENOENT;
9143         }
9144
9145         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9146                             struct btrfs_extent_item);
9147         flags = btrfs_extent_flags(path.nodes[0], ei);
9148         if (rec->flag_block_full_backref) {
9149                 fprintf(stderr, "setting full backref on %llu\n",
9150                         (unsigned long long)key.objectid);
9151                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9152         } else {
9153                 fprintf(stderr, "clearing full backref on %llu\n",
9154                         (unsigned long long)key.objectid);
9155                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9156         }
9157         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9158         btrfs_mark_buffer_dirty(path.nodes[0]);
9159         btrfs_release_path(&path);
9160         ret = btrfs_commit_transaction(trans, root);
9161         if (!ret)
9162                 fprintf(stderr, "Repaired extent flags for %llu\n",
9163                                 (unsigned long long)rec->start);
9164
9165         return ret;
9166 }
9167
9168 /* right now we only prune from the extent allocation tree */
9169 static int prune_one_block(struct btrfs_trans_handle *trans,
9170                            struct btrfs_fs_info *info,
9171                            struct btrfs_corrupt_block *corrupt)
9172 {
9173         int ret;
9174         struct btrfs_path path;
9175         struct extent_buffer *eb;
9176         u64 found;
9177         int slot;
9178         int nritems;
9179         int level = corrupt->level + 1;
9180
9181         btrfs_init_path(&path);
9182 again:
9183         /* we want to stop at the parent to our busted block */
9184         path.lowest_level = level;
9185
9186         ret = btrfs_search_slot(trans, info->extent_root,
9187                                 &corrupt->key, &path, -1, 1);
9188
9189         if (ret < 0)
9190                 goto out;
9191
9192         eb = path.nodes[level];
9193         if (!eb) {
9194                 ret = -ENOENT;
9195                 goto out;
9196         }
9197
9198         /*
9199          * hopefully the search gave us the block we want to prune,
9200          * lets try that first
9201          */
9202         slot = path.slots[level];
9203         found =  btrfs_node_blockptr(eb, slot);
9204         if (found == corrupt->cache.start)
9205                 goto del_ptr;
9206
9207         nritems = btrfs_header_nritems(eb);
9208
9209         /* the search failed, lets scan this node and hope we find it */
9210         for (slot = 0; slot < nritems; slot++) {
9211                 found =  btrfs_node_blockptr(eb, slot);
9212                 if (found == corrupt->cache.start)
9213                         goto del_ptr;
9214         }
9215         /*
9216          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9217          * to this block
9218          */
9219         if (eb == info->extent_root->node) {
9220                 ret = -ENOENT;
9221                 goto out;
9222         } else {
9223                 level++;
9224                 btrfs_release_path(&path);
9225                 goto again;
9226         }
9227
9228 del_ptr:
9229         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9230         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9231
9232 out:
9233         btrfs_release_path(&path);
9234         return ret;
9235 }
9236
9237 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9238 {
9239         struct btrfs_trans_handle *trans = NULL;
9240         struct cache_extent *cache;
9241         struct btrfs_corrupt_block *corrupt;
9242
9243         while (1) {
9244                 cache = search_cache_extent(info->corrupt_blocks, 0);
9245                 if (!cache)
9246                         break;
9247                 if (!trans) {
9248                         trans = btrfs_start_transaction(info->extent_root, 1);
9249                         if (IS_ERR(trans))
9250                                 return PTR_ERR(trans);
9251                 }
9252                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9253                 prune_one_block(trans, info, corrupt);
9254                 remove_cache_extent(info->corrupt_blocks, cache);
9255         }
9256         if (trans)
9257                 return btrfs_commit_transaction(trans, info->extent_root);
9258         return 0;
9259 }
9260
9261 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9262 {
9263         struct btrfs_block_group_cache *cache;
9264         u64 start, end;
9265         int ret;
9266
9267         while (1) {
9268                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9269                                             &start, &end, EXTENT_DIRTY);
9270                 if (ret)
9271                         break;
9272                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9273         }
9274
9275         start = 0;
9276         while (1) {
9277                 cache = btrfs_lookup_first_block_group(fs_info, start);
9278                 if (!cache)
9279                         break;
9280                 if (cache->cached)
9281                         cache->cached = 0;
9282                 start = cache->key.objectid + cache->key.offset;
9283         }
9284 }
9285
9286 static int check_extent_refs(struct btrfs_root *root,
9287                              struct cache_tree *extent_cache)
9288 {
9289         struct extent_record *rec;
9290         struct cache_extent *cache;
9291         int ret = 0;
9292         int had_dups = 0;
9293
9294         if (repair) {
9295                 /*
9296                  * if we're doing a repair, we have to make sure
9297                  * we don't allocate from the problem extents.
9298                  * In the worst case, this will be all the
9299                  * extents in the FS
9300                  */
9301                 cache = search_cache_extent(extent_cache, 0);
9302                 while(cache) {
9303                         rec = container_of(cache, struct extent_record, cache);
9304                         set_extent_dirty(root->fs_info->excluded_extents,
9305                                          rec->start,
9306                                          rec->start + rec->max_size - 1);
9307                         cache = next_cache_extent(cache);
9308                 }
9309
9310                 /* pin down all the corrupted blocks too */
9311                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9312                 while(cache) {
9313                         set_extent_dirty(root->fs_info->excluded_extents,
9314                                          cache->start,
9315                                          cache->start + cache->size - 1);
9316                         cache = next_cache_extent(cache);
9317                 }
9318                 prune_corrupt_blocks(root->fs_info);
9319                 reset_cached_block_groups(root->fs_info);
9320         }
9321
9322         reset_cached_block_groups(root->fs_info);
9323
9324         /*
9325          * We need to delete any duplicate entries we find first otherwise we
9326          * could mess up the extent tree when we have backrefs that actually
9327          * belong to a different extent item and not the weird duplicate one.
9328          */
9329         while (repair && !list_empty(&duplicate_extents)) {
9330                 rec = to_extent_record(duplicate_extents.next);
9331                 list_del_init(&rec->list);
9332
9333                 /* Sometimes we can find a backref before we find an actual
9334                  * extent, so we need to process it a little bit to see if there
9335                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9336                  * if this is a backref screwup.  If we need to delete stuff
9337                  * process_duplicates() will return 0, otherwise it will return
9338                  * 1 and we
9339                  */
9340                 if (process_duplicates(extent_cache, rec))
9341                         continue;
9342                 ret = delete_duplicate_records(root, rec);
9343                 if (ret < 0)
9344                         return ret;
9345                 /*
9346                  * delete_duplicate_records will return the number of entries
9347                  * deleted, so if it's greater than 0 then we know we actually
9348                  * did something and we need to remove.
9349                  */
9350                 if (ret)
9351                         had_dups = 1;
9352         }
9353
9354         if (had_dups)
9355                 return -EAGAIN;
9356
9357         while(1) {
9358                 int cur_err = 0;
9359                 int fix = 0;
9360
9361                 cache = search_cache_extent(extent_cache, 0);
9362                 if (!cache)
9363                         break;
9364                 rec = container_of(cache, struct extent_record, cache);
9365                 if (rec->num_duplicates) {
9366                         fprintf(stderr, "extent item %llu has multiple extent "
9367                                 "items\n", (unsigned long long)rec->start);
9368                         cur_err = 1;
9369                 }
9370
9371                 if (rec->refs != rec->extent_item_refs) {
9372                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9373                                 (unsigned long long)rec->start,
9374                                 (unsigned long long)rec->nr);
9375                         fprintf(stderr, "extent item %llu, found %llu\n",
9376                                 (unsigned long long)rec->extent_item_refs,
9377                                 (unsigned long long)rec->refs);
9378                         ret = record_orphan_data_extents(root->fs_info, rec);
9379                         if (ret < 0)
9380                                 goto repair_abort;
9381                         fix = ret;
9382                         cur_err = 1;
9383                 }
9384                 if (all_backpointers_checked(rec, 1)) {
9385                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9386                                 (unsigned long long)rec->start,
9387                                 (unsigned long long)rec->nr);
9388                         fix = 1;
9389                         cur_err = 1;
9390                 }
9391                 if (!rec->owner_ref_checked) {
9392                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9393                                 (unsigned long long)rec->start,
9394                                 (unsigned long long)rec->nr);
9395                         fix = 1;
9396                         cur_err = 1;
9397                 }
9398
9399                 if (repair && fix) {
9400                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9401                         if (ret)
9402                                 goto repair_abort;
9403                 }
9404
9405
9406                 if (rec->bad_full_backref) {
9407                         fprintf(stderr, "bad full backref, on [%llu]\n",
9408                                 (unsigned long long)rec->start);
9409                         if (repair) {
9410                                 ret = fixup_extent_flags(root->fs_info, rec);
9411                                 if (ret)
9412                                         goto repair_abort;
9413                                 fix = 1;
9414                         }
9415                         cur_err = 1;
9416                 }
9417                 /*
9418                  * Although it's not a extent ref's problem, we reuse this
9419                  * routine for error reporting.
9420                  * No repair function yet.
9421                  */
9422                 if (rec->crossing_stripes) {
9423                         fprintf(stderr,
9424                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9425                                 rec->start, rec->start + rec->max_size);
9426                         cur_err = 1;
9427                 }
9428
9429                 if (rec->wrong_chunk_type) {
9430                         fprintf(stderr,
9431                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9432                                 rec->start, rec->start + rec->max_size);
9433                         cur_err = 1;
9434                 }
9435
9436                 remove_cache_extent(extent_cache, cache);
9437                 free_all_extent_backrefs(rec);
9438                 if (!init_extent_tree && repair && (!cur_err || fix))
9439                         clear_extent_dirty(root->fs_info->excluded_extents,
9440                                            rec->start,
9441                                            rec->start + rec->max_size - 1);
9442                 free(rec);
9443         }
9444 repair_abort:
9445         if (repair) {
9446                 if (ret && ret != -EAGAIN) {
9447                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9448                         exit(1);
9449                 } else if (!ret) {
9450                         struct btrfs_trans_handle *trans;
9451
9452                         root = root->fs_info->extent_root;
9453                         trans = btrfs_start_transaction(root, 1);
9454                         if (IS_ERR(trans)) {
9455                                 ret = PTR_ERR(trans);
9456                                 goto repair_abort;
9457                         }
9458
9459                         btrfs_fix_block_accounting(trans, root);
9460                         ret = btrfs_commit_transaction(trans, root);
9461                         if (ret)
9462                                 goto repair_abort;
9463                 }
9464                 return ret;
9465         }
9466         return 0;
9467 }
9468
9469 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9470 {
9471         u64 stripe_size;
9472
9473         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9474                 stripe_size = length;
9475                 stripe_size /= num_stripes;
9476         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9477                 stripe_size = length * 2;
9478                 stripe_size /= num_stripes;
9479         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9480                 stripe_size = length;
9481                 stripe_size /= (num_stripes - 1);
9482         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9483                 stripe_size = length;
9484                 stripe_size /= (num_stripes - 2);
9485         } else {
9486                 stripe_size = length;
9487         }
9488         return stripe_size;
9489 }
9490
9491 /*
9492  * Check the chunk with its block group/dev list ref:
9493  * Return 0 if all refs seems valid.
9494  * Return 1 if part of refs seems valid, need later check for rebuild ref
9495  * like missing block group and needs to search extent tree to rebuild them.
9496  * Return -1 if essential refs are missing and unable to rebuild.
9497  */
9498 static int check_chunk_refs(struct chunk_record *chunk_rec,
9499                             struct block_group_tree *block_group_cache,
9500                             struct device_extent_tree *dev_extent_cache,
9501                             int silent)
9502 {
9503         struct cache_extent *block_group_item;
9504         struct block_group_record *block_group_rec;
9505         struct cache_extent *dev_extent_item;
9506         struct device_extent_record *dev_extent_rec;
9507         u64 devid;
9508         u64 offset;
9509         u64 length;
9510         int metadump_v2 = 0;
9511         int i;
9512         int ret = 0;
9513
9514         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9515                                                chunk_rec->offset,
9516                                                chunk_rec->length);
9517         if (block_group_item) {
9518                 block_group_rec = container_of(block_group_item,
9519                                                struct block_group_record,
9520                                                cache);
9521                 if (chunk_rec->length != block_group_rec->offset ||
9522                     chunk_rec->offset != block_group_rec->objectid ||
9523                     (!metadump_v2 &&
9524                      chunk_rec->type_flags != block_group_rec->flags)) {
9525                         if (!silent)
9526                                 fprintf(stderr,
9527                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9528                                         chunk_rec->objectid,
9529                                         chunk_rec->type,
9530                                         chunk_rec->offset,
9531                                         chunk_rec->length,
9532                                         chunk_rec->offset,
9533                                         chunk_rec->type_flags,
9534                                         block_group_rec->objectid,
9535                                         block_group_rec->type,
9536                                         block_group_rec->offset,
9537                                         block_group_rec->offset,
9538                                         block_group_rec->objectid,
9539                                         block_group_rec->flags);
9540                         ret = -1;
9541                 } else {
9542                         list_del_init(&block_group_rec->list);
9543                         chunk_rec->bg_rec = block_group_rec;
9544                 }
9545         } else {
9546                 if (!silent)
9547                         fprintf(stderr,
9548                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9549                                 chunk_rec->objectid,
9550                                 chunk_rec->type,
9551                                 chunk_rec->offset,
9552                                 chunk_rec->length,
9553                                 chunk_rec->offset,
9554                                 chunk_rec->type_flags);
9555                 ret = 1;
9556         }
9557
9558         if (metadump_v2)
9559                 return ret;
9560
9561         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9562                                     chunk_rec->num_stripes);
9563         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9564                 devid = chunk_rec->stripes[i].devid;
9565                 offset = chunk_rec->stripes[i].offset;
9566                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9567                                                        devid, offset, length);
9568                 if (dev_extent_item) {
9569                         dev_extent_rec = container_of(dev_extent_item,
9570                                                 struct device_extent_record,
9571                                                 cache);
9572                         if (dev_extent_rec->objectid != devid ||
9573                             dev_extent_rec->offset != offset ||
9574                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9575                             dev_extent_rec->length != length) {
9576                                 if (!silent)
9577                                         fprintf(stderr,
9578                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9579                                                 chunk_rec->objectid,
9580                                                 chunk_rec->type,
9581                                                 chunk_rec->offset,
9582                                                 chunk_rec->stripes[i].devid,
9583                                                 chunk_rec->stripes[i].offset,
9584                                                 dev_extent_rec->objectid,
9585                                                 dev_extent_rec->offset,
9586                                                 dev_extent_rec->length);
9587                                 ret = -1;
9588                         } else {
9589                                 list_move(&dev_extent_rec->chunk_list,
9590                                           &chunk_rec->dextents);
9591                         }
9592                 } else {
9593                         if (!silent)
9594                                 fprintf(stderr,
9595                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9596                                         chunk_rec->objectid,
9597                                         chunk_rec->type,
9598                                         chunk_rec->offset,
9599                                         chunk_rec->stripes[i].devid,
9600                                         chunk_rec->stripes[i].offset);
9601                         ret = -1;
9602                 }
9603         }
9604         return ret;
9605 }
9606
9607 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9608 int check_chunks(struct cache_tree *chunk_cache,
9609                  struct block_group_tree *block_group_cache,
9610                  struct device_extent_tree *dev_extent_cache,
9611                  struct list_head *good, struct list_head *bad,
9612                  struct list_head *rebuild, int silent)
9613 {
9614         struct cache_extent *chunk_item;
9615         struct chunk_record *chunk_rec;
9616         struct block_group_record *bg_rec;
9617         struct device_extent_record *dext_rec;
9618         int err;
9619         int ret = 0;
9620
9621         chunk_item = first_cache_extent(chunk_cache);
9622         while (chunk_item) {
9623                 chunk_rec = container_of(chunk_item, struct chunk_record,
9624                                          cache);
9625                 err = check_chunk_refs(chunk_rec, block_group_cache,
9626                                        dev_extent_cache, silent);
9627                 if (err < 0)
9628                         ret = err;
9629                 if (err == 0 && good)
9630                         list_add_tail(&chunk_rec->list, good);
9631                 if (err > 0 && rebuild)
9632                         list_add_tail(&chunk_rec->list, rebuild);
9633                 if (err < 0 && bad)
9634                         list_add_tail(&chunk_rec->list, bad);
9635                 chunk_item = next_cache_extent(chunk_item);
9636         }
9637
9638         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9639                 if (!silent)
9640                         fprintf(stderr,
9641                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9642                                 bg_rec->objectid,
9643                                 bg_rec->offset,
9644                                 bg_rec->flags);
9645                 if (!ret)
9646                         ret = 1;
9647         }
9648
9649         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9650                             chunk_list) {
9651                 if (!silent)
9652                         fprintf(stderr,
9653                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9654                                 dext_rec->objectid,
9655                                 dext_rec->offset,
9656                                 dext_rec->length);
9657                 if (!ret)
9658                         ret = 1;
9659         }
9660         return ret;
9661 }
9662
9663
9664 static int check_device_used(struct device_record *dev_rec,
9665                              struct device_extent_tree *dext_cache)
9666 {
9667         struct cache_extent *cache;
9668         struct device_extent_record *dev_extent_rec;
9669         u64 total_byte = 0;
9670
9671         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9672         while (cache) {
9673                 dev_extent_rec = container_of(cache,
9674                                               struct device_extent_record,
9675                                               cache);
9676                 if (dev_extent_rec->objectid != dev_rec->devid)
9677                         break;
9678
9679                 list_del_init(&dev_extent_rec->device_list);
9680                 total_byte += dev_extent_rec->length;
9681                 cache = next_cache_extent(cache);
9682         }
9683
9684         if (total_byte != dev_rec->byte_used) {
9685                 fprintf(stderr,
9686                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9687                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9688                         dev_rec->type, dev_rec->offset);
9689                 return -1;
9690         } else {
9691                 return 0;
9692         }
9693 }
9694
9695 /* check btrfs_dev_item -> btrfs_dev_extent */
9696 static int check_devices(struct rb_root *dev_cache,
9697                          struct device_extent_tree *dev_extent_cache)
9698 {
9699         struct rb_node *dev_node;
9700         struct device_record *dev_rec;
9701         struct device_extent_record *dext_rec;
9702         int err;
9703         int ret = 0;
9704
9705         dev_node = rb_first(dev_cache);
9706         while (dev_node) {
9707                 dev_rec = container_of(dev_node, struct device_record, node);
9708                 err = check_device_used(dev_rec, dev_extent_cache);
9709                 if (err)
9710                         ret = err;
9711
9712                 dev_node = rb_next(dev_node);
9713         }
9714         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9715                             device_list) {
9716                 fprintf(stderr,
9717                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9718                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9719                 if (!ret)
9720                         ret = 1;
9721         }
9722         return ret;
9723 }
9724
9725 static int add_root_item_to_list(struct list_head *head,
9726                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9727                                   u8 level, u8 drop_level,
9728                                   int level_size, struct btrfs_key *drop_key)
9729 {
9730
9731         struct root_item_record *ri_rec;
9732         ri_rec = malloc(sizeof(*ri_rec));
9733         if (!ri_rec)
9734                 return -ENOMEM;
9735         ri_rec->bytenr = bytenr;
9736         ri_rec->objectid = objectid;
9737         ri_rec->level = level;
9738         ri_rec->level_size = level_size;
9739         ri_rec->drop_level = drop_level;
9740         ri_rec->last_snapshot = last_snapshot;
9741         if (drop_key)
9742                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9743         list_add_tail(&ri_rec->list, head);
9744
9745         return 0;
9746 }
9747
9748 static void free_root_item_list(struct list_head *list)
9749 {
9750         struct root_item_record *ri_rec;
9751
9752         while (!list_empty(list)) {
9753                 ri_rec = list_first_entry(list, struct root_item_record,
9754                                           list);
9755                 list_del_init(&ri_rec->list);
9756                 free(ri_rec);
9757         }
9758 }
9759
9760 static int deal_root_from_list(struct list_head *list,
9761                                struct btrfs_root *root,
9762                                struct block_info *bits,
9763                                int bits_nr,
9764                                struct cache_tree *pending,
9765                                struct cache_tree *seen,
9766                                struct cache_tree *reada,
9767                                struct cache_tree *nodes,
9768                                struct cache_tree *extent_cache,
9769                                struct cache_tree *chunk_cache,
9770                                struct rb_root *dev_cache,
9771                                struct block_group_tree *block_group_cache,
9772                                struct device_extent_tree *dev_extent_cache)
9773 {
9774         int ret = 0;
9775         u64 last;
9776
9777         while (!list_empty(list)) {
9778                 struct root_item_record *rec;
9779                 struct extent_buffer *buf;
9780                 rec = list_entry(list->next,
9781                                  struct root_item_record, list);
9782                 last = 0;
9783                 buf = read_tree_block(root->fs_info,
9784                                       rec->bytenr, rec->level_size, 0);
9785                 if (!extent_buffer_uptodate(buf)) {
9786                         free_extent_buffer(buf);
9787                         ret = -EIO;
9788                         break;
9789                 }
9790                 ret = add_root_to_pending(buf, extent_cache, pending,
9791                                     seen, nodes, rec->objectid);
9792                 if (ret < 0)
9793                         break;
9794                 /*
9795                  * To rebuild extent tree, we need deal with snapshot
9796                  * one by one, otherwise we deal with node firstly which
9797                  * can maximize readahead.
9798                  */
9799                 while (1) {
9800                         ret = run_next_block(root, bits, bits_nr, &last,
9801                                              pending, seen, reada, nodes,
9802                                              extent_cache, chunk_cache,
9803                                              dev_cache, block_group_cache,
9804                                              dev_extent_cache, rec);
9805                         if (ret != 0)
9806                                 break;
9807                 }
9808                 free_extent_buffer(buf);
9809                 list_del(&rec->list);
9810                 free(rec);
9811                 if (ret < 0)
9812                         break;
9813         }
9814         while (ret >= 0) {
9815                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9816                                      reada, nodes, extent_cache, chunk_cache,
9817                                      dev_cache, block_group_cache,
9818                                      dev_extent_cache, NULL);
9819                 if (ret != 0) {
9820                         if (ret > 0)
9821                                 ret = 0;
9822                         break;
9823                 }
9824         }
9825         return ret;
9826 }
9827
9828 static int check_chunks_and_extents(struct btrfs_root *root)
9829 {
9830         struct rb_root dev_cache;
9831         struct cache_tree chunk_cache;
9832         struct block_group_tree block_group_cache;
9833         struct device_extent_tree dev_extent_cache;
9834         struct cache_tree extent_cache;
9835         struct cache_tree seen;
9836         struct cache_tree pending;
9837         struct cache_tree reada;
9838         struct cache_tree nodes;
9839         struct extent_io_tree excluded_extents;
9840         struct cache_tree corrupt_blocks;
9841         struct btrfs_path path;
9842         struct btrfs_key key;
9843         struct btrfs_key found_key;
9844         int ret, err = 0;
9845         struct block_info *bits;
9846         int bits_nr;
9847         struct extent_buffer *leaf;
9848         int slot;
9849         struct btrfs_root_item ri;
9850         struct list_head dropping_trees;
9851         struct list_head normal_trees;
9852         struct btrfs_root *root1;
9853         u64 objectid;
9854         u32 level_size;
9855         u8 level;
9856
9857         dev_cache = RB_ROOT;
9858         cache_tree_init(&chunk_cache);
9859         block_group_tree_init(&block_group_cache);
9860         device_extent_tree_init(&dev_extent_cache);
9861
9862         cache_tree_init(&extent_cache);
9863         cache_tree_init(&seen);
9864         cache_tree_init(&pending);
9865         cache_tree_init(&nodes);
9866         cache_tree_init(&reada);
9867         cache_tree_init(&corrupt_blocks);
9868         extent_io_tree_init(&excluded_extents);
9869         INIT_LIST_HEAD(&dropping_trees);
9870         INIT_LIST_HEAD(&normal_trees);
9871
9872         if (repair) {
9873                 root->fs_info->excluded_extents = &excluded_extents;
9874                 root->fs_info->fsck_extent_cache = &extent_cache;
9875                 root->fs_info->free_extent_hook = free_extent_hook;
9876                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9877         }
9878
9879         bits_nr = 1024;
9880         bits = malloc(bits_nr * sizeof(struct block_info));
9881         if (!bits) {
9882                 perror("malloc");
9883                 exit(1);
9884         }
9885
9886         if (ctx.progress_enabled) {
9887                 ctx.tp = TASK_EXTENTS;
9888                 task_start(ctx.info);
9889         }
9890
9891 again:
9892         root1 = root->fs_info->tree_root;
9893         level = btrfs_header_level(root1->node);
9894         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9895                                     root1->node->start, 0, level, 0,
9896                                     root1->fs_info->nodesize, NULL);
9897         if (ret < 0)
9898                 goto out;
9899         root1 = root->fs_info->chunk_root;
9900         level = btrfs_header_level(root1->node);
9901         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9902                                     root1->node->start, 0, level, 0,
9903                                     root1->fs_info->nodesize, NULL);
9904         if (ret < 0)
9905                 goto out;
9906         btrfs_init_path(&path);
9907         key.offset = 0;
9908         key.objectid = 0;
9909         key.type = BTRFS_ROOT_ITEM_KEY;
9910         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9911                                         &key, &path, 0, 0);
9912         if (ret < 0)
9913                 goto out;
9914         while(1) {
9915                 leaf = path.nodes[0];
9916                 slot = path.slots[0];
9917                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9918                         ret = btrfs_next_leaf(root, &path);
9919                         if (ret != 0)
9920                                 break;
9921                         leaf = path.nodes[0];
9922                         slot = path.slots[0];
9923                 }
9924                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9925                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9926                         unsigned long offset;
9927                         u64 last_snapshot;
9928
9929                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9930                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9931                         last_snapshot = btrfs_root_last_snapshot(&ri);
9932                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9933                                 level = btrfs_root_level(&ri);
9934                                 level_size = root->fs_info->nodesize;
9935                                 ret = add_root_item_to_list(&normal_trees,
9936                                                 found_key.objectid,
9937                                                 btrfs_root_bytenr(&ri),
9938                                                 last_snapshot, level,
9939                                                 0, level_size, NULL);
9940                                 if (ret < 0)
9941                                         goto out;
9942                         } else {
9943                                 level = btrfs_root_level(&ri);
9944                                 level_size = root->fs_info->nodesize;
9945                                 objectid = found_key.objectid;
9946                                 btrfs_disk_key_to_cpu(&found_key,
9947                                                       &ri.drop_progress);
9948                                 ret = add_root_item_to_list(&dropping_trees,
9949                                                 objectid,
9950                                                 btrfs_root_bytenr(&ri),
9951                                                 last_snapshot, level,
9952                                                 ri.drop_level,
9953                                                 level_size, &found_key);
9954                                 if (ret < 0)
9955                                         goto out;
9956                         }
9957                 }
9958                 path.slots[0]++;
9959         }
9960         btrfs_release_path(&path);
9961
9962         /*
9963          * check_block can return -EAGAIN if it fixes something, please keep
9964          * this in mind when dealing with return values from these functions, if
9965          * we get -EAGAIN we want to fall through and restart the loop.
9966          */
9967         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9968                                   &seen, &reada, &nodes, &extent_cache,
9969                                   &chunk_cache, &dev_cache, &block_group_cache,
9970                                   &dev_extent_cache);
9971         if (ret < 0) {
9972                 if (ret == -EAGAIN)
9973                         goto loop;
9974                 goto out;
9975         }
9976         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9977                                   &pending, &seen, &reada, &nodes,
9978                                   &extent_cache, &chunk_cache, &dev_cache,
9979                                   &block_group_cache, &dev_extent_cache);
9980         if (ret < 0) {
9981                 if (ret == -EAGAIN)
9982                         goto loop;
9983                 goto out;
9984         }
9985
9986         ret = check_chunks(&chunk_cache, &block_group_cache,
9987                            &dev_extent_cache, NULL, NULL, NULL, 0);
9988         if (ret) {
9989                 if (ret == -EAGAIN)
9990                         goto loop;
9991                 err = ret;
9992         }
9993
9994         ret = check_extent_refs(root, &extent_cache);
9995         if (ret < 0) {
9996                 if (ret == -EAGAIN)
9997                         goto loop;
9998                 goto out;
9999         }
10000
10001         ret = check_devices(&dev_cache, &dev_extent_cache);
10002         if (ret && err)
10003                 ret = err;
10004
10005 out:
10006         task_stop(ctx.info);
10007         if (repair) {
10008                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10009                 extent_io_tree_cleanup(&excluded_extents);
10010                 root->fs_info->fsck_extent_cache = NULL;
10011                 root->fs_info->free_extent_hook = NULL;
10012                 root->fs_info->corrupt_blocks = NULL;
10013                 root->fs_info->excluded_extents = NULL;
10014         }
10015         free(bits);
10016         free_chunk_cache_tree(&chunk_cache);
10017         free_device_cache_tree(&dev_cache);
10018         free_block_group_tree(&block_group_cache);
10019         free_device_extent_tree(&dev_extent_cache);
10020         free_extent_cache_tree(&seen);
10021         free_extent_cache_tree(&pending);
10022         free_extent_cache_tree(&reada);
10023         free_extent_cache_tree(&nodes);
10024         free_root_item_list(&normal_trees);
10025         free_root_item_list(&dropping_trees);
10026         return ret;
10027 loop:
10028         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10029         free_extent_cache_tree(&seen);
10030         free_extent_cache_tree(&pending);
10031         free_extent_cache_tree(&reada);
10032         free_extent_cache_tree(&nodes);
10033         free_chunk_cache_tree(&chunk_cache);
10034         free_block_group_tree(&block_group_cache);
10035         free_device_cache_tree(&dev_cache);
10036         free_device_extent_tree(&dev_extent_cache);
10037         free_extent_record_cache(&extent_cache);
10038         free_root_item_list(&normal_trees);
10039         free_root_item_list(&dropping_trees);
10040         extent_io_tree_cleanup(&excluded_extents);
10041         goto again;
10042 }
10043
10044 /*
10045  * Check backrefs of a tree block given by @bytenr or @eb.
10046  *
10047  * @root:       the root containing the @bytenr or @eb
10048  * @eb:         tree block extent buffer, can be NULL
10049  * @bytenr:     bytenr of the tree block to search
10050  * @level:      tree level of the tree block
10051  * @owner:      owner of the tree block
10052  *
10053  * Return >0 for any error found and output error message
10054  * Return 0 for no error found
10055  */
10056 static int check_tree_block_ref(struct btrfs_root *root,
10057                                 struct extent_buffer *eb, u64 bytenr,
10058                                 int level, u64 owner)
10059 {
10060         struct btrfs_key key;
10061         struct btrfs_root *extent_root = root->fs_info->extent_root;
10062         struct btrfs_path path;
10063         struct btrfs_extent_item *ei;
10064         struct btrfs_extent_inline_ref *iref;
10065         struct extent_buffer *leaf;
10066         unsigned long end;
10067         unsigned long ptr;
10068         int slot;
10069         int skinny_level;
10070         int type;
10071         u32 nodesize = root->fs_info->nodesize;
10072         u32 item_size;
10073         u64 offset;
10074         int tree_reloc_root = 0;
10075         int found_ref = 0;
10076         int err = 0;
10077         int ret;
10078
10079         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10080             btrfs_header_bytenr(root->node) == bytenr)
10081                 tree_reloc_root = 1;
10082
10083         btrfs_init_path(&path);
10084         key.objectid = bytenr;
10085         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10086                 key.type = BTRFS_METADATA_ITEM_KEY;
10087         else
10088                 key.type = BTRFS_EXTENT_ITEM_KEY;
10089         key.offset = (u64)-1;
10090
10091         /* Search for the backref in extent tree */
10092         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10093         if (ret < 0) {
10094                 err |= BACKREF_MISSING;
10095                 goto out;
10096         }
10097         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10098         if (ret) {
10099                 err |= BACKREF_MISSING;
10100                 goto out;
10101         }
10102
10103         leaf = path.nodes[0];
10104         slot = path.slots[0];
10105         btrfs_item_key_to_cpu(leaf, &key, slot);
10106
10107         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10108
10109         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10110                 skinny_level = (int)key.offset;
10111                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10112         } else {
10113                 struct btrfs_tree_block_info *info;
10114
10115                 info = (struct btrfs_tree_block_info *)(ei + 1);
10116                 skinny_level = btrfs_tree_block_level(leaf, info);
10117                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10118         }
10119
10120         if (eb) {
10121                 u64 header_gen;
10122                 u64 extent_gen;
10123
10124                 if (!(btrfs_extent_flags(leaf, ei) &
10125                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10126                         error(
10127                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10128                                 key.objectid, nodesize,
10129                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10130                         err = BACKREF_MISMATCH;
10131                 }
10132                 header_gen = btrfs_header_generation(eb);
10133                 extent_gen = btrfs_extent_generation(leaf, ei);
10134                 if (header_gen != extent_gen) {
10135                         error(
10136         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10137                                 key.objectid, nodesize, header_gen,
10138                                 extent_gen);
10139                         err = BACKREF_MISMATCH;
10140                 }
10141                 if (level != skinny_level) {
10142                         error(
10143                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10144                                 key.objectid, nodesize, level, skinny_level);
10145                         err = BACKREF_MISMATCH;
10146                 }
10147                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10148                         error(
10149                         "extent[%llu %u] is referred by other roots than %llu",
10150                                 key.objectid, nodesize, root->objectid);
10151                         err = BACKREF_MISMATCH;
10152                 }
10153         }
10154
10155         /*
10156          * Iterate the extent/metadata item to find the exact backref
10157          */
10158         item_size = btrfs_item_size_nr(leaf, slot);
10159         ptr = (unsigned long)iref;
10160         end = (unsigned long)ei + item_size;
10161         while (ptr < end) {
10162                 iref = (struct btrfs_extent_inline_ref *)ptr;
10163                 type = btrfs_extent_inline_ref_type(leaf, iref);
10164                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10165
10166                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10167                         (offset == root->objectid || offset == owner)) {
10168                         found_ref = 1;
10169                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10170                         /*
10171                          * Backref of tree reloc root points to itself, no need
10172                          * to check backref any more.
10173                          */
10174                         if (tree_reloc_root)
10175                                 found_ref = 1;
10176                         else
10177                         /* Check if the backref points to valid referencer */
10178                                 found_ref = !check_tree_block_ref(root, NULL,
10179                                                 offset, level + 1, owner);
10180                 }
10181
10182                 if (found_ref)
10183                         break;
10184                 ptr += btrfs_extent_inline_ref_size(type);
10185         }
10186
10187         /*
10188          * Inlined extent item doesn't have what we need, check
10189          * TREE_BLOCK_REF_KEY
10190          */
10191         if (!found_ref) {
10192                 btrfs_release_path(&path);
10193                 key.objectid = bytenr;
10194                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10195                 key.offset = root->objectid;
10196
10197                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10198                 if (!ret)
10199                         found_ref = 1;
10200         }
10201         if (!found_ref)
10202                 err |= BACKREF_MISSING;
10203 out:
10204         btrfs_release_path(&path);
10205         if (eb && (err & BACKREF_MISSING))
10206                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10207                         bytenr, nodesize, owner, level);
10208         return err;
10209 }
10210
10211 /*
10212  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10213  *
10214  * Return >0 any error found and output error message
10215  * Return 0 for no error found
10216  */
10217 static int check_extent_data_item(struct btrfs_root *root,
10218                                   struct extent_buffer *eb, int slot)
10219 {
10220         struct btrfs_file_extent_item *fi;
10221         struct btrfs_path path;
10222         struct btrfs_root *extent_root = root->fs_info->extent_root;
10223         struct btrfs_key fi_key;
10224         struct btrfs_key dbref_key;
10225         struct extent_buffer *leaf;
10226         struct btrfs_extent_item *ei;
10227         struct btrfs_extent_inline_ref *iref;
10228         struct btrfs_extent_data_ref *dref;
10229         u64 owner;
10230         u64 disk_bytenr;
10231         u64 disk_num_bytes;
10232         u64 extent_num_bytes;
10233         u64 extent_flags;
10234         u32 item_size;
10235         unsigned long end;
10236         unsigned long ptr;
10237         int type;
10238         u64 ref_root;
10239         int found_dbackref = 0;
10240         int err = 0;
10241         int ret;
10242
10243         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10244         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10245
10246         /* Nothing to check for hole and inline data extents */
10247         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10248             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10249                 return 0;
10250
10251         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10252         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10253         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10254
10255         /* Check unaligned disk_num_bytes and num_bytes */
10256         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10257                 error(
10258 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10259                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10260                         root->fs_info->sectorsize);
10261                 err |= BYTES_UNALIGNED;
10262         } else {
10263                 data_bytes_allocated += disk_num_bytes;
10264         }
10265         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10266                 error(
10267 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10268                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10269                         root->fs_info->sectorsize);
10270                 err |= BYTES_UNALIGNED;
10271         } else {
10272                 data_bytes_referenced += extent_num_bytes;
10273         }
10274         owner = btrfs_header_owner(eb);
10275
10276         /* Check the extent item of the file extent in extent tree */
10277         btrfs_init_path(&path);
10278         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10279         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10280         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10281
10282         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10283         if (ret)
10284                 goto out;
10285
10286         leaf = path.nodes[0];
10287         slot = path.slots[0];
10288         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10289
10290         extent_flags = btrfs_extent_flags(leaf, ei);
10291
10292         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10293                 error(
10294                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10295                     disk_bytenr, disk_num_bytes,
10296                     BTRFS_EXTENT_FLAG_DATA);
10297                 err |= BACKREF_MISMATCH;
10298         }
10299
10300         /* Check data backref inside that extent item */
10301         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10302         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10303         ptr = (unsigned long)iref;
10304         end = (unsigned long)ei + item_size;
10305         while (ptr < end) {
10306                 iref = (struct btrfs_extent_inline_ref *)ptr;
10307                 type = btrfs_extent_inline_ref_type(leaf, iref);
10308                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10309
10310                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10311                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10312                         if (ref_root == owner || ref_root == root->objectid)
10313                                 found_dbackref = 1;
10314                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10315                         found_dbackref = !check_tree_block_ref(root, NULL,
10316                                 btrfs_extent_inline_ref_offset(leaf, iref),
10317                                 0, owner);
10318                 }
10319
10320                 if (found_dbackref)
10321                         break;
10322                 ptr += btrfs_extent_inline_ref_size(type);
10323         }
10324
10325         if (!found_dbackref) {
10326                 btrfs_release_path(&path);
10327
10328                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10329                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10330                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10331                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10332                                 fi_key.objectid, fi_key.offset);
10333
10334                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10335                                         &dbref_key, &path, 0, 0);
10336                 if (!ret) {
10337                         found_dbackref = 1;
10338                         goto out;
10339                 }
10340
10341                 btrfs_release_path(&path);
10342
10343                 /*
10344                  * Neither inlined nor EXTENT_DATA_REF found, try
10345                  * SHARED_DATA_REF as last chance.
10346                  */
10347                 dbref_key.objectid = disk_bytenr;
10348                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10349                 dbref_key.offset = eb->start;
10350
10351                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10352                                         &dbref_key, &path, 0, 0);
10353                 if (!ret) {
10354                         found_dbackref = 1;
10355                         goto out;
10356                 }
10357         }
10358
10359 out:
10360         if (!found_dbackref)
10361                 err |= BACKREF_MISSING;
10362         btrfs_release_path(&path);
10363         if (err & BACKREF_MISSING) {
10364                 error("data extent[%llu %llu] backref lost",
10365                       disk_bytenr, disk_num_bytes);
10366         }
10367         return err;
10368 }
10369
10370 /*
10371  * Get real tree block level for the case like shared block
10372  * Return >= 0 as tree level
10373  * Return <0 for error
10374  */
10375 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10376 {
10377         struct extent_buffer *eb;
10378         struct btrfs_path path;
10379         struct btrfs_key key;
10380         struct btrfs_extent_item *ei;
10381         u64 flags;
10382         u64 transid;
10383         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10384         u8 backref_level;
10385         u8 header_level;
10386         int ret;
10387
10388         /* Search extent tree for extent generation and level */
10389         key.objectid = bytenr;
10390         key.type = BTRFS_METADATA_ITEM_KEY;
10391         key.offset = (u64)-1;
10392
10393         btrfs_init_path(&path);
10394         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10395         if (ret < 0)
10396                 goto release_out;
10397         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10398         if (ret < 0)
10399                 goto release_out;
10400         if (ret > 0) {
10401                 ret = -ENOENT;
10402                 goto release_out;
10403         }
10404
10405         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10406         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10407                             struct btrfs_extent_item);
10408         flags = btrfs_extent_flags(path.nodes[0], ei);
10409         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10410                 ret = -ENOENT;
10411                 goto release_out;
10412         }
10413
10414         /* Get transid for later read_tree_block() check */
10415         transid = btrfs_extent_generation(path.nodes[0], ei);
10416
10417         /* Get backref level as one source */
10418         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10419                 backref_level = key.offset;
10420         } else {
10421                 struct btrfs_tree_block_info *info;
10422
10423                 info = (struct btrfs_tree_block_info *)(ei + 1);
10424                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10425         }
10426         btrfs_release_path(&path);
10427
10428         /* Get level from tree block as an alternative source */
10429         eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10430         if (!extent_buffer_uptodate(eb)) {
10431                 free_extent_buffer(eb);
10432                 return -EIO;
10433         }
10434         header_level = btrfs_header_level(eb);
10435         free_extent_buffer(eb);
10436
10437         if (header_level != backref_level)
10438                 return -EIO;
10439         return header_level;
10440
10441 release_out:
10442         btrfs_release_path(&path);
10443         return ret;
10444 }
10445
10446 /*
10447  * Check if a tree block backref is valid (points to a valid tree block)
10448  * if level == -1, level will be resolved
10449  * Return >0 for any error found and print error message
10450  */
10451 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10452                                     u64 bytenr, int level)
10453 {
10454         struct btrfs_root *root;
10455         struct btrfs_key key;
10456         struct btrfs_path path;
10457         struct extent_buffer *eb;
10458         struct extent_buffer *node;
10459         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10460         int err = 0;
10461         int ret;
10462
10463         /* Query level for level == -1 special case */
10464         if (level == -1)
10465                 level = query_tree_block_level(fs_info, bytenr);
10466         if (level < 0) {
10467                 err |= REFERENCER_MISSING;
10468                 goto out;
10469         }
10470
10471         key.objectid = root_id;
10472         key.type = BTRFS_ROOT_ITEM_KEY;
10473         key.offset = (u64)-1;
10474
10475         root = btrfs_read_fs_root(fs_info, &key);
10476         if (IS_ERR(root)) {
10477                 err |= REFERENCER_MISSING;
10478                 goto out;
10479         }
10480
10481         /* Read out the tree block to get item/node key */
10482         eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10483         if (!extent_buffer_uptodate(eb)) {
10484                 err |= REFERENCER_MISSING;
10485                 free_extent_buffer(eb);
10486                 goto out;
10487         }
10488
10489         /* Empty tree, no need to check key */
10490         if (!btrfs_header_nritems(eb) && !level) {
10491                 free_extent_buffer(eb);
10492                 goto out;
10493         }
10494
10495         if (level)
10496                 btrfs_node_key_to_cpu(eb, &key, 0);
10497         else
10498                 btrfs_item_key_to_cpu(eb, &key, 0);
10499
10500         free_extent_buffer(eb);
10501
10502         btrfs_init_path(&path);
10503         path.lowest_level = level;
10504         /* Search with the first key, to ensure we can reach it */
10505         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10506         if (ret < 0) {
10507                 err |= REFERENCER_MISSING;
10508                 goto release_out;
10509         }
10510
10511         node = path.nodes[level];
10512         if (btrfs_header_bytenr(node) != bytenr) {
10513                 error(
10514         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10515                         bytenr, nodesize, bytenr,
10516                         btrfs_header_bytenr(node));
10517                 err |= REFERENCER_MISMATCH;
10518         }
10519         if (btrfs_header_level(node) != level) {
10520                 error(
10521         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10522                         bytenr, nodesize, level,
10523                         btrfs_header_level(node));
10524                 err |= REFERENCER_MISMATCH;
10525         }
10526
10527 release_out:
10528         btrfs_release_path(&path);
10529 out:
10530         if (err & REFERENCER_MISSING) {
10531                 if (level < 0)
10532                         error("extent [%llu %d] lost referencer (owner: %llu)",
10533                                 bytenr, nodesize, root_id);
10534                 else
10535                         error(
10536                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10537                                 bytenr, nodesize, root_id, level);
10538         }
10539
10540         return err;
10541 }
10542
10543 /*
10544  * Check if tree block @eb is tree reloc root.
10545  * Return 0 if it's not or any problem happens
10546  * Return 1 if it's a tree reloc root
10547  */
10548 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10549                                  struct extent_buffer *eb)
10550 {
10551         struct btrfs_root *tree_reloc_root;
10552         struct btrfs_key key;
10553         u64 bytenr = btrfs_header_bytenr(eb);
10554         u64 owner = btrfs_header_owner(eb);
10555         int ret = 0;
10556
10557         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10558         key.offset = owner;
10559         key.type = BTRFS_ROOT_ITEM_KEY;
10560
10561         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10562         if (IS_ERR(tree_reloc_root))
10563                 return 0;
10564
10565         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10566                 ret = 1;
10567         btrfs_free_fs_root(tree_reloc_root);
10568         return ret;
10569 }
10570
10571 /*
10572  * Check referencer for shared block backref
10573  * If level == -1, this function will resolve the level.
10574  */
10575 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10576                                      u64 parent, u64 bytenr, int level)
10577 {
10578         struct extent_buffer *eb;
10579         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10580         u32 nr;
10581         int found_parent = 0;
10582         int i;
10583
10584         eb = read_tree_block(fs_info, parent, nodesize, 0);
10585         if (!extent_buffer_uptodate(eb))
10586                 goto out;
10587
10588         if (level == -1)
10589                 level = query_tree_block_level(fs_info, bytenr);
10590         if (level < 0)
10591                 goto out;
10592
10593         /* It's possible it's a tree reloc root */
10594         if (parent == bytenr) {
10595                 if (is_tree_reloc_root(fs_info, eb))
10596                         found_parent = 1;
10597                 goto out;
10598         }
10599
10600         if (level + 1 != btrfs_header_level(eb))
10601                 goto out;
10602
10603         nr = btrfs_header_nritems(eb);
10604         for (i = 0; i < nr; i++) {
10605                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10606                         found_parent = 1;
10607                         break;
10608                 }
10609         }
10610 out:
10611         free_extent_buffer(eb);
10612         if (!found_parent) {
10613                 error(
10614         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10615                         bytenr, nodesize, parent, level);
10616                 return REFERENCER_MISSING;
10617         }
10618         return 0;
10619 }
10620
10621 /*
10622  * Check referencer for normal (inlined) data ref
10623  * If len == 0, it will be resolved by searching in extent tree
10624  */
10625 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10626                                      u64 root_id, u64 objectid, u64 offset,
10627                                      u64 bytenr, u64 len, u32 count)
10628 {
10629         struct btrfs_root *root;
10630         struct btrfs_root *extent_root = fs_info->extent_root;
10631         struct btrfs_key key;
10632         struct btrfs_path path;
10633         struct extent_buffer *leaf;
10634         struct btrfs_file_extent_item *fi;
10635         u32 found_count = 0;
10636         int slot;
10637         int ret = 0;
10638
10639         if (!len) {
10640                 key.objectid = bytenr;
10641                 key.type = BTRFS_EXTENT_ITEM_KEY;
10642                 key.offset = (u64)-1;
10643
10644                 btrfs_init_path(&path);
10645                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10646                 if (ret < 0)
10647                         goto out;
10648                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10649                 if (ret)
10650                         goto out;
10651                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10652                 if (key.objectid != bytenr ||
10653                     key.type != BTRFS_EXTENT_ITEM_KEY)
10654                         goto out;
10655                 len = key.offset;
10656                 btrfs_release_path(&path);
10657         }
10658         key.objectid = root_id;
10659         key.type = BTRFS_ROOT_ITEM_KEY;
10660         key.offset = (u64)-1;
10661         btrfs_init_path(&path);
10662
10663         root = btrfs_read_fs_root(fs_info, &key);
10664         if (IS_ERR(root))
10665                 goto out;
10666
10667         key.objectid = objectid;
10668         key.type = BTRFS_EXTENT_DATA_KEY;
10669         /*
10670          * It can be nasty as data backref offset is
10671          * file offset - file extent offset, which is smaller or
10672          * equal to original backref offset.  The only special case is
10673          * overflow.  So we need to special check and do further search.
10674          */
10675         key.offset = offset & (1ULL << 63) ? 0 : offset;
10676
10677         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10678         if (ret < 0)
10679                 goto out;
10680
10681         /*
10682          * Search afterwards to get correct one
10683          * NOTE: As we must do a comprehensive check on the data backref to
10684          * make sure the dref count also matches, we must iterate all file
10685          * extents for that inode.
10686          */
10687         while (1) {
10688                 leaf = path.nodes[0];
10689                 slot = path.slots[0];
10690
10691                 if (slot >= btrfs_header_nritems(leaf))
10692                         goto next;
10693                 btrfs_item_key_to_cpu(leaf, &key, slot);
10694                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10695                         break;
10696                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10697                 /*
10698                  * Except normal disk bytenr and disk num bytes, we still
10699                  * need to do extra check on dbackref offset as
10700                  * dbackref offset = file_offset - file_extent_offset
10701                  */
10702                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10703                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10704                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10705                     offset)
10706                         found_count++;
10707
10708 next:
10709                 ret = btrfs_next_item(root, &path);
10710                 if (ret)
10711                         break;
10712         }
10713 out:
10714         btrfs_release_path(&path);
10715         if (found_count != count) {
10716                 error(
10717 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10718                         bytenr, len, root_id, objectid, offset, count, found_count);
10719                 return REFERENCER_MISSING;
10720         }
10721         return 0;
10722 }
10723
10724 /*
10725  * Check if the referencer of a shared data backref exists
10726  */
10727 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10728                                      u64 parent, u64 bytenr)
10729 {
10730         struct extent_buffer *eb;
10731         struct btrfs_key key;
10732         struct btrfs_file_extent_item *fi;
10733         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10734         u32 nr;
10735         int found_parent = 0;
10736         int i;
10737
10738         eb = read_tree_block(fs_info, parent, nodesize, 0);
10739         if (!extent_buffer_uptodate(eb))
10740                 goto out;
10741
10742         nr = btrfs_header_nritems(eb);
10743         for (i = 0; i < nr; i++) {
10744                 btrfs_item_key_to_cpu(eb, &key, i);
10745                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10746                         continue;
10747
10748                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10749                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10750                         continue;
10751
10752                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10753                         found_parent = 1;
10754                         break;
10755                 }
10756         }
10757
10758 out:
10759         free_extent_buffer(eb);
10760         if (!found_parent) {
10761                 error("shared extent %llu referencer lost (parent: %llu)",
10762                         bytenr, parent);
10763                 return REFERENCER_MISSING;
10764         }
10765         return 0;
10766 }
10767
10768 /*
10769  * This function will check a given extent item, including its backref and
10770  * itself (like crossing stripe boundary and type)
10771  *
10772  * Since we don't use extent_record anymore, introduce new error bit
10773  */
10774 static int check_extent_item(struct btrfs_fs_info *fs_info,
10775                              struct extent_buffer *eb, int slot)
10776 {
10777         struct btrfs_extent_item *ei;
10778         struct btrfs_extent_inline_ref *iref;
10779         struct btrfs_extent_data_ref *dref;
10780         unsigned long end;
10781         unsigned long ptr;
10782         int type;
10783         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10784         u32 item_size = btrfs_item_size_nr(eb, slot);
10785         u64 flags;
10786         u64 offset;
10787         int metadata = 0;
10788         int level;
10789         struct btrfs_key key;
10790         int ret;
10791         int err = 0;
10792
10793         btrfs_item_key_to_cpu(eb, &key, slot);
10794         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10795                 bytes_used += key.offset;
10796         else
10797                 bytes_used += nodesize;
10798
10799         if (item_size < sizeof(*ei)) {
10800                 /*
10801                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10802                  * old thing when on disk format is still un-determined.
10803                  * No need to care about it anymore
10804                  */
10805                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10806                 return -ENOTTY;
10807         }
10808
10809         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10810         flags = btrfs_extent_flags(eb, ei);
10811
10812         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10813                 metadata = 1;
10814         if (metadata && check_crossing_stripes(global_info, key.objectid,
10815                                                eb->len)) {
10816                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10817                       key.objectid, key.objectid + nodesize);
10818                 err |= CROSSING_STRIPE_BOUNDARY;
10819         }
10820
10821         ptr = (unsigned long)(ei + 1);
10822
10823         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10824                 /* Old EXTENT_ITEM metadata */
10825                 struct btrfs_tree_block_info *info;
10826
10827                 info = (struct btrfs_tree_block_info *)ptr;
10828                 level = btrfs_tree_block_level(eb, info);
10829                 ptr += sizeof(struct btrfs_tree_block_info);
10830         } else {
10831                 /* New METADATA_ITEM */
10832                 level = key.offset;
10833         }
10834         end = (unsigned long)ei + item_size;
10835
10836 next:
10837         /* Reached extent item end normally */
10838         if (ptr == end)
10839                 goto out;
10840
10841         /* Beyond extent item end, wrong item size */
10842         if (ptr > end) {
10843                 err |= ITEM_SIZE_MISMATCH;
10844                 error("extent item at bytenr %llu slot %d has wrong size",
10845                         eb->start, slot);
10846                 goto out;
10847         }
10848
10849         /* Now check every backref in this extent item */
10850         iref = (struct btrfs_extent_inline_ref *)ptr;
10851         type = btrfs_extent_inline_ref_type(eb, iref);
10852         offset = btrfs_extent_inline_ref_offset(eb, iref);
10853         switch (type) {
10854         case BTRFS_TREE_BLOCK_REF_KEY:
10855                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10856                                                level);
10857                 err |= ret;
10858                 break;
10859         case BTRFS_SHARED_BLOCK_REF_KEY:
10860                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10861                                                  level);
10862                 err |= ret;
10863                 break;
10864         case BTRFS_EXTENT_DATA_REF_KEY:
10865                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10866                 ret = check_extent_data_backref(fs_info,
10867                                 btrfs_extent_data_ref_root(eb, dref),
10868                                 btrfs_extent_data_ref_objectid(eb, dref),
10869                                 btrfs_extent_data_ref_offset(eb, dref),
10870                                 key.objectid, key.offset,
10871                                 btrfs_extent_data_ref_count(eb, dref));
10872                 err |= ret;
10873                 break;
10874         case BTRFS_SHARED_DATA_REF_KEY:
10875                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10876                 err |= ret;
10877                 break;
10878         default:
10879                 error("extent[%llu %d %llu] has unknown ref type: %d",
10880                         key.objectid, key.type, key.offset, type);
10881                 err |= UNKNOWN_TYPE;
10882                 goto out;
10883         }
10884
10885         ptr += btrfs_extent_inline_ref_size(type);
10886         goto next;
10887
10888 out:
10889         return err;
10890 }
10891
10892 /*
10893  * Check if a dev extent item is referred correctly by its chunk
10894  */
10895 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10896                                  struct extent_buffer *eb, int slot)
10897 {
10898         struct btrfs_root *chunk_root = fs_info->chunk_root;
10899         struct btrfs_dev_extent *ptr;
10900         struct btrfs_path path;
10901         struct btrfs_key chunk_key;
10902         struct btrfs_key devext_key;
10903         struct btrfs_chunk *chunk;
10904         struct extent_buffer *l;
10905         int num_stripes;
10906         u64 length;
10907         int i;
10908         int found_chunk = 0;
10909         int ret;
10910
10911         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10912         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10913         length = btrfs_dev_extent_length(eb, ptr);
10914
10915         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10916         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10917         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10918
10919         btrfs_init_path(&path);
10920         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10921         if (ret)
10922                 goto out;
10923
10924         l = path.nodes[0];
10925         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10926         ret = btrfs_check_chunk_valid(chunk_root, l, chunk, path.slots[0],
10927                                       chunk_key.offset);
10928         if (ret < 0)
10929                 goto out;
10930
10931         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10932                 goto out;
10933
10934         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10935         for (i = 0; i < num_stripes; i++) {
10936                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10937                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10938
10939                 if (devid == devext_key.objectid &&
10940                     offset == devext_key.offset) {
10941                         found_chunk = 1;
10942                         break;
10943                 }
10944         }
10945 out:
10946         btrfs_release_path(&path);
10947         if (!found_chunk) {
10948                 error(
10949                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10950                         devext_key.objectid, devext_key.offset, length);
10951                 return REFERENCER_MISSING;
10952         }
10953         return 0;
10954 }
10955
10956 /*
10957  * Check if the used space is correct with the dev item
10958  */
10959 static int check_dev_item(struct btrfs_fs_info *fs_info,
10960                           struct extent_buffer *eb, int slot)
10961 {
10962         struct btrfs_root *dev_root = fs_info->dev_root;
10963         struct btrfs_dev_item *dev_item;
10964         struct btrfs_path path;
10965         struct btrfs_key key;
10966         struct btrfs_dev_extent *ptr;
10967         u64 dev_id;
10968         u64 used;
10969         u64 total = 0;
10970         int ret;
10971
10972         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10973         dev_id = btrfs_device_id(eb, dev_item);
10974         used = btrfs_device_bytes_used(eb, dev_item);
10975
10976         key.objectid = dev_id;
10977         key.type = BTRFS_DEV_EXTENT_KEY;
10978         key.offset = 0;
10979
10980         btrfs_init_path(&path);
10981         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10982         if (ret < 0) {
10983                 btrfs_item_key_to_cpu(eb, &key, slot);
10984                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10985                         key.objectid, key.type, key.offset);
10986                 btrfs_release_path(&path);
10987                 return REFERENCER_MISSING;
10988         }
10989
10990         /* Iterate dev_extents to calculate the used space of a device */
10991         while (1) {
10992                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10993                         goto next;
10994
10995                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10996                 if (key.objectid > dev_id)
10997                         break;
10998                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10999                         goto next;
11000
11001                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11002                                      struct btrfs_dev_extent);
11003                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11004 next:
11005                 ret = btrfs_next_item(dev_root, &path);
11006                 if (ret)
11007                         break;
11008         }
11009         btrfs_release_path(&path);
11010
11011         if (used != total) {
11012                 btrfs_item_key_to_cpu(eb, &key, slot);
11013                 error(
11014 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11015                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11016                         BTRFS_DEV_EXTENT_KEY, dev_id);
11017                 return ACCOUNTING_MISMATCH;
11018         }
11019         return 0;
11020 }
11021
11022 /*
11023  * Check a block group item with its referener (chunk) and its used space
11024  * with extent/metadata item
11025  */
11026 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11027                                   struct extent_buffer *eb, int slot)
11028 {
11029         struct btrfs_root *extent_root = fs_info->extent_root;
11030         struct btrfs_root *chunk_root = fs_info->chunk_root;
11031         struct btrfs_block_group_item *bi;
11032         struct btrfs_block_group_item bg_item;
11033         struct btrfs_path path;
11034         struct btrfs_key bg_key;
11035         struct btrfs_key chunk_key;
11036         struct btrfs_key extent_key;
11037         struct btrfs_chunk *chunk;
11038         struct extent_buffer *leaf;
11039         struct btrfs_extent_item *ei;
11040         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11041         u64 flags;
11042         u64 bg_flags;
11043         u64 used;
11044         u64 total = 0;
11045         int ret;
11046         int err = 0;
11047
11048         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11049         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11050         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11051         used = btrfs_block_group_used(&bg_item);
11052         bg_flags = btrfs_block_group_flags(&bg_item);
11053
11054         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11055         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11056         chunk_key.offset = bg_key.objectid;
11057
11058         btrfs_init_path(&path);
11059         /* Search for the referencer chunk */
11060         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11061         if (ret) {
11062                 error(
11063                 "block group[%llu %llu] did not find the related chunk item",
11064                         bg_key.objectid, bg_key.offset);
11065                 err |= REFERENCER_MISSING;
11066         } else {
11067                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11068                                         struct btrfs_chunk);
11069                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11070                                                 bg_key.offset) {
11071                         error(
11072         "block group[%llu %llu] related chunk item length does not match",
11073                                 bg_key.objectid, bg_key.offset);
11074                         err |= REFERENCER_MISMATCH;
11075                 }
11076         }
11077         btrfs_release_path(&path);
11078
11079         /* Search from the block group bytenr */
11080         extent_key.objectid = bg_key.objectid;
11081         extent_key.type = 0;
11082         extent_key.offset = 0;
11083
11084         btrfs_init_path(&path);
11085         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11086         if (ret < 0)
11087                 goto out;
11088
11089         /* Iterate extent tree to account used space */
11090         while (1) {
11091                 leaf = path.nodes[0];
11092
11093                 /* Search slot can point to the last item beyond leaf nritems */
11094                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11095                         goto next;
11096
11097                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11098                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11099                         break;
11100
11101                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11102                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11103                         goto next;
11104                 if (extent_key.objectid < bg_key.objectid)
11105                         goto next;
11106
11107                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11108                         total += nodesize;
11109                 else
11110                         total += extent_key.offset;
11111
11112                 ei = btrfs_item_ptr(leaf, path.slots[0],
11113                                     struct btrfs_extent_item);
11114                 flags = btrfs_extent_flags(leaf, ei);
11115                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11116                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11117                                 error(
11118                         "bad extent[%llu, %llu) type mismatch with chunk",
11119                                         extent_key.objectid,
11120                                         extent_key.objectid + extent_key.offset);
11121                                 err |= CHUNK_TYPE_MISMATCH;
11122                         }
11123                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11124                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11125                                     BTRFS_BLOCK_GROUP_METADATA))) {
11126                                 error(
11127                         "bad extent[%llu, %llu) type mismatch with chunk",
11128                                         extent_key.objectid,
11129                                         extent_key.objectid + nodesize);
11130                                 err |= CHUNK_TYPE_MISMATCH;
11131                         }
11132                 }
11133 next:
11134                 ret = btrfs_next_item(extent_root, &path);
11135                 if (ret)
11136                         break;
11137         }
11138
11139 out:
11140         btrfs_release_path(&path);
11141
11142         if (total != used) {
11143                 error(
11144                 "block group[%llu %llu] used %llu but extent items used %llu",
11145                         bg_key.objectid, bg_key.offset, used, total);
11146                 err |= ACCOUNTING_MISMATCH;
11147         }
11148         return err;
11149 }
11150
11151 /*
11152  * Check a chunk item.
11153  * Including checking all referred dev_extents and block group
11154  */
11155 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11156                             struct extent_buffer *eb, int slot)
11157 {
11158         struct btrfs_root *extent_root = fs_info->extent_root;
11159         struct btrfs_root *dev_root = fs_info->dev_root;
11160         struct btrfs_path path;
11161         struct btrfs_key chunk_key;
11162         struct btrfs_key bg_key;
11163         struct btrfs_key devext_key;
11164         struct btrfs_chunk *chunk;
11165         struct extent_buffer *leaf;
11166         struct btrfs_block_group_item *bi;
11167         struct btrfs_block_group_item bg_item;
11168         struct btrfs_dev_extent *ptr;
11169         u64 length;
11170         u64 chunk_end;
11171         u64 stripe_len;
11172         u64 type;
11173         int num_stripes;
11174         u64 offset;
11175         u64 objectid;
11176         int i;
11177         int ret;
11178         int err = 0;
11179
11180         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11181         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11182         length = btrfs_chunk_length(eb, chunk);
11183         chunk_end = chunk_key.offset + length;
11184         ret = btrfs_check_chunk_valid(extent_root, eb, chunk, slot,
11185                                       chunk_key.offset);
11186         if (ret < 0) {
11187                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11188                         chunk_end);
11189                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11190                 goto out;
11191         }
11192         type = btrfs_chunk_type(eb, chunk);
11193
11194         bg_key.objectid = chunk_key.offset;
11195         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11196         bg_key.offset = length;
11197
11198         btrfs_init_path(&path);
11199         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11200         if (ret) {
11201                 error(
11202                 "chunk[%llu %llu) did not find the related block group item",
11203                         chunk_key.offset, chunk_end);
11204                 err |= REFERENCER_MISSING;
11205         } else{
11206                 leaf = path.nodes[0];
11207                 bi = btrfs_item_ptr(leaf, path.slots[0],
11208                                     struct btrfs_block_group_item);
11209                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11210                                    sizeof(bg_item));
11211                 if (btrfs_block_group_flags(&bg_item) != type) {
11212                         error(
11213 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11214                                 chunk_key.offset, chunk_end, type,
11215                                 btrfs_block_group_flags(&bg_item));
11216                         err |= REFERENCER_MISSING;
11217                 }
11218         }
11219
11220         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11221         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11222         for (i = 0; i < num_stripes; i++) {
11223                 btrfs_release_path(&path);
11224                 btrfs_init_path(&path);
11225                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11226                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11227                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11228
11229                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11230                                         0, 0);
11231                 if (ret)
11232                         goto not_match_dev;
11233
11234                 leaf = path.nodes[0];
11235                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11236                                      struct btrfs_dev_extent);
11237                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11238                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11239                 if (objectid != chunk_key.objectid ||
11240                     offset != chunk_key.offset ||
11241                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11242                         goto not_match_dev;
11243                 continue;
11244 not_match_dev:
11245                 err |= BACKREF_MISSING;
11246                 error(
11247                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11248                         chunk_key.objectid, chunk_end, i);
11249                 continue;
11250         }
11251         btrfs_release_path(&path);
11252 out:
11253         return err;
11254 }
11255
11256 /*
11257  * Main entry function to check known items and update related accounting info
11258  */
11259 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11260 {
11261         struct btrfs_fs_info *fs_info = root->fs_info;
11262         struct btrfs_key key;
11263         int slot = 0;
11264         int type;
11265         struct btrfs_extent_data_ref *dref;
11266         int ret;
11267         int err = 0;
11268
11269 next:
11270         btrfs_item_key_to_cpu(eb, &key, slot);
11271         type = key.type;
11272
11273         switch (type) {
11274         case BTRFS_EXTENT_DATA_KEY:
11275                 ret = check_extent_data_item(root, eb, slot);
11276                 err |= ret;
11277                 break;
11278         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11279                 ret = check_block_group_item(fs_info, eb, slot);
11280                 err |= ret;
11281                 break;
11282         case BTRFS_DEV_ITEM_KEY:
11283                 ret = check_dev_item(fs_info, eb, slot);
11284                 err |= ret;
11285                 break;
11286         case BTRFS_CHUNK_ITEM_KEY:
11287                 ret = check_chunk_item(fs_info, eb, slot);
11288                 err |= ret;
11289                 break;
11290         case BTRFS_DEV_EXTENT_KEY:
11291                 ret = check_dev_extent_item(fs_info, eb, slot);
11292                 err |= ret;
11293                 break;
11294         case BTRFS_EXTENT_ITEM_KEY:
11295         case BTRFS_METADATA_ITEM_KEY:
11296                 ret = check_extent_item(fs_info, eb, slot);
11297                 err |= ret;
11298                 break;
11299         case BTRFS_EXTENT_CSUM_KEY:
11300                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11301                 break;
11302         case BTRFS_TREE_BLOCK_REF_KEY:
11303                 ret = check_tree_block_backref(fs_info, key.offset,
11304                                                key.objectid, -1);
11305                 err |= ret;
11306                 break;
11307         case BTRFS_EXTENT_DATA_REF_KEY:
11308                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11309                 ret = check_extent_data_backref(fs_info,
11310                                 btrfs_extent_data_ref_root(eb, dref),
11311                                 btrfs_extent_data_ref_objectid(eb, dref),
11312                                 btrfs_extent_data_ref_offset(eb, dref),
11313                                 key.objectid, 0,
11314                                 btrfs_extent_data_ref_count(eb, dref));
11315                 err |= ret;
11316                 break;
11317         case BTRFS_SHARED_BLOCK_REF_KEY:
11318                 ret = check_shared_block_backref(fs_info, key.offset,
11319                                                  key.objectid, -1);
11320                 err |= ret;
11321                 break;
11322         case BTRFS_SHARED_DATA_REF_KEY:
11323                 ret = check_shared_data_backref(fs_info, key.offset,
11324                                                 key.objectid);
11325                 err |= ret;
11326                 break;
11327         default:
11328                 break;
11329         }
11330
11331         if (++slot < btrfs_header_nritems(eb))
11332                 goto next;
11333
11334         return err;
11335 }
11336
11337 /*
11338  * Helper function for later fs/subvol tree check.  To determine if a tree
11339  * block should be checked.
11340  * This function will ensure only the direct referencer with lowest rootid to
11341  * check a fs/subvolume tree block.
11342  *
11343  * Backref check at extent tree would detect errors like missing subvolume
11344  * tree, so we can do aggressive check to reduce duplicated checks.
11345  */
11346 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11347 {
11348         struct btrfs_root *extent_root = root->fs_info->extent_root;
11349         struct btrfs_key key;
11350         struct btrfs_path path;
11351         struct extent_buffer *leaf;
11352         int slot;
11353         struct btrfs_extent_item *ei;
11354         unsigned long ptr;
11355         unsigned long end;
11356         int type;
11357         u32 item_size;
11358         u64 offset;
11359         struct btrfs_extent_inline_ref *iref;
11360         int ret;
11361
11362         btrfs_init_path(&path);
11363         key.objectid = btrfs_header_bytenr(eb);
11364         key.type = BTRFS_METADATA_ITEM_KEY;
11365         key.offset = (u64)-1;
11366
11367         /*
11368          * Any failure in backref resolving means we can't determine
11369          * whom the tree block belongs to.
11370          * So in that case, we need to check that tree block
11371          */
11372         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11373         if (ret < 0)
11374                 goto need_check;
11375
11376         ret = btrfs_previous_extent_item(extent_root, &path,
11377                                          btrfs_header_bytenr(eb));
11378         if (ret)
11379                 goto need_check;
11380
11381         leaf = path.nodes[0];
11382         slot = path.slots[0];
11383         btrfs_item_key_to_cpu(leaf, &key, slot);
11384         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11385
11386         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11387                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11388         } else {
11389                 struct btrfs_tree_block_info *info;
11390
11391                 info = (struct btrfs_tree_block_info *)(ei + 1);
11392                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11393         }
11394
11395         item_size = btrfs_item_size_nr(leaf, slot);
11396         ptr = (unsigned long)iref;
11397         end = (unsigned long)ei + item_size;
11398         while (ptr < end) {
11399                 iref = (struct btrfs_extent_inline_ref *)ptr;
11400                 type = btrfs_extent_inline_ref_type(leaf, iref);
11401                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11402
11403                 /*
11404                  * We only check the tree block if current root is
11405                  * the lowest referencer of it.
11406                  */
11407                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11408                     offset < root->objectid) {
11409                         btrfs_release_path(&path);
11410                         return 0;
11411                 }
11412
11413                 ptr += btrfs_extent_inline_ref_size(type);
11414         }
11415         /*
11416          * Normally we should also check keyed tree block ref, but that may be
11417          * very time consuming.  Inlined ref should already make us skip a lot
11418          * of refs now.  So skip search keyed tree block ref.
11419          */
11420
11421 need_check:
11422         btrfs_release_path(&path);
11423         return 1;
11424 }
11425
11426 /*
11427  * Traversal function for tree block. We will do:
11428  * 1) Skip shared fs/subvolume tree blocks
11429  * 2) Update related bytes accounting
11430  * 3) Pre-order traversal
11431  */
11432 static int traverse_tree_block(struct btrfs_root *root,
11433                                 struct extent_buffer *node)
11434 {
11435         struct extent_buffer *eb;
11436         struct btrfs_key key;
11437         struct btrfs_key drop_key;
11438         int level;
11439         u64 nr;
11440         int i;
11441         int err = 0;
11442         int ret;
11443
11444         /*
11445          * Skip shared fs/subvolume tree block, in that case they will
11446          * be checked by referencer with lowest rootid
11447          */
11448         if (is_fstree(root->objectid) && !should_check(root, node))
11449                 return 0;
11450
11451         /* Update bytes accounting */
11452         total_btree_bytes += node->len;
11453         if (fs_root_objectid(btrfs_header_owner(node)))
11454                 total_fs_tree_bytes += node->len;
11455         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11456                 total_extent_tree_bytes += node->len;
11457         if (!found_old_backref &&
11458             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11459             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11460             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11461                 found_old_backref = 1;
11462
11463         /* pre-order tranversal, check itself first */
11464         level = btrfs_header_level(node);
11465         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11466                                    btrfs_header_level(node),
11467                                    btrfs_header_owner(node));
11468         err |= ret;
11469         if (err)
11470                 error(
11471         "check %s failed root %llu bytenr %llu level %d, force continue check",
11472                         level ? "node":"leaf", root->objectid,
11473                         btrfs_header_bytenr(node), btrfs_header_level(node));
11474
11475         if (!level) {
11476                 btree_space_waste += btrfs_leaf_free_space(root, node);
11477                 ret = check_leaf_items(root, node);
11478                 err |= ret;
11479                 return err;
11480         }
11481
11482         nr = btrfs_header_nritems(node);
11483         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11484         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11485                 sizeof(struct btrfs_key_ptr);
11486
11487         /* Then check all its children */
11488         for (i = 0; i < nr; i++) {
11489                 u64 blocknr = btrfs_node_blockptr(node, i);
11490
11491                 btrfs_node_key_to_cpu(node, &key, i);
11492                 if (level == root->root_item.drop_level &&
11493                     is_dropped_key(&key, &drop_key))
11494                         continue;
11495
11496                 /*
11497                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11498                  * to call the function itself.
11499                  */
11500                 eb = read_tree_block(root->fs_info, blocknr,
11501                                 root->fs_info->nodesize, 0);
11502                 if (extent_buffer_uptodate(eb)) {
11503                         ret = traverse_tree_block(root, eb);
11504                         err |= ret;
11505                 }
11506                 free_extent_buffer(eb);
11507         }
11508
11509         return err;
11510 }
11511
11512 /*
11513  * Low memory usage version check_chunks_and_extents.
11514  */
11515 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11516 {
11517         struct btrfs_path path;
11518         struct btrfs_key key;
11519         struct btrfs_root *root1;
11520         struct btrfs_root *cur_root;
11521         int err = 0;
11522         int ret;
11523
11524         root1 = root->fs_info->chunk_root;
11525         ret = traverse_tree_block(root1, root1->node);
11526         err |= ret;
11527
11528         root1 = root->fs_info->tree_root;
11529         ret = traverse_tree_block(root1, root1->node);
11530         err |= ret;
11531
11532         btrfs_init_path(&path);
11533         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11534         key.offset = 0;
11535         key.type = BTRFS_ROOT_ITEM_KEY;
11536
11537         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11538         if (ret) {
11539                 error("cannot find extent treet in tree_root");
11540                 goto out;
11541         }
11542
11543         while (1) {
11544                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11545                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11546                         goto next;
11547                 key.offset = (u64)-1;
11548
11549                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11550                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11551                                         &key);
11552                 else
11553                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11554                 if (IS_ERR(cur_root) || !cur_root) {
11555                         error("failed to read tree: %lld", key.objectid);
11556                         goto next;
11557                 }
11558
11559                 ret = traverse_tree_block(cur_root, cur_root->node);
11560                 err |= ret;
11561
11562                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11563                         btrfs_free_fs_root(cur_root);
11564 next:
11565                 ret = btrfs_next_item(root1, &path);
11566                 if (ret)
11567                         goto out;
11568         }
11569
11570 out:
11571         btrfs_release_path(&path);
11572         return err;
11573 }
11574
11575 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11576                            struct btrfs_root *root, int overwrite)
11577 {
11578         struct extent_buffer *c;
11579         struct extent_buffer *old = root->node;
11580         int level;
11581         int ret;
11582         struct btrfs_disk_key disk_key = {0,0,0};
11583
11584         level = 0;
11585
11586         if (overwrite) {
11587                 c = old;
11588                 extent_buffer_get(c);
11589                 goto init;
11590         }
11591         c = btrfs_alloc_free_block(trans, root,
11592                                    root->fs_info->nodesize,
11593                                    root->root_key.objectid,
11594                                    &disk_key, level, 0, 0);
11595         if (IS_ERR(c)) {
11596                 c = old;
11597                 extent_buffer_get(c);
11598                 overwrite = 1;
11599         }
11600 init:
11601         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11602         btrfs_set_header_level(c, level);
11603         btrfs_set_header_bytenr(c, c->start);
11604         btrfs_set_header_generation(c, trans->transid);
11605         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11606         btrfs_set_header_owner(c, root->root_key.objectid);
11607
11608         write_extent_buffer(c, root->fs_info->fsid,
11609                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11610
11611         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11612                             btrfs_header_chunk_tree_uuid(c),
11613                             BTRFS_UUID_SIZE);
11614
11615         btrfs_mark_buffer_dirty(c);
11616         /*
11617          * this case can happen in the following case:
11618          *
11619          * 1.overwrite previous root.
11620          *
11621          * 2.reinit reloc data root, this is because we skip pin
11622          * down reloc data tree before which means we can allocate
11623          * same block bytenr here.
11624          */
11625         if (old->start == c->start) {
11626                 btrfs_set_root_generation(&root->root_item,
11627                                           trans->transid);
11628                 root->root_item.level = btrfs_header_level(root->node);
11629                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11630                                         &root->root_key, &root->root_item);
11631                 if (ret) {
11632                         free_extent_buffer(c);
11633                         return ret;
11634                 }
11635         }
11636         free_extent_buffer(old);
11637         root->node = c;
11638         add_root_to_dirty_list(root);
11639         return 0;
11640 }
11641
11642 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11643                                 struct extent_buffer *eb, int tree_root)
11644 {
11645         struct extent_buffer *tmp;
11646         struct btrfs_root_item *ri;
11647         struct btrfs_key key;
11648         u64 bytenr;
11649         u32 nodesize;
11650         int level = btrfs_header_level(eb);
11651         int nritems;
11652         int ret;
11653         int i;
11654
11655         /*
11656          * If we have pinned this block before, don't pin it again.
11657          * This can not only avoid forever loop with broken filesystem
11658          * but also give us some speedups.
11659          */
11660         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11661                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11662                 return 0;
11663
11664         btrfs_pin_extent(fs_info, eb->start, eb->len);
11665
11666         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11667         nritems = btrfs_header_nritems(eb);
11668         for (i = 0; i < nritems; i++) {
11669                 if (level == 0) {
11670                         btrfs_item_key_to_cpu(eb, &key, i);
11671                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11672                                 continue;
11673                         /* Skip the extent root and reloc roots */
11674                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11675                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11676                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11677                                 continue;
11678                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11679                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11680
11681                         /*
11682                          * If at any point we start needing the real root we
11683                          * will have to build a stump root for the root we are
11684                          * in, but for now this doesn't actually use the root so
11685                          * just pass in extent_root.
11686                          */
11687                         tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11688                         if (!extent_buffer_uptodate(tmp)) {
11689                                 fprintf(stderr, "Error reading root block\n");
11690                                 return -EIO;
11691                         }
11692                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11693                         free_extent_buffer(tmp);
11694                         if (ret)
11695                                 return ret;
11696                 } else {
11697                         bytenr = btrfs_node_blockptr(eb, i);
11698
11699                         /* If we aren't the tree root don't read the block */
11700                         if (level == 1 && !tree_root) {
11701                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11702                                 continue;
11703                         }
11704
11705                         tmp = read_tree_block(fs_info, bytenr,
11706                                               nodesize, 0);
11707                         if (!extent_buffer_uptodate(tmp)) {
11708                                 fprintf(stderr, "Error reading tree block\n");
11709                                 return -EIO;
11710                         }
11711                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11712                         free_extent_buffer(tmp);
11713                         if (ret)
11714                                 return ret;
11715                 }
11716         }
11717
11718         return 0;
11719 }
11720
11721 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11722 {
11723         int ret;
11724
11725         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11726         if (ret)
11727                 return ret;
11728
11729         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11730 }
11731
11732 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11733 {
11734         struct btrfs_block_group_cache *cache;
11735         struct btrfs_path path;
11736         struct extent_buffer *leaf;
11737         struct btrfs_chunk *chunk;
11738         struct btrfs_key key;
11739         int ret;
11740         u64 start;
11741
11742         btrfs_init_path(&path);
11743         key.objectid = 0;
11744         key.type = BTRFS_CHUNK_ITEM_KEY;
11745         key.offset = 0;
11746         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11747         if (ret < 0) {
11748                 btrfs_release_path(&path);
11749                 return ret;
11750         }
11751
11752         /*
11753          * We do this in case the block groups were screwed up and had alloc
11754          * bits that aren't actually set on the chunks.  This happens with
11755          * restored images every time and could happen in real life I guess.
11756          */
11757         fs_info->avail_data_alloc_bits = 0;
11758         fs_info->avail_metadata_alloc_bits = 0;
11759         fs_info->avail_system_alloc_bits = 0;
11760
11761         /* First we need to create the in-memory block groups */
11762         while (1) {
11763                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11764                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11765                         if (ret < 0) {
11766                                 btrfs_release_path(&path);
11767                                 return ret;
11768                         }
11769                         if (ret) {
11770                                 ret = 0;
11771                                 break;
11772                         }
11773                 }
11774                 leaf = path.nodes[0];
11775                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11776                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11777                         path.slots[0]++;
11778                         continue;
11779                 }
11780
11781                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11782                 btrfs_add_block_group(fs_info, 0,
11783                                       btrfs_chunk_type(leaf, chunk),
11784                                       key.objectid, key.offset,
11785                                       btrfs_chunk_length(leaf, chunk));
11786                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11787                                  key.offset + btrfs_chunk_length(leaf, chunk));
11788                 path.slots[0]++;
11789         }
11790         start = 0;
11791         while (1) {
11792                 cache = btrfs_lookup_first_block_group(fs_info, start);
11793                 if (!cache)
11794                         break;
11795                 cache->cached = 1;
11796                 start = cache->key.objectid + cache->key.offset;
11797         }
11798
11799         btrfs_release_path(&path);
11800         return 0;
11801 }
11802
11803 static int reset_balance(struct btrfs_trans_handle *trans,
11804                          struct btrfs_fs_info *fs_info)
11805 {
11806         struct btrfs_root *root = fs_info->tree_root;
11807         struct btrfs_path path;
11808         struct extent_buffer *leaf;
11809         struct btrfs_key key;
11810         int del_slot, del_nr = 0;
11811         int ret;
11812         int found = 0;
11813
11814         btrfs_init_path(&path);
11815         key.objectid = BTRFS_BALANCE_OBJECTID;
11816         key.type = BTRFS_BALANCE_ITEM_KEY;
11817         key.offset = 0;
11818         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11819         if (ret) {
11820                 if (ret > 0)
11821                         ret = 0;
11822                 if (!ret)
11823                         goto reinit_data_reloc;
11824                 else
11825                         goto out;
11826         }
11827
11828         ret = btrfs_del_item(trans, root, &path);
11829         if (ret)
11830                 goto out;
11831         btrfs_release_path(&path);
11832
11833         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11834         key.type = BTRFS_ROOT_ITEM_KEY;
11835         key.offset = 0;
11836         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11837         if (ret < 0)
11838                 goto out;
11839         while (1) {
11840                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11841                         if (!found)
11842                                 break;
11843
11844                         if (del_nr) {
11845                                 ret = btrfs_del_items(trans, root, &path,
11846                                                       del_slot, del_nr);
11847                                 del_nr = 0;
11848                                 if (ret)
11849                                         goto out;
11850                         }
11851                         key.offset++;
11852                         btrfs_release_path(&path);
11853
11854                         found = 0;
11855                         ret = btrfs_search_slot(trans, root, &key, &path,
11856                                                 -1, 1);
11857                         if (ret < 0)
11858                                 goto out;
11859                         continue;
11860                 }
11861                 found = 1;
11862                 leaf = path.nodes[0];
11863                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11864                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11865                         break;
11866                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11867                         path.slots[0]++;
11868                         continue;
11869                 }
11870                 if (!del_nr) {
11871                         del_slot = path.slots[0];
11872                         del_nr = 1;
11873                 } else {
11874                         del_nr++;
11875                 }
11876                 path.slots[0]++;
11877         }
11878
11879         if (del_nr) {
11880                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11881                 if (ret)
11882                         goto out;
11883         }
11884         btrfs_release_path(&path);
11885
11886 reinit_data_reloc:
11887         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11888         key.type = BTRFS_ROOT_ITEM_KEY;
11889         key.offset = (u64)-1;
11890         root = btrfs_read_fs_root(fs_info, &key);
11891         if (IS_ERR(root)) {
11892                 fprintf(stderr, "Error reading data reloc tree\n");
11893                 ret = PTR_ERR(root);
11894                 goto out;
11895         }
11896         record_root_in_trans(trans, root);
11897         ret = btrfs_fsck_reinit_root(trans, root, 0);
11898         if (ret)
11899                 goto out;
11900         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11901 out:
11902         btrfs_release_path(&path);
11903         return ret;
11904 }
11905
11906 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11907                               struct btrfs_fs_info *fs_info)
11908 {
11909         u64 start = 0;
11910         int ret;
11911
11912         /*
11913          * The only reason we don't do this is because right now we're just
11914          * walking the trees we find and pinning down their bytes, we don't look
11915          * at any of the leaves.  In order to do mixed groups we'd have to check
11916          * the leaves of any fs roots and pin down the bytes for any file
11917          * extents we find.  Not hard but why do it if we don't have to?
11918          */
11919         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11920                 fprintf(stderr, "We don't support re-initing the extent tree "
11921                         "for mixed block groups yet, please notify a btrfs "
11922                         "developer you want to do this so they can add this "
11923                         "functionality.\n");
11924                 return -EINVAL;
11925         }
11926
11927         /*
11928          * first we need to walk all of the trees except the extent tree and pin
11929          * down the bytes that are in use so we don't overwrite any existing
11930          * metadata.
11931          */
11932         ret = pin_metadata_blocks(fs_info);
11933         if (ret) {
11934                 fprintf(stderr, "error pinning down used bytes\n");
11935                 return ret;
11936         }
11937
11938         /*
11939          * Need to drop all the block groups since we're going to recreate all
11940          * of them again.
11941          */
11942         btrfs_free_block_groups(fs_info);
11943         ret = reset_block_groups(fs_info);
11944         if (ret) {
11945                 fprintf(stderr, "error resetting the block groups\n");
11946                 return ret;
11947         }
11948
11949         /* Ok we can allocate now, reinit the extent root */
11950         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11951         if (ret) {
11952                 fprintf(stderr, "extent root initialization failed\n");
11953                 /*
11954                  * When the transaction code is updated we should end the
11955                  * transaction, but for now progs only knows about commit so
11956                  * just return an error.
11957                  */
11958                 return ret;
11959         }
11960
11961         /*
11962          * Now we have all the in-memory block groups setup so we can make
11963          * allocations properly, and the metadata we care about is safe since we
11964          * pinned all of it above.
11965          */
11966         while (1) {
11967                 struct btrfs_block_group_cache *cache;
11968
11969                 cache = btrfs_lookup_first_block_group(fs_info, start);
11970                 if (!cache)
11971                         break;
11972                 start = cache->key.objectid + cache->key.offset;
11973                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11974                                         &cache->key, &cache->item,
11975                                         sizeof(cache->item));
11976                 if (ret) {
11977                         fprintf(stderr, "Error adding block group\n");
11978                         return ret;
11979                 }
11980                 btrfs_extent_post_op(trans, fs_info->extent_root);
11981         }
11982
11983         ret = reset_balance(trans, fs_info);
11984         if (ret)
11985                 fprintf(stderr, "error resetting the pending balance\n");
11986
11987         return ret;
11988 }
11989
11990 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11991 {
11992         struct btrfs_path path;
11993         struct btrfs_trans_handle *trans;
11994         struct btrfs_key key;
11995         int ret;
11996
11997         printf("Recowing metadata block %llu\n", eb->start);
11998         key.objectid = btrfs_header_owner(eb);
11999         key.type = BTRFS_ROOT_ITEM_KEY;
12000         key.offset = (u64)-1;
12001
12002         root = btrfs_read_fs_root(root->fs_info, &key);
12003         if (IS_ERR(root)) {
12004                 fprintf(stderr, "Couldn't find owner root %llu\n",
12005                         key.objectid);
12006                 return PTR_ERR(root);
12007         }
12008
12009         trans = btrfs_start_transaction(root, 1);
12010         if (IS_ERR(trans))
12011                 return PTR_ERR(trans);
12012
12013         btrfs_init_path(&path);
12014         path.lowest_level = btrfs_header_level(eb);
12015         if (path.lowest_level)
12016                 btrfs_node_key_to_cpu(eb, &key, 0);
12017         else
12018                 btrfs_item_key_to_cpu(eb, &key, 0);
12019
12020         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12021         btrfs_commit_transaction(trans, root);
12022         btrfs_release_path(&path);
12023         return ret;
12024 }
12025
12026 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12027 {
12028         struct btrfs_path path;
12029         struct btrfs_trans_handle *trans;
12030         struct btrfs_key key;
12031         int ret;
12032
12033         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12034                bad->key.type, bad->key.offset);
12035         key.objectid = bad->root_id;
12036         key.type = BTRFS_ROOT_ITEM_KEY;
12037         key.offset = (u64)-1;
12038
12039         root = btrfs_read_fs_root(root->fs_info, &key);
12040         if (IS_ERR(root)) {
12041                 fprintf(stderr, "Couldn't find owner root %llu\n",
12042                         key.objectid);
12043                 return PTR_ERR(root);
12044         }
12045
12046         trans = btrfs_start_transaction(root, 1);
12047         if (IS_ERR(trans))
12048                 return PTR_ERR(trans);
12049
12050         btrfs_init_path(&path);
12051         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12052         if (ret) {
12053                 if (ret > 0)
12054                         ret = 0;
12055                 goto out;
12056         }
12057         ret = btrfs_del_item(trans, root, &path);
12058 out:
12059         btrfs_commit_transaction(trans, root);
12060         btrfs_release_path(&path);
12061         return ret;
12062 }
12063
12064 static int zero_log_tree(struct btrfs_root *root)
12065 {
12066         struct btrfs_trans_handle *trans;
12067         int ret;
12068
12069         trans = btrfs_start_transaction(root, 1);
12070         if (IS_ERR(trans)) {
12071                 ret = PTR_ERR(trans);
12072                 return ret;
12073         }
12074         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12075         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12076         ret = btrfs_commit_transaction(trans, root);
12077         return ret;
12078 }
12079
12080 static int populate_csum(struct btrfs_trans_handle *trans,
12081                          struct btrfs_root *csum_root, char *buf, u64 start,
12082                          u64 len)
12083 {
12084         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12085         u64 offset = 0;
12086         u64 sectorsize;
12087         int ret = 0;
12088
12089         while (offset < len) {
12090                 sectorsize = fs_info->sectorsize;
12091                 ret = read_extent_data(fs_info, buf, start + offset,
12092                                        &sectorsize, 0);
12093                 if (ret)
12094                         break;
12095                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12096                                             start + offset, buf, sectorsize);
12097                 if (ret)
12098                         break;
12099                 offset += sectorsize;
12100         }
12101         return ret;
12102 }
12103
12104 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12105                                       struct btrfs_root *csum_root,
12106                                       struct btrfs_root *cur_root)
12107 {
12108         struct btrfs_path path;
12109         struct btrfs_key key;
12110         struct extent_buffer *node;
12111         struct btrfs_file_extent_item *fi;
12112         char *buf = NULL;
12113         u64 start = 0;
12114         u64 len = 0;
12115         int slot = 0;
12116         int ret = 0;
12117
12118         buf = malloc(cur_root->fs_info->sectorsize);
12119         if (!buf)
12120                 return -ENOMEM;
12121
12122         btrfs_init_path(&path);
12123         key.objectid = 0;
12124         key.offset = 0;
12125         key.type = 0;
12126         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12127         if (ret < 0)
12128                 goto out;
12129         /* Iterate all regular file extents and fill its csum */
12130         while (1) {
12131                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12132
12133                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12134                         goto next;
12135                 node = path.nodes[0];
12136                 slot = path.slots[0];
12137                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12138                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12139                         goto next;
12140                 start = btrfs_file_extent_disk_bytenr(node, fi);
12141                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12142
12143                 ret = populate_csum(trans, csum_root, buf, start, len);
12144                 if (ret == -EEXIST)
12145                         ret = 0;
12146                 if (ret < 0)
12147                         goto out;
12148 next:
12149                 /*
12150                  * TODO: if next leaf is corrupted, jump to nearest next valid
12151                  * leaf.
12152                  */
12153                 ret = btrfs_next_item(cur_root, &path);
12154                 if (ret < 0)
12155                         goto out;
12156                 if (ret > 0) {
12157                         ret = 0;
12158                         goto out;
12159                 }
12160         }
12161
12162 out:
12163         btrfs_release_path(&path);
12164         free(buf);
12165         return ret;
12166 }
12167
12168 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12169                                   struct btrfs_root *csum_root)
12170 {
12171         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12172         struct btrfs_path path;
12173         struct btrfs_root *tree_root = fs_info->tree_root;
12174         struct btrfs_root *cur_root;
12175         struct extent_buffer *node;
12176         struct btrfs_key key;
12177         int slot = 0;
12178         int ret = 0;
12179
12180         btrfs_init_path(&path);
12181         key.objectid = BTRFS_FS_TREE_OBJECTID;
12182         key.offset = 0;
12183         key.type = BTRFS_ROOT_ITEM_KEY;
12184         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12185         if (ret < 0)
12186                 goto out;
12187         if (ret > 0) {
12188                 ret = -ENOENT;
12189                 goto out;
12190         }
12191
12192         while (1) {
12193                 node = path.nodes[0];
12194                 slot = path.slots[0];
12195                 btrfs_item_key_to_cpu(node, &key, slot);
12196                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12197                         goto out;
12198                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12199                         goto next;
12200                 if (!is_fstree(key.objectid))
12201                         goto next;
12202                 key.offset = (u64)-1;
12203
12204                 cur_root = btrfs_read_fs_root(fs_info, &key);
12205                 if (IS_ERR(cur_root) || !cur_root) {
12206                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12207                                 key.objectid);
12208                         goto out;
12209                 }
12210                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12211                                 cur_root);
12212                 if (ret < 0)
12213                         goto out;
12214 next:
12215                 ret = btrfs_next_item(tree_root, &path);
12216                 if (ret > 0) {
12217                         ret = 0;
12218                         goto out;
12219                 }
12220                 if (ret < 0)
12221                         goto out;
12222         }
12223
12224 out:
12225         btrfs_release_path(&path);
12226         return ret;
12227 }
12228
12229 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12230                                       struct btrfs_root *csum_root)
12231 {
12232         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12233         struct btrfs_path path;
12234         struct btrfs_extent_item *ei;
12235         struct extent_buffer *leaf;
12236         char *buf;
12237         struct btrfs_key key;
12238         int ret;
12239
12240         btrfs_init_path(&path);
12241         key.objectid = 0;
12242         key.type = BTRFS_EXTENT_ITEM_KEY;
12243         key.offset = 0;
12244         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12245         if (ret < 0) {
12246                 btrfs_release_path(&path);
12247                 return ret;
12248         }
12249
12250         buf = malloc(csum_root->fs_info->sectorsize);
12251         if (!buf) {
12252                 btrfs_release_path(&path);
12253                 return -ENOMEM;
12254         }
12255
12256         while (1) {
12257                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12258                         ret = btrfs_next_leaf(extent_root, &path);
12259                         if (ret < 0)
12260                                 break;
12261                         if (ret) {
12262                                 ret = 0;
12263                                 break;
12264                         }
12265                 }
12266                 leaf = path.nodes[0];
12267
12268                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12269                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12270                         path.slots[0]++;
12271                         continue;
12272                 }
12273
12274                 ei = btrfs_item_ptr(leaf, path.slots[0],
12275                                     struct btrfs_extent_item);
12276                 if (!(btrfs_extent_flags(leaf, ei) &
12277                       BTRFS_EXTENT_FLAG_DATA)) {
12278                         path.slots[0]++;
12279                         continue;
12280                 }
12281
12282                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12283                                     key.offset);
12284                 if (ret)
12285                         break;
12286                 path.slots[0]++;
12287         }
12288
12289         btrfs_release_path(&path);
12290         free(buf);
12291         return ret;
12292 }
12293
/*
 * Recompute data checksums and insert them into the csum tree.
 *
 * The source of the extent list is selected by @search_fs_tree: when it is
 * non-zero the fs/subvolume trees are walked, otherwise the extent tree is
 * used.  The fs tree variant exists because an extent tree init wipes out
 * all the extent info, so the extent tree cannot be relied on in that case.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12310
12311 static void free_roots_info_cache(void)
12312 {
12313         if (!roots_info_cache)
12314                 return;
12315
12316         while (!cache_tree_empty(roots_info_cache)) {
12317                 struct cache_extent *entry;
12318                 struct root_item_info *rii;
12319
12320                 entry = first_cache_extent(roots_info_cache);
12321                 if (!entry)
12322                         break;
12323                 remove_cache_extent(roots_info_cache, entry);
12324                 rii = container_of(entry, struct root_item_info, cache_extent);
12325                 free(rii);
12326         }
12327
12328         free(roots_info_cache);
12329         roots_info_cache = NULL;
12330 }
12331
12332 static int build_roots_info_cache(struct btrfs_fs_info *info)
12333 {
12334         int ret = 0;
12335         struct btrfs_key key;
12336         struct extent_buffer *leaf;
12337         struct btrfs_path path;
12338
12339         if (!roots_info_cache) {
12340                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12341                 if (!roots_info_cache)
12342                         return -ENOMEM;
12343                 cache_tree_init(roots_info_cache);
12344         }
12345
12346         btrfs_init_path(&path);
12347         key.objectid = 0;
12348         key.type = BTRFS_EXTENT_ITEM_KEY;
12349         key.offset = 0;
12350         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12351         if (ret < 0)
12352                 goto out;
12353         leaf = path.nodes[0];
12354
12355         while (1) {
12356                 struct btrfs_key found_key;
12357                 struct btrfs_extent_item *ei;
12358                 struct btrfs_extent_inline_ref *iref;
12359                 int slot = path.slots[0];
12360                 int type;
12361                 u64 flags;
12362                 u64 root_id;
12363                 u8 level;
12364                 struct cache_extent *entry;
12365                 struct root_item_info *rii;
12366
12367                 if (slot >= btrfs_header_nritems(leaf)) {
12368                         ret = btrfs_next_leaf(info->extent_root, &path);
12369                         if (ret < 0) {
12370                                 break;
12371                         } else if (ret) {
12372                                 ret = 0;
12373                                 break;
12374                         }
12375                         leaf = path.nodes[0];
12376                         slot = path.slots[0];
12377                 }
12378
12379                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12380
12381                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12382                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12383                         goto next;
12384
12385                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12386                 flags = btrfs_extent_flags(leaf, ei);
12387
12388                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12389                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12390                         goto next;
12391
12392                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12393                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12394                         level = found_key.offset;
12395                 } else {
12396                         struct btrfs_tree_block_info *binfo;
12397
12398                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12399                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12400                         level = btrfs_tree_block_level(leaf, binfo);
12401                 }
12402
12403                 /*
12404                  * For a root extent, it must be of the following type and the
12405                  * first (and only one) iref in the item.
12406                  */
12407                 type = btrfs_extent_inline_ref_type(leaf, iref);
12408                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12409                         goto next;
12410
12411                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12412                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12413                 if (!entry) {
12414                         rii = malloc(sizeof(struct root_item_info));
12415                         if (!rii) {
12416                                 ret = -ENOMEM;
12417                                 goto out;
12418                         }
12419                         rii->cache_extent.start = root_id;
12420                         rii->cache_extent.size = 1;
12421                         rii->level = (u8)-1;
12422                         entry = &rii->cache_extent;
12423                         ret = insert_cache_extent(roots_info_cache, entry);
12424                         ASSERT(ret == 0);
12425                 } else {
12426                         rii = container_of(entry, struct root_item_info,
12427                                            cache_extent);
12428                 }
12429
12430                 ASSERT(rii->cache_extent.start == root_id);
12431                 ASSERT(rii->cache_extent.size == 1);
12432
12433                 if (level > rii->level || rii->level == (u8)-1) {
12434                         rii->level = level;
12435                         rii->bytenr = found_key.objectid;
12436                         rii->gen = btrfs_extent_generation(leaf, ei);
12437                         rii->node_count = 1;
12438                 } else if (level == rii->level) {
12439                         rii->node_count++;
12440                 }
12441 next:
12442                 path.slots[0]++;
12443         }
12444
12445 out:
12446         btrfs_release_path(&path);
12447
12448         return ret;
12449 }
12450
12451 static int maybe_repair_root_item(struct btrfs_path *path,
12452                                   const struct btrfs_key *root_key,
12453                                   const int read_only_mode)
12454 {
12455         const u64 root_id = root_key->objectid;
12456         struct cache_extent *entry;
12457         struct root_item_info *rii;
12458         struct btrfs_root_item ri;
12459         unsigned long offset;
12460
12461         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12462         if (!entry) {
12463                 fprintf(stderr,
12464                         "Error: could not find extent items for root %llu\n",
12465                         root_key->objectid);
12466                 return -ENOENT;
12467         }
12468
12469         rii = container_of(entry, struct root_item_info, cache_extent);
12470         ASSERT(rii->cache_extent.start == root_id);
12471         ASSERT(rii->cache_extent.size == 1);
12472
12473         if (rii->node_count != 1) {
12474                 fprintf(stderr,
12475                         "Error: could not find btree root extent for root %llu\n",
12476                         root_id);
12477                 return -ENOENT;
12478         }
12479
12480         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12481         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12482
12483         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12484             btrfs_root_level(&ri) != rii->level ||
12485             btrfs_root_generation(&ri) != rii->gen) {
12486
12487                 /*
12488                  * If we're in repair mode but our caller told us to not update
12489                  * the root item, i.e. just check if it needs to be updated, don't
12490                  * print this message, since the caller will call us again shortly
12491                  * for the same root item without read only mode (the caller will
12492                  * open a transaction first).
12493                  */
12494                 if (!(read_only_mode && repair))
12495                         fprintf(stderr,
12496                                 "%sroot item for root %llu,"
12497                                 " current bytenr %llu, current gen %llu, current level %u,"
12498                                 " new bytenr %llu, new gen %llu, new level %u\n",
12499                                 (read_only_mode ? "" : "fixing "),
12500                                 root_id,
12501                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12502                                 btrfs_root_level(&ri),
12503                                 rii->bytenr, rii->gen, rii->level);
12504
12505                 if (btrfs_root_generation(&ri) > rii->gen) {
12506                         fprintf(stderr,
12507                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12508                                 root_id, btrfs_root_generation(&ri), rii->gen);
12509                         return -EINVAL;
12510                 }
12511
12512                 if (!read_only_mode) {
12513                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12514                         btrfs_set_root_level(&ri, rii->level);
12515                         btrfs_set_root_generation(&ri, rii->gen);
12516                         write_extent_buffer(path->nodes[0], &ri,
12517                                             offset, sizeof(ri));
12518                 }
12519
12520                 return 1;
12521         }
12522
12523         return 0;
12524 }
12525
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative error on failure, otherwise the number of root items
 * counted as bad (in repair mode these are the ones that were rewritten).
 * The roots info cache built at the start is always freed before returning.
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
        struct btrfs_path path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
        int bad_roots = 0;
        int need_trans = 0;

        btrfs_init_path(&path);

        ret = build_roots_info_cache(info);
        if (ret)
                goto out;

        /* Start the scan of the tree root at the first free objectid. */
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;

again:
        /*
         * Avoid opening and committing transactions if a leaf doesn't have
         * any root items that need to be fixed, so that we avoid rotating
         * backup roots unnecessarily.
         */
        if (need_trans) {
                trans = btrfs_start_transaction(info->tree_root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        goto out;
                }
        }

        /* The cow argument is only set when a transaction is open. */
        ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
                                0, trans ? 1 : 0);
        if (ret < 0)
                goto out;
        leaf = path.nodes[0];

        while (1) {
                struct btrfs_key found_key;

                if (path.slots[0] >= btrfs_header_nritems(leaf)) {
                        /*
                         * End of this leaf: find the key to continue from,
                         * commit any open transaction, and restart the search
                         * from that key (or stop if there is none).
                         */
                        int no_more_keys = find_next_key(&path, &key);

                        btrfs_release_path(&path);
                        if (trans) {
                                ret = btrfs_commit_transaction(trans,
                                                               info->tree_root);
                                trans = NULL;
                                if (ret < 0)
                                        goto out;
                        }
                        need_trans = 0;
                        if (no_more_keys)
                                break;
                        goto again;
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

                if (found_key.type != BTRFS_ROOT_ITEM_KEY)
                        goto next;
                if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        goto next;

                /* Without a transaction this is only a check (read only mode). */
                ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
                if (ret < 0)
                        goto out;
                if (ret) {
                        if (!trans && repair) {
                                /*
                                 * Needs fixing but no transaction is open:
                                 * restart at this very key with a transaction
                                 * so the next pass can modify the item.
                                 */
                                need_trans = 1;
                                key = found_key;
                                btrfs_release_path(&path);
                                goto again;
                        }
                        bad_roots++;
                }
next:
                path.slots[0]++;
        }
        ret = 0;
out:
        free_roots_info_cache();
        btrfs_release_path(&path);
        /* A transaction is only still open here when an error jumped to out. */
        if (trans)
                btrfs_commit_transaction(trans, info->tree_root);
        if (ret < 0)
                return ret;

        return bad_roots;
}
12632
12633 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12634 {
12635         struct btrfs_trans_handle *trans;
12636         struct btrfs_block_group_cache *bg_cache;
12637         u64 current = 0;
12638         int ret = 0;
12639
12640         /* Clear all free space cache inodes and its extent data */
12641         while (1) {
12642                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12643                 if (!bg_cache)
12644                         break;
12645                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12646                 if (ret < 0)
12647                         return ret;
12648                 current = bg_cache->key.objectid + bg_cache->key.offset;
12649         }
12650
12651         /* Don't forget to set cache_generation to -1 */
12652         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12653         if (IS_ERR(trans)) {
12654                 error("failed to update super block cache generation");
12655                 return PTR_ERR(trans);
12656         }
12657         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12658         btrfs_commit_transaction(trans, fs_info->tree_root);
12659
12660         return ret;
12661 }
12662
/*
 * Help text for the "btrfs check" command, as a NULL-terminated array of
 * strings: a synopsis line, a one-line summary, a longer description, and
 * after the empty string one or more lines per option.
 *
 * NOTE(review): presumably consumed by the generic usage printer from
 * help.h, which would define the exact layout expected here - confirm.
 * The option list should be kept in sync with the long_options table and
 * the getopt string in cmd_check().
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Terminator. */
        NULL
};
12693
12694 int cmd_check(int argc, char **argv)
12695 {
12696         struct cache_tree root_cache;
12697         struct btrfs_root *root;
12698         struct btrfs_fs_info *info;
12699         u64 bytenr = 0;
12700         u64 subvolid = 0;
12701         u64 tree_root_bytenr = 0;
12702         u64 chunk_root_bytenr = 0;
12703         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12704         int ret;
12705         int err = 0;
12706         u64 num;
12707         int init_csum_tree = 0;
12708         int readonly = 0;
12709         int clear_space_cache = 0;
12710         int qgroup_report = 0;
12711         int qgroups_repaired = 0;
12712         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12713
12714         while(1) {
12715                 int c;
12716                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12717                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12718                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12719                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12720                 static const struct option long_options[] = {
12721                         { "super", required_argument, NULL, 's' },
12722                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12723                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12724                         { "init-csum-tree", no_argument, NULL,
12725                                 GETOPT_VAL_INIT_CSUM },
12726                         { "init-extent-tree", no_argument, NULL,
12727                                 GETOPT_VAL_INIT_EXTENT },
12728                         { "check-data-csum", no_argument, NULL,
12729                                 GETOPT_VAL_CHECK_CSUM },
12730                         { "backup", no_argument, NULL, 'b' },
12731                         { "subvol-extents", required_argument, NULL, 'E' },
12732                         { "qgroup-report", no_argument, NULL, 'Q' },
12733                         { "tree-root", required_argument, NULL, 'r' },
12734                         { "chunk-root", required_argument, NULL,
12735                                 GETOPT_VAL_CHUNK_TREE },
12736                         { "progress", no_argument, NULL, 'p' },
12737                         { "mode", required_argument, NULL,
12738                                 GETOPT_VAL_MODE },
12739                         { "clear-space-cache", required_argument, NULL,
12740                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12741                         { NULL, 0, NULL, 0}
12742                 };
12743
12744                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12745                 if (c < 0)
12746                         break;
12747                 switch(c) {
12748                         case 'a': /* ignored */ break;
12749                         case 'b':
12750                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12751                                 break;
12752                         case 's':
12753                                 num = arg_strtou64(optarg);
12754                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12755                                         error(
12756                                         "super mirror should be less than %d",
12757                                                 BTRFS_SUPER_MIRROR_MAX);
12758                                         exit(1);
12759                                 }
12760                                 bytenr = btrfs_sb_offset(((int)num));
12761                                 printf("using SB copy %llu, bytenr %llu\n", num,
12762                                        (unsigned long long)bytenr);
12763                                 break;
12764                         case 'Q':
12765                                 qgroup_report = 1;
12766                                 break;
12767                         case 'E':
12768                                 subvolid = arg_strtou64(optarg);
12769                                 break;
12770                         case 'r':
12771                                 tree_root_bytenr = arg_strtou64(optarg);
12772                                 break;
12773                         case GETOPT_VAL_CHUNK_TREE:
12774                                 chunk_root_bytenr = arg_strtou64(optarg);
12775                                 break;
12776                         case 'p':
12777                                 ctx.progress_enabled = true;
12778                                 break;
12779                         case '?':
12780                         case 'h':
12781                                 usage(cmd_check_usage);
12782                         case GETOPT_VAL_REPAIR:
12783                                 printf("enabling repair mode\n");
12784                                 repair = 1;
12785                                 ctree_flags |= OPEN_CTREE_WRITES;
12786                                 break;
12787                         case GETOPT_VAL_READONLY:
12788                                 readonly = 1;
12789                                 break;
12790                         case GETOPT_VAL_INIT_CSUM:
12791                                 printf("Creating a new CRC tree\n");
12792                                 init_csum_tree = 1;
12793                                 repair = 1;
12794                                 ctree_flags |= OPEN_CTREE_WRITES;
12795                                 break;
12796                         case GETOPT_VAL_INIT_EXTENT:
12797                                 init_extent_tree = 1;
12798                                 ctree_flags |= (OPEN_CTREE_WRITES |
12799                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12800                                 repair = 1;
12801                                 break;
12802                         case GETOPT_VAL_CHECK_CSUM:
12803                                 check_data_csum = 1;
12804                                 break;
12805                         case GETOPT_VAL_MODE:
12806                                 check_mode = parse_check_mode(optarg);
12807                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12808                                         error("unknown mode: %s", optarg);
12809                                         exit(1);
12810                                 }
12811                                 break;
12812                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12813                                 if (strcmp(optarg, "v1") == 0) {
12814                                         clear_space_cache = 1;
12815                                 } else if (strcmp(optarg, "v2") == 0) {
12816                                         clear_space_cache = 2;
12817                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12818                                 } else {
12819                                         error(
12820                 "invalid argument to --clear-space-cache, must be v1 or v2");
12821                                         exit(1);
12822                                 }
12823                                 ctree_flags |= OPEN_CTREE_WRITES;
12824                                 break;
12825                 }
12826         }
12827
12828         if (check_argc_exact(argc - optind, 1))
12829                 usage(cmd_check_usage);
12830
12831         if (ctx.progress_enabled) {
12832                 ctx.tp = TASK_NOTHING;
12833                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12834         }
12835
12836         /* This check is the only reason for --readonly to exist */
12837         if (readonly && repair) {
12838                 error("repair options are not compatible with --readonly");
12839                 exit(1);
12840         }
12841
12842         /*
12843          * Not supported yet
12844          */
12845         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12846                 error("low memory mode doesn't support repair yet");
12847                 exit(1);
12848         }
12849
12850         radix_tree_init();
12851         cache_tree_init(&root_cache);
12852
12853         if((ret = check_mounted(argv[optind])) < 0) {
12854                 error("could not check mount status: %s", strerror(-ret));
12855                 err |= !!ret;
12856                 goto err_out;
12857         } else if(ret) {
12858                 error("%s is currently mounted, aborting", argv[optind]);
12859                 ret = -EBUSY;
12860                 err |= !!ret;
12861                 goto err_out;
12862         }
12863
12864         /* only allow partial opening under repair mode */
12865         if (repair)
12866                 ctree_flags |= OPEN_CTREE_PARTIAL;
12867
12868         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12869                                   chunk_root_bytenr, ctree_flags);
12870         if (!info) {
12871                 error("cannot open file system");
12872                 ret = -EIO;
12873                 err |= !!ret;
12874                 goto err_out;
12875         }
12876
12877         global_info = info;
12878         root = info->fs_root;
12879         if (clear_space_cache == 1) {
12880                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12881                         error(
12882                 "free space cache v2 detected, use --clear-space-cache v2");
12883                         ret = 1;
12884                         goto close_out;
12885                 }
12886                 printf("Clearing free space cache\n");
12887                 ret = clear_free_space_cache(info);
12888                 if (ret) {
12889                         error("failed to clear free space cache");
12890                         ret = 1;
12891                 } else {
12892                         printf("Free space cache cleared\n");
12893                 }
12894                 goto close_out;
12895         } else if (clear_space_cache == 2) {
12896                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12897                         printf("no free space cache v2 to clear\n");
12898                         ret = 0;
12899                         goto close_out;
12900                 }
12901                 printf("Clear free space cache v2\n");
12902                 ret = btrfs_clear_free_space_tree(info);
12903                 if (ret) {
12904                         error("failed to clear free space cache v2: %d", ret);
12905                         ret = 1;
12906                 } else {
12907                         printf("free space cache v2 cleared\n");
12908                 }
12909                 goto close_out;
12910         }
12911
12912         /*
12913          * repair mode will force us to commit transaction which
12914          * will make us fail to load log tree when mounting.
12915          */
12916         if (repair && btrfs_super_log_root(info->super_copy)) {
12917                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12918                 if (!ret) {
12919                         ret = 1;
12920                         err |= !!ret;
12921                         goto close_out;
12922                 }
12923                 ret = zero_log_tree(root);
12924                 err |= !!ret;
12925                 if (ret) {
12926                         error("failed to zero log tree: %d", ret);
12927                         goto close_out;
12928                 }
12929         }
12930
12931         uuid_unparse(info->super_copy->fsid, uuidbuf);
12932         if (qgroup_report) {
12933                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12934                        uuidbuf);
12935                 ret = qgroup_verify_all(info);
12936                 err |= !!ret;
12937                 if (ret == 0)
12938                         report_qgroups(1);
12939                 goto close_out;
12940         }
12941         if (subvolid) {
12942                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12943                        subvolid, argv[optind], uuidbuf);
12944                 ret = print_extent_state(info, subvolid);
12945                 err |= !!ret;
12946                 goto close_out;
12947         }
12948         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12949
12950         if (!extent_buffer_uptodate(info->tree_root->node) ||
12951             !extent_buffer_uptodate(info->dev_root->node) ||
12952             !extent_buffer_uptodate(info->chunk_root->node)) {
12953                 error("critical roots corrupted, unable to check the filesystem");
12954                 err |= !!ret;
12955                 ret = -EIO;
12956                 goto close_out;
12957         }
12958
12959         if (init_extent_tree || init_csum_tree) {
12960                 struct btrfs_trans_handle *trans;
12961
12962                 trans = btrfs_start_transaction(info->extent_root, 0);
12963                 if (IS_ERR(trans)) {
12964                         error("error starting transaction");
12965                         ret = PTR_ERR(trans);
12966                         err |= !!ret;
12967                         goto close_out;
12968                 }
12969
12970                 if (init_extent_tree) {
12971                         printf("Creating a new extent tree\n");
12972                         ret = reinit_extent_tree(trans, info);
12973                         err |= !!ret;
12974                         if (ret)
12975                                 goto close_out;
12976                 }
12977
12978                 if (init_csum_tree) {
12979                         printf("Reinitialize checksum tree\n");
12980                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12981                         if (ret) {
12982                                 error("checksum tree initialization failed: %d",
12983                                                 ret);
12984                                 ret = -EIO;
12985                                 err |= !!ret;
12986                                 goto close_out;
12987                         }
12988
12989                         ret = fill_csum_tree(trans, info->csum_root,
12990                                              init_extent_tree);
12991                         err |= !!ret;
12992                         if (ret) {
12993                                 error("checksum tree refilling failed: %d", ret);
12994                                 return -EIO;
12995                         }
12996                 }
12997                 /*
12998                  * Ok now we commit and run the normal fsck, which will add
12999                  * extent entries for all of the items it finds.
13000                  */
13001                 ret = btrfs_commit_transaction(trans, info->extent_root);
13002                 err |= !!ret;
13003                 if (ret)
13004                         goto close_out;
13005         }
13006         if (!extent_buffer_uptodate(info->extent_root->node)) {
13007                 error("critical: extent_root, unable to check the filesystem");
13008                 ret = -EIO;
13009                 err |= !!ret;
13010                 goto close_out;
13011         }
13012         if (!extent_buffer_uptodate(info->csum_root->node)) {
13013                 error("critical: csum_root, unable to check the filesystem");
13014                 ret = -EIO;
13015                 err |= !!ret;
13016                 goto close_out;
13017         }
13018
13019         if (!ctx.progress_enabled)
13020                 fprintf(stderr, "checking extents\n");
13021         if (check_mode == CHECK_MODE_LOWMEM)
13022                 ret = check_chunks_and_extents_v2(root);
13023         else
13024                 ret = check_chunks_and_extents(root);
13025         err |= !!ret;
13026         if (ret)
13027                 error(
13028                 "errors found in extent allocation tree or chunk allocation");
13029
13030         ret = repair_root_items(info);
13031         err |= !!ret;
13032         if (ret < 0) {
13033                 error("failed to repair root items: %s", strerror(-ret));
13034                 goto close_out;
13035         }
13036         if (repair) {
13037                 fprintf(stderr, "Fixed %d roots.\n", ret);
13038                 ret = 0;
13039         } else if (ret > 0) {
13040                 fprintf(stderr,
13041                        "Found %d roots with an outdated root item.\n",
13042                        ret);
13043                 fprintf(stderr,
13044                         "Please run a filesystem check with the option --repair to fix them.\n");
13045                 ret = 1;
13046                 err |= !!ret;
13047                 goto close_out;
13048         }
13049
13050         if (!ctx.progress_enabled) {
13051                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13052                         fprintf(stderr, "checking free space tree\n");
13053                 else
13054                         fprintf(stderr, "checking free space cache\n");
13055         }
13056         ret = check_space_cache(root);
13057         err |= !!ret;
13058         if (ret) {
13059                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13060                         error("errors found in free space tree");
13061                 else
13062                         error("errors found in free space cache");
13063                 goto out;
13064         }
13065
13066         /*
13067          * We used to have to have these hole extents in between our real
13068          * extents so if we don't have this flag set we need to make sure there
13069          * are no gaps in the file extents for inodes, otherwise we can just
13070          * ignore it when this happens.
13071          */
13072         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13073         if (!ctx.progress_enabled)
13074                 fprintf(stderr, "checking fs roots\n");
13075         if (check_mode == CHECK_MODE_LOWMEM)
13076                 ret = check_fs_roots_v2(root->fs_info);
13077         else
13078                 ret = check_fs_roots(root, &root_cache);
13079         err |= !!ret;
13080         if (ret) {
13081                 error("errors found in fs roots");
13082                 goto out;
13083         }
13084
13085         fprintf(stderr, "checking csums\n");
13086         ret = check_csums(root);
13087         err |= !!ret;
13088         if (ret) {
13089                 error("errors found in csum tree");
13090                 goto out;
13091         }
13092
13093         fprintf(stderr, "checking root refs\n");
13094         /* For low memory mode, check_fs_roots_v2 handles root refs */
13095         if (check_mode != CHECK_MODE_LOWMEM) {
13096                 ret = check_root_refs(root, &root_cache);
13097                 err |= !!ret;
13098                 if (ret) {
13099                         error("errors found in root refs");
13100                         goto out;
13101                 }
13102         }
13103
13104         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13105                 struct extent_buffer *eb;
13106
13107                 eb = list_first_entry(&root->fs_info->recow_ebs,
13108                                       struct extent_buffer, recow);
13109                 list_del_init(&eb->recow);
13110                 ret = recow_extent_buffer(root, eb);
13111                 err |= !!ret;
13112                 if (ret) {
13113                         error("fails to fix transid errors");
13114                         break;
13115                 }
13116         }
13117
13118         while (!list_empty(&delete_items)) {
13119                 struct bad_item *bad;
13120
13121                 bad = list_first_entry(&delete_items, struct bad_item, list);
13122                 list_del_init(&bad->list);
13123                 if (repair) {
13124                         ret = delete_bad_item(root, bad);
13125                         err |= !!ret;
13126                 }
13127                 free(bad);
13128         }
13129
13130         if (info->quota_enabled) {
13131                 fprintf(stderr, "checking quota groups\n");
13132                 ret = qgroup_verify_all(info);
13133                 err |= !!ret;
13134                 if (ret) {
13135                         error("failed to check quota groups");
13136                         goto out;
13137                 }
13138                 report_qgroups(0);
13139                 ret = repair_qgroups(info, &qgroups_repaired);
13140                 err |= !!ret;
13141                 if (err) {
13142                         error("failed to repair quota groups");
13143                         goto out;
13144                 }
13145                 ret = 0;
13146         }
13147
13148         if (!list_empty(&root->fs_info->recow_ebs)) {
13149                 error("transid errors in file system");
13150                 ret = 1;
13151                 err |= !!ret;
13152         }
13153 out:
13154         if (found_old_backref) { /*
13155                  * there was a disk format change when mixed
13156                  * backref was in testing tree. The old format
13157                  * existed about one week.
13158                  */
13159                 printf("\n * Found old mixed backref format. "
13160                        "The old format is not supported! *"
13161                        "\n * Please mount the FS in readonly mode, "
13162                        "backup data and re-format the FS. *\n\n");
13163                 err |= 1;
13164         }
13165         printf("found %llu bytes used, ",
13166                (unsigned long long)bytes_used);
13167         if (err)
13168                 printf("error(s) found\n");
13169         else
13170                 printf("no error found\n");
13171         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13172         printf("total tree bytes: %llu\n",
13173                (unsigned long long)total_btree_bytes);
13174         printf("total fs tree bytes: %llu\n",
13175                (unsigned long long)total_fs_tree_bytes);
13176         printf("total extent tree bytes: %llu\n",
13177                (unsigned long long)total_extent_tree_bytes);
13178         printf("btree space waste bytes: %llu\n",
13179                (unsigned long long)btree_space_waste);
13180         printf("file data blocks allocated: %llu\n referenced %llu\n",
13181                 (unsigned long long)data_bytes_allocated,
13182                 (unsigned long long)data_bytes_referenced);
13183
13184         free_qgroup_counts();
13185         free_root_recs_tree(&root_cache);
13186 close_out:
13187         close_ctree(root);
13188 err_out:
13189         if (ctx.progress_enabled)
13190                 task_deinit(ctx.info);
13191
13192         return err;
13193 }