btrfs-progs: remove extra newline from messages
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase reported by the progress indicator.  print_status_check() uses the
 * value as an index into its task_position_string[] table, so the first
 * three entries must stay in the same order as that table.
 */
46 enum task_position {
47         TASK_EXTENTS,
48         TASK_FREE_SPACE,
49         TASK_FS_ROOTS,
50         TASK_NOTHING, /* must be the last element; print_status_check() prints nothing for it */
51 };
52
/*
 * Context handed to print_status_check() through its void * argument.
 */
53 struct task_ctx {
54         int progress_enabled; /* NOTE(review): not read in this part of the file */
55         enum task_position tp; /* phase whose message is printed */
56
57         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
58 };
59
/*
 * File-scope state of the check command.
 * NOTE(review): none of these variables is referenced in the part of the
 * file reviewed here; confirm their exact meaning at the use sites.
 */
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selected by the user.  parse_check_mode() maps the
 * strings "lowmem" and "orig"/"original" to the first two values and any
 * other string to CHECK_MODE_UNKNOWN.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; starts out as the original (default) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the first member
 * ("node") of struct data_backref and struct tree_backref; is_data is
 * presumably what tells the two apart (confirm at the use sites).
 */
87 struct extent_backref {
88         struct list_head list; /* link used by to_extent_backref() */
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Back reference specialised for data extents; "node" is the generic
 * header that to_data_backref() converts from.
 */
101 struct data_backref {
102         struct extent_backref node;
103         union { /* parent and root share storage; only one is meaningful at a time */
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref; /* distinct from the 1-bit node.found_ref flag */
114 };
115
/*
 * Error bit flags (bit 0 is unused).
 * NOTE(review): no user of these flags is visible in this part of the file.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 /*
140  * Much like data_backref, but with the undetermined members removed and
141  * linked through a list_head.
142  * During the extent scan it is stored in root->orphan_data_extent.
143  * During the fs tree scan it is moved to inode_rec->orphan_data_extents.
144  */
145 struct orphan_data_extent {
146         struct list_head list;
147         u64 root;
148         u64 objectid; /* printed as "inode" by print_orphan_data_extents() */
149         u64 offset;
150         u64 disk_bytenr;
151         u64 disk_len;
152 };
153
/*
 * Back reference specialised for tree blocks; "node" is the generic header
 * that to_tree_backref() converts from.
 */
154 struct tree_backref {
155         struct extent_backref node;
156         union { /* parent and root share storage; only one is meaningful at a time */
157                 u64 parent;
158                 u64 root;
159         };
160 };
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
167 /*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref;
 * that field is 2 bits wide, so it can hold the value 2.
 */
168 enum { FLAG_UNSET = 2 };
169
/*
 * Per-extent bookkeeping record.
 * NOTE(review): the record is not populated in this part of the file; the
 * field notes below are limited to what the declarations themselves show.
 */
170 struct extent_record {
171         struct list_head backrefs;
172         struct list_head dups;
173         struct list_head list; /* link used by to_extent_record() */
174         struct cache_extent cache;
175         struct btrfs_disk_key parent_key;
176         u64 start;
177         u64 max_size;
178         u64 nr;
179         u64 refs;
180         u64 extent_item_refs;
181         u64 generation;
182         u64 parent_generation;
183         u64 info_objectid;
184         u32 num_duplicates;
185         u8 info_level;
186         unsigned int flag_block_full_backref:2; /* 2 bits so it can also hold FLAG_UNSET */
187         unsigned int found_rec:1;
188         unsigned int content_checked:1;
189         unsigned int owner_ref_checked:1;
190         unsigned int is_root:1;
191         unsigned int metadata:1;
192         unsigned int bad_full_backref:1;
193         unsigned int crossing_stripes:1;
194         unsigned int wrong_chunk_type:1;
195 };
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
/*
 * One name-based reference to an inode, kept on inode_record::backrefs.
 * The structure is allocated with the name bytes appended:
 * clone_inode_rec() copies sizeof(struct inode_backref) + namelen + 1 bytes.
 */
202 struct inode_backref {
203         struct list_head list; /* link used by to_inode_backref() */
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors; /* presumably REF_ERR_* bits -- confirm at the use sites */
210         u64 dir;
211         u64 index;
212         u16 namelen; /* length of name[] (one extra byte is copied with it) */
213         char name[0]; /* trailing variable-length name */
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
/*
 * Snapshot of the fields of a root item that the checker keeps around.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the description is inferred from the field names.
 */
221 struct root_item_record {
222         struct list_head list;
223         u64 objectid;
224         u64 bytenr;
225         u64 last_snapshot;
226         u8 level;
227         u8 drop_level;
228         int level_size;
229         struct btrfs_key drop_key;
230 };
231
/*
 * Reference error bits; print_ref_error() turns each set bit into a
 * ", <description>" fragment on stderr.
 */
232 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
233 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
234 #define REF_ERR_NO_INODE_REF            (1 << 2)
235 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
236 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
237 #define REF_ERR_DUP_INODE_REF           (1 << 5)
238 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
239 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
240 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
241 #define REF_ERR_NO_ROOT_REF             (1 << 9)
242 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
243 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
244 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
245
/*
 * One hole [start, start + len) in a file's extent coverage, stored in the
 * rb tree inode_record::holes and ordered by compare_hole().
 */
246 struct file_extent_hole {
247         struct rb_node node;
248         u64 start;
249         u64 len;
250 };
251
/*
 * Everything the checker has collected about one inode.  Records are
 * reference counted: get_inode_rec() clones a record before modification
 * when refs > 1, and free_inode_rec() releases it once refs drops to 0.
 */
252 struct inode_record {
253         struct list_head backrefs; /* list of struct inode_backref */
254         unsigned int checked:1;
255         unsigned int merging:1;
256         unsigned int found_inode_item:1;
257         unsigned int found_dir_item:1;
258         unsigned int found_file_extent:1;
259         unsigned int found_csum_item:1;
260         unsigned int some_csum_missing:1;
261         unsigned int nodatasum:1;
262         int errors; /* I_ERR_* bits, decoded by print_inode_error() */
263
264         u64 ino;
265         u32 nlink;
266         u32 imode;
267         u64 isize;
268         u64 nbytes;
269
270         u32 found_link; /* preset to 1 for BTRFS_FREE_INO_OBJECTID in get_inode_rec() */
271         u64 found_size;
272         u64 extent_start; /* initialised to (u64)-1 for a new record */
273         u64 extent_end;
274         struct rb_root holes; /* tree of struct file_extent_hole */
275         struct list_head orphan_extents; /* list of struct orphan_data_extent */
276
277         u32 refs; /* reference count, see above */
278 };
279
/*
 * Inode error bits stored in inode_record::errors; print_inode_error()
 * turns each set bit into a ", <description>" fragment on stderr.
 */
280 #define I_ERR_NO_INODE_ITEM             (1 << 0)
281 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
282 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
283 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
284 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
285 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
286 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
287 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
288 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
289 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
290 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
291 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
292 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
293 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
294 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
295
/*
 * One reference to a root, kept on root_record::backrefs (presumably --
 * the list is not populated in this part of the file).  Like struct
 * inode_backref it ends in a variable-length name.
 */
296 struct root_backref {
297         struct list_head list; /* link used by to_root_backref() */
298         unsigned int found_dir_item:1;
299         unsigned int found_dir_index:1;
300         unsigned int found_back_ref:1;
301         unsigned int found_forward_ref:1;
302         unsigned int reachable:1;
303         int errors; /* presumably REF_ERR_* bits -- confirm at the use sites */
304         u64 ref_root;
305         u64 dir;
306         u64 index;
307         u16 namelen;
308         char name[0]; /* trailing variable-length name */
309 };
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
/*
 * Per-root bookkeeping record.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
316 struct root_record {
317         struct list_head backrefs;
318         struct cache_extent cache;
319         unsigned int found_root_item:1;
320         u64 objectid;
321         u32 found_ref;
322 };
323
/*
 * Cache-tree entry carrying an opaque pointer.  get_inode_rec() uses it to
 * map an inode number (cache.start = ino, cache.size = 1) to its
 * struct inode_record, stored in data.
 */
324 struct ptr_node {
325         struct cache_extent cache;
326         void *data;
327 };
328
/*
 * State attached to a shared tree node: its own root and inode caches plus
 * a reference count.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
329 struct shared_node {
330         struct cache_extent cache;
331         struct cache_tree root_cache;
332         struct cache_tree inode_cache;
333         struct inode_record *current;
334         u32 refs;
335 };
336
/*
 * Start and size of a block.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
337 struct block_info {
338         u64 start;
339         u32 size;
340 };
341
/*
 * Tree-walk state holding one struct shared_node slot per tree level.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
342 struct walk_control {
343         struct cache_tree shared;
344         struct shared_node *nodes[BTRFS_MAX_LEVEL];
345         int active_node;
346         int root_level;
347 };
348
/*
 * Key of an item together with the id of the root it belongs to, linkable
 * into a list.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
349 struct bad_item {
350         struct btrfs_key key;
351         u64 root_id;
352         struct list_head list;
353 };
354
/*
 * A (bytenr, bytes) extent candidate with counters, linkable into a list.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
355 struct extent_entry {
356         u64 bytenr;
357         u64 bytes;
358         int count;
359         int broken;
360         struct list_head list;
361 };
362
/*
 * Information gathered about a tree root, stored in a cache tree through
 * cache_extent (presumably roots_info_cache -- confirm at the use sites).
 */
363 struct root_item_info {
364         /* level of the root */
365         u8 level;
366         /* number of nodes at this level, must be 1 for a root */
367         int node_count;
368         u64 bytenr;
369         u64 gen;
370         struct cache_extent cache_extent;
371 };
372
373 /*
374  * Error bits for the low memory mode check.
375  *
376  * Currently no caller cares about them yet.  They are only used internally
377  * for error classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart from the
 * bit mask alone.  Confirm whether that is intended before relying on it.
378  */
379 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
380 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
381 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
382 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
383 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
384 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
385 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
386 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
387 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
388 #define CHUNK_TYPE_MISMATCH     (1 << 8)
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize, root->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct btrfs_root *root,
1481                             struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(root, eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];
1862         u64 refs[BTRFS_MAX_LEVEL];
1863         int need_check[BTRFS_MAX_LEVEL];
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct btrfs_root *root,
1987                             struct extent_buffer *parent, int slot,
1988                             struct extent_buffer *child)
1989 {
1990         struct btrfs_key parent_key;
1991         struct btrfs_key child_key;
1992         int ret = 0;
1993
1994         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1995         if (btrfs_header_level(child) == 0)
1996                 btrfs_item_key_to_cpu(child, &child_key, 0);
1997         else
1998                 btrfs_node_key_to_cpu(child, &child_key, 0);
1999
2000         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2001                 ret = -EINVAL;
2002                 fprintf(stderr,
2003                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2004                         parent_key.objectid, parent_key.type, parent_key.offset,
2005                         child_key.objectid, child_key.type, child_key.offset);
2006         }
2007         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008                 ret = -EINVAL;
2009                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2010                         btrfs_node_blockptr(parent, slot),
2011                         btrfs_header_bytenr(child));
2012         }
2013         if (btrfs_node_ptr_generation(parent, slot) !=
2014             btrfs_header_generation(child)) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2017                         btrfs_header_generation(child),
2018                         btrfs_node_ptr_generation(parent, slot));
2019         }
2020         return ret;
2021 }
2022
2023 /*
2024  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2025  * in every fs or file tree check. Here we find its all root ids, and only check
2026  * it in the fs or file tree which has the smallest root id.
2027  */
2028 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 {
2030         struct rb_node *node;
2031         struct ulist_node *u;
2032
2033         if (roots->nnodes == 1)
2034                 return 1;
2035
2036         node = rb_first(&roots->root);
2037         u = rb_entry(node, struct ulist_node, rb_node);
2038         /*
2039          * current root id is not smallest, we skip it and let it be checked
2040          * in the fs or file tree who hash the smallest root id.
2041          */
2042         if (root->objectid != u->val)
2043                 return 0;
2044
2045         return 1;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, we record its reference count, so later if we still
2050  * process this node or leaf, don't need to compute its reference count again.
2051  */
2052 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2053                              struct node_refs *nrefs, u64 level)
2054 {
2055         int check, ret;
2056         u64 refs;
2057         struct ulist *roots;
2058
2059         if (nrefs->bytenr[level] != bytenr) {
2060                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2061                                        level, 1, &refs, NULL);
2062                 if (ret < 0)
2063                         return ret;
2064
2065                 nrefs->bytenr[level] = bytenr;
2066                 nrefs->refs[level] = refs;
2067                 if (refs > 1) {
2068                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2069                                                    0, &roots);
2070                         if (ret)
2071                                 return -EIO;
2072
2073                         check = need_check(root, roots);
2074                         ulist_free(roots);
2075                         nrefs->need_check[level] = check;
2076                 } else {
2077                         nrefs->need_check[level] = 1;
2078                 }
2079         }
2080
2081         return 0;
2082 }
2083
2084 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2085                           struct walk_control *wc, int *level,
2086                           struct node_refs *nrefs)
2087 {
2088         enum btrfs_tree_block_status status;
2089         u64 bytenr;
2090         u64 ptr_gen;
2091         struct extent_buffer *next;
2092         struct extent_buffer *cur;
2093         u32 blocksize;
2094         int ret, err = 0;
2095         u64 refs;
2096
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2101                 refs = nrefs->refs[*level];
2102                 ret = 0;
2103         } else {
2104                 ret = btrfs_lookup_extent_info(NULL, root,
2105                                        path->nodes[*level]->start,
2106                                        *level, 1, &refs, NULL);
2107                 if (ret < 0) {
2108                         err = ret;
2109                         goto out;
2110                 }
2111                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2112                 nrefs->refs[*level] = refs;
2113         }
2114
2115         if (refs > 1) {
2116                 ret = enter_shared_node(root, path->nodes[*level]->start,
2117                                         refs, wc, *level);
2118                 if (ret > 0) {
2119                         err = ret;
2120                         goto out;
2121                 }
2122         }
2123
2124         while (*level >= 0) {
2125                 WARN_ON(*level < 0);
2126                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2127                 cur = path->nodes[*level];
2128
2129                 if (btrfs_header_level(cur) != *level)
2130                         WARN_ON(1);
2131
2132                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2133                         break;
2134                 if (*level == 0) {
2135                         ret = process_one_leaf(root, cur, wc);
2136                         if (ret < 0)
2137                                 err = ret;
2138                         break;
2139                 }
2140                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2141                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2142                 blocksize = root->nodesize;
2143
2144                 if (bytenr == nrefs->bytenr[*level - 1]) {
2145                         refs = nrefs->refs[*level - 1];
2146                 } else {
2147                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2148                                         *level - 1, 1, &refs, NULL);
2149                         if (ret < 0) {
2150                                 refs = 0;
2151                         } else {
2152                                 nrefs->bytenr[*level - 1] = bytenr;
2153                                 nrefs->refs[*level - 1] = refs;
2154                         }
2155                 }
2156
2157                 if (refs > 1) {
2158                         ret = enter_shared_node(root, bytenr, refs,
2159                                                 wc, *level - 1);
2160                         if (ret > 0) {
2161                                 path->slots[*level]++;
2162                                 continue;
2163                         }
2164                 }
2165
2166                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2167                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2168                         free_extent_buffer(next);
2169                         reada_walk_down(root, cur, path->slots[*level]);
2170                         next = read_tree_block(root, bytenr, blocksize,
2171                                                ptr_gen);
2172                         if (!extent_buffer_uptodate(next)) {
2173                                 struct btrfs_key node_key;
2174
2175                                 btrfs_node_key_to_cpu(path->nodes[*level],
2176                                                       &node_key,
2177                                                       path->slots[*level]);
2178                                 btrfs_add_corrupt_extent_record(root->fs_info,
2179                                                 &node_key,
2180                                                 path->nodes[*level]->start,
2181                                                 root->nodesize, *level);
2182                                 err = -EIO;
2183                                 goto out;
2184                         }
2185                 }
2186
2187                 ret = check_child_node(root, cur, path->slots[*level], next);
2188                 if (ret) {
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2217                              int *level, struct node_refs *nrefs, int ext_ref)
2218 {
2219         enum btrfs_tree_block_status status;
2220         u64 bytenr;
2221         u64 ptr_gen;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         u32 blocksize;
2225         int ret;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, path->nodes[*level]->start,
2231                                 nrefs, *level);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 /* Don't forgot to check leaf/node validation */
2246                 if (*level == 0) {
2247                         ret = btrfs_check_leaf(root, NULL, cur);
2248                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2249                                 ret = -EIO;
2250                                 break;
2251                         }
2252                         ret = process_one_leaf_v2(root, path, nrefs,
2253                                                   level, ext_ref);
2254                         break;
2255                 } else {
2256                         ret = btrfs_check_node(root, NULL, cur);
2257                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2258                                 ret = -EIO;
2259                                 break;
2260                         }
2261                 }
2262                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2263                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2264                 blocksize = root->nodesize;
2265
2266                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2267                 if (ret)
2268                         break;
2269                 if (!nrefs->need_check[*level - 1]) {
2270                         path->slots[*level]++;
2271                         continue;
2272                 }
2273
2274                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2275                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2276                         free_extent_buffer(next);
2277                         reada_walk_down(root, cur, path->slots[*level]);
2278                         next = read_tree_block(root, bytenr, blocksize,
2279                                                ptr_gen);
2280                         if (!extent_buffer_uptodate(next)) {
2281                                 struct btrfs_key node_key;
2282
2283                                 btrfs_node_key_to_cpu(path->nodes[*level],
2284                                                       &node_key,
2285                                                       path->slots[*level]);
2286                                 btrfs_add_corrupt_extent_record(root->fs_info,
2287                                                 &node_key,
2288                                                 path->nodes[*level]->start,
2289                                                 root->nodesize, *level);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293                 }
2294
2295                 ret = check_child_node(root, cur, path->slots[*level], next);
2296                 if (ret < 0) 
2297                         break;
2298
2299                 if (btrfs_is_leaf(next))
2300                         status = btrfs_check_leaf(root, NULL, next);
2301                 else
2302                         status = btrfs_check_node(root, NULL, next);
2303                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2304                         free_extent_buffer(next);
2305                         ret = -EIO;
2306                         break;
2307                 }
2308
2309                 *level = *level - 1;
2310                 free_extent_buffer(path->nodes[*level]);
2311                 path->nodes[*level] = next;
2312                 path->slots[*level] = 0;
2313         }
2314         return ret;
2315 }
2316
2317 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2318                         struct walk_control *wc, int *level)
2319 {
2320         int i;
2321         struct extent_buffer *leaf;
2322
2323         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2324                 leaf = path->nodes[i];
2325                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2326                         path->slots[i]++;
2327                         *level = i;
2328                         return 0;
2329                 } else {
2330                         free_extent_buffer(path->nodes[*level]);
2331                         path->nodes[*level] = NULL;
2332                         BUG_ON(*level > wc->active_node);
2333                         if (*level == wc->active_node)
2334                                 leave_shared_node(root, wc, *level);
2335                         *level = i + 1;
2336                 }
2337         }
2338         return 1;
2339 }
2340
2341 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2342                            int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         *level = i + 1;
2357                 }
2358         }
2359         return 1;
2360 }
2361
2362 static int check_root_dir(struct inode_record *rec)
2363 {
2364         struct inode_backref *backref;
2365         int ret = -1;
2366
2367         if (!rec->found_inode_item || rec->errors)
2368                 goto out;
2369         if (rec->nlink != 1 || rec->found_link != 0)
2370                 goto out;
2371         if (list_empty(&rec->backrefs))
2372                 goto out;
2373         backref = to_inode_backref(rec->backrefs.next);
2374         if (!backref->found_inode_ref)
2375                 goto out;
2376         if (backref->index != 0 || backref->namelen != 2 ||
2377             memcmp(backref->name, "..", 2))
2378                 goto out;
2379         if (backref->found_dir_index || backref->found_dir_item)
2380                 goto out;
2381         ret = 0;
2382 out:
2383         return ret;
2384 }
2385
2386 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2387                               struct btrfs_root *root, struct btrfs_path *path,
2388                               struct inode_record *rec)
2389 {
2390         struct btrfs_inode_item *ei;
2391         struct btrfs_key key;
2392         int ret;
2393
2394         key.objectid = rec->ino;
2395         key.type = BTRFS_INODE_ITEM_KEY;
2396         key.offset = (u64)-1;
2397
2398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2399         if (ret < 0)
2400                 goto out;
2401         if (ret) {
2402                 if (!path->slots[0]) {
2403                         ret = -ENOENT;
2404                         goto out;
2405                 }
2406                 path->slots[0]--;
2407                 ret = 0;
2408         }
2409         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2410         if (key.objectid != rec->ino) {
2411                 ret = -ENOENT;
2412                 goto out;
2413         }
2414
2415         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2416                             struct btrfs_inode_item);
2417         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2418         btrfs_mark_buffer_dirty(path->nodes[0]);
2419         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2420         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2421                root->root_key.objectid);
2422 out:
2423         btrfs_release_path(path);
2424         return ret;
2425 }
2426
2427 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2428                                     struct btrfs_root *root,
2429                                     struct btrfs_path *path,
2430                                     struct inode_record *rec)
2431 {
2432         int ret;
2433
2434         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2435         btrfs_release_path(path);
2436         if (!ret)
2437                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2438         return ret;
2439 }
2440
2441 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2442                                struct btrfs_root *root,
2443                                struct btrfs_path *path,
2444                                struct inode_record *rec)
2445 {
2446         struct btrfs_inode_item *ei;
2447         struct btrfs_key key;
2448         int ret = 0;
2449
2450         key.objectid = rec->ino;
2451         key.type = BTRFS_INODE_ITEM_KEY;
2452         key.offset = 0;
2453
2454         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2455         if (ret) {
2456                 if (ret > 0)
2457                         ret = -ENOENT;
2458                 goto out;
2459         }
2460
2461         /* Since ret == 0, no need to check anything */
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2467         printf("reset nbytes for ino %llu root %llu\n",
2468                rec->ino, root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int add_missing_dir_index(struct btrfs_root *root,
2475                                  struct cache_tree *inode_cache,
2476                                  struct inode_record *rec,
2477                                  struct inode_backref *backref)
2478 {
2479         struct btrfs_path path;
2480         struct btrfs_trans_handle *trans;
2481         struct btrfs_dir_item *dir_item;
2482         struct extent_buffer *leaf;
2483         struct btrfs_key key;
2484         struct btrfs_disk_key disk_key;
2485         struct inode_record *dir_rec;
2486         unsigned long name_ptr;
2487         u32 data_size = sizeof(*dir_item) + backref->namelen;
2488         int ret;
2489
2490         trans = btrfs_start_transaction(root, 1);
2491         if (IS_ERR(trans))
2492                 return PTR_ERR(trans);
2493
2494         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2495                 (unsigned long long)rec->ino);
2496
2497         btrfs_init_path(&path);
2498         key.objectid = backref->dir;
2499         key.type = BTRFS_DIR_INDEX_KEY;
2500         key.offset = backref->index;
2501         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2502         BUG_ON(ret);
2503
2504         leaf = path.nodes[0];
2505         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506
2507         disk_key.objectid = cpu_to_le64(rec->ino);
2508         disk_key.type = BTRFS_INODE_ITEM_KEY;
2509         disk_key.offset = 0;
2510
2511         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2512         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2513         btrfs_set_dir_data_len(leaf, dir_item, 0);
2514         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2515         name_ptr = (unsigned long)(dir_item + 1);
2516         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2517         btrfs_mark_buffer_dirty(leaf);
2518         btrfs_release_path(&path);
2519         btrfs_commit_transaction(trans, root);
2520
2521         backref->found_dir_index = 1;
2522         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2523         BUG_ON(IS_ERR(dir_rec));
2524         if (!dir_rec)
2525                 return 0;
2526         dir_rec->found_size += backref->namelen;
2527         if (dir_rec->found_size == dir_rec->isize &&
2528             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2529                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2530         if (dir_rec->found_size != dir_rec->isize)
2531                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532
2533         return 0;
2534 }
2535
2536 static int delete_dir_index(struct btrfs_root *root,
2537                             struct cache_tree *inode_cache,
2538                             struct inode_record *rec,
2539                             struct inode_backref *backref)
2540 {
2541         struct btrfs_trans_handle *trans;
2542         struct btrfs_dir_item *di;
2543         struct btrfs_path path;
2544         int ret = 0;
2545
2546         trans = btrfs_start_transaction(root, 1);
2547         if (IS_ERR(trans))
2548                 return PTR_ERR(trans);
2549
2550         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2551                 (unsigned long long)backref->dir,
2552                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2553                 (unsigned long long)root->objectid);
2554
2555         btrfs_init_path(&path);
2556         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2557                                     backref->name, backref->namelen,
2558                                     backref->index, -1);
2559         if (IS_ERR(di)) {
2560                 ret = PTR_ERR(di);
2561                 btrfs_release_path(&path);
2562                 btrfs_commit_transaction(trans, root);
2563                 if (ret == -ENOENT)
2564                         return 0;
2565                 return ret;
2566         }
2567
2568         if (!di)
2569                 ret = btrfs_del_item(trans, root, &path);
2570         else
2571                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2572         BUG_ON(ret);
2573         btrfs_release_path(&path);
2574         btrfs_commit_transaction(trans, root);
2575         return ret;
2576 }
2577
2578 static int create_inode_item(struct btrfs_root *root,
2579                              struct inode_record *rec,
2580                              struct inode_backref *backref, int root_dir)
2581 {
2582         struct btrfs_trans_handle *trans;
2583         struct btrfs_inode_item inode_item;
2584         time_t now = time(NULL);
2585         int ret;
2586
2587         trans = btrfs_start_transaction(root, 1);
2588         if (IS_ERR(trans)) {
2589                 ret = PTR_ERR(trans);
2590                 return ret;
2591         }
2592
2593         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2594                 "be incomplete, please check permissions and content after "
2595                 "the fsck completes.\n", (unsigned long long)root->objectid,
2596                 (unsigned long long)rec->ino);
2597
2598         memset(&inode_item, 0, sizeof(inode_item));
2599         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2600         if (root_dir)
2601                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2602         else
2603                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2604         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2605         if (rec->found_dir_item) {
2606                 if (rec->found_file_extent)
2607                         fprintf(stderr, "root %llu inode %llu has both a dir "
2608                                 "item and extents, unsure if it is a dir or a "
2609                                 "regular file so setting it as a directory\n",
2610                                 (unsigned long long)root->objectid,
2611                                 (unsigned long long)rec->ino);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2613                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2614         } else if (!rec->found_dir_item) {
2615                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2616                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2617         }
2618         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2625         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2626
2627         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2628         BUG_ON(ret);
2629         btrfs_commit_transaction(trans, root);
2630         return 0;
2631 }
2632
2633 static int repair_inode_backrefs(struct btrfs_root *root,
2634                                  struct inode_record *rec,
2635                                  struct cache_tree *inode_cache,
2636                                  int delete)
2637 {
2638         struct inode_backref *tmp, *backref;
2639         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2640         int ret = 0;
2641         int repaired = 0;
2642
2643         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2644                 if (!delete && rec->ino == root_dirid) {
2645                         if (!rec->found_inode_item) {
2646                                 ret = create_inode_item(root, rec, backref, 1);
2647                                 if (ret)
2648                                         break;
2649                                 repaired++;
2650                         }
2651                 }
2652
2653                 /* Index 0 for root dir's are special, don't mess with it */
2654                 if (rec->ino == root_dirid && backref->index == 0)
2655                         continue;
2656
2657                 if (delete &&
2658                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2659                      (backref->found_dir_index && backref->found_inode_ref &&
2660                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2661                         ret = delete_dir_index(root, inode_cache, rec, backref);
2662                         if (ret)
2663                                 break;
2664                         repaired++;
2665                         list_del(&backref->list);
2666                         free(backref);
2667                 }
2668
2669                 if (!delete && !backref->found_dir_index &&
2670                     backref->found_dir_item && backref->found_inode_ref) {
2671                         ret = add_missing_dir_index(root, inode_cache, rec,
2672                                                     backref);
2673                         if (ret)
2674                                 break;
2675                         repaired++;
2676                         if (backref->found_dir_item &&
2677                             backref->found_dir_index &&
2678                             backref->found_dir_index) {
2679                                 if (!backref->errors &&
2680                                     backref->found_inode_ref) {
2681                                         list_del(&backref->list);
2682                                         free(backref);
2683                                 }
2684                         }
2685                 }
2686
2687                 if (!delete && (!backref->found_dir_index &&
2688                                 !backref->found_dir_item &&
2689                                 backref->found_inode_ref)) {
2690                         struct btrfs_trans_handle *trans;
2691                         struct btrfs_key location;
2692
2693                         ret = check_dir_conflict(root, backref->name,
2694                                                  backref->namelen,
2695                                                  backref->dir,
2696                                                  backref->index);
2697                         if (ret) {
2698                                 /*
2699                                  * let nlink fixing routine to handle it,
2700                                  * which can do it better.
2701                                  */
2702                                 ret = 0;
2703                                 break;
2704                         }
2705                         location.objectid = rec->ino;
2706                         location.type = BTRFS_INODE_ITEM_KEY;
2707                         location.offset = 0;
2708
2709                         trans = btrfs_start_transaction(root, 1);
2710                         if (IS_ERR(trans)) {
2711                                 ret = PTR_ERR(trans);
2712                                 break;
2713                         }
2714                         fprintf(stderr, "adding missing dir index/item pair "
2715                                 "for inode %llu\n",
2716                                 (unsigned long long)rec->ino);
2717                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2718                                                     backref->namelen,
2719                                                     backref->dir, &location,
2720                                                     imode_to_type(rec->imode),
2721                                                     backref->index);
2722                         BUG_ON(ret);
2723                         btrfs_commit_transaction(trans, root);
2724                         repaired++;
2725                 }
2726
2727                 if (!delete && (backref->found_inode_ref &&
2728                                 backref->found_dir_index &&
2729                                 backref->found_dir_item &&
2730                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2731                                 !rec->found_inode_item)) {
2732                         ret = create_inode_item(root, rec, backref, 0);
2733                         if (ret)
2734                                 break;
2735                         repaired++;
2736                 }
2737
2738         }
2739         return ret ? ret : repaired;
2740 }
2741
2742 /*
2743  * To determine the file type for nlink/inode_item repair
2744  *
2745  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2746  * Return -ENOENT if file type is not found.
2747  */
2748 static int find_file_type(struct inode_record *rec, u8 *type)
2749 {
2750         struct inode_backref *backref;
2751
2752         /* For inode item recovered case */
2753         if (rec->found_inode_item) {
2754                 *type = imode_to_type(rec->imode);
2755                 return 0;
2756         }
2757
2758         list_for_each_entry(backref, &rec->backrefs, list) {
2759                 if (backref->found_dir_index || backref->found_dir_item) {
2760                         *type = backref->filetype;
2761                         return 0;
2762                 }
2763         }
2764         return -ENOENT;
2765 }
2766
2767 /*
2768  * To determine the file name for nlink repair
2769  *
2770  * Return 0 if file name is found, set name and namelen.
2771  * Return -ENOENT if file name is not found.
2772  */
2773 static int find_file_name(struct inode_record *rec,
2774                           char *name, int *namelen)
2775 {
2776         struct inode_backref *backref;
2777
2778         list_for_each_entry(backref, &rec->backrefs, list) {
2779                 if (backref->found_dir_index || backref->found_dir_item ||
2780                     backref->found_inode_ref) {
2781                         memcpy(name, backref->name, backref->namelen);
2782                         *namelen = backref->namelen;
2783                         return 0;
2784                 }
2785         }
2786         return -ENOENT;
2787 }
2788
2789 /* Reset the nlink of the inode to the correct one */
2790 static int reset_nlink(struct btrfs_trans_handle *trans,
2791                        struct btrfs_root *root,
2792                        struct btrfs_path *path,
2793                        struct inode_record *rec)
2794 {
2795         struct inode_backref *backref;
2796         struct inode_backref *tmp;
2797         struct btrfs_key key;
2798         struct btrfs_inode_item *inode_item;
2799         int ret = 0;
2800
2801         /* We don't believe this either, reset it and iterate backref */
2802         rec->found_link = 0;
2803
2804         /* Remove all backref including the valid ones */
2805         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2806                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2807                                    backref->index, backref->name,
2808                                    backref->namelen, 0);
2809                 if (ret < 0)
2810                         goto out;
2811
2812                 /* remove invalid backref, so it won't be added back */
2813                 if (!(backref->found_dir_index &&
2814                       backref->found_dir_item &&
2815                       backref->found_inode_ref)) {
2816                         list_del(&backref->list);
2817                         free(backref);
2818                 } else {
2819                         rec->found_link++;
2820                 }
2821         }
2822
2823         /* Set nlink to 0 */
2824         key.objectid = rec->ino;
2825         key.type = BTRFS_INODE_ITEM_KEY;
2826         key.offset = 0;
2827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828         if (ret < 0)
2829                 goto out;
2830         if (ret > 0) {
2831                 ret = -ENOENT;
2832                 goto out;
2833         }
2834         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2835                                     struct btrfs_inode_item);
2836         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2837         btrfs_mark_buffer_dirty(path->nodes[0]);
2838         btrfs_release_path(path);
2839
2840         /*
2841          * Add back valid inode_ref/dir_item/dir_index,
2842          * add_link() will handle the nlink inc, so new nlink must be correct
2843          */
2844         list_for_each_entry(backref, &rec->backrefs, list) {
2845                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2846                                      backref->name, backref->namelen,
2847                                      backref->filetype, &backref->index, 1);
2848                 if (ret < 0)
2849                         goto out;
2850         }
2851 out:
2852         btrfs_release_path(path);
2853         return ret;
2854 }
2855
2856 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2857                                struct btrfs_root *root,
2858                                struct btrfs_path *path,
2859                                struct inode_record *rec)
2860 {
2861         char *dir_name = "lost+found";
2862         char namebuf[BTRFS_NAME_LEN] = {0};
2863         u64 lost_found_ino;
2864         u32 mode = 0700;
2865         u8 type = 0;
2866         int namelen = 0;
2867         int name_recovered = 0;
2868         int type_recovered = 0;
2869         int ret = 0;
2870
2871         /*
2872          * Get file name and type first before these invalid inode ref
2873          * are deleted by remove_all_invalid_backref()
2874          */
2875         name_recovered = !find_file_name(rec, namebuf, &namelen);
2876         type_recovered = !find_file_type(rec, &type);
2877
2878         if (!name_recovered) {
2879                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2880                        rec->ino, rec->ino);
2881                 namelen = count_digits(rec->ino);
2882                 sprintf(namebuf, "%llu", rec->ino);
2883                 name_recovered = 1;
2884         }
2885         if (!type_recovered) {
2886                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2887                        rec->ino);
2888                 type = BTRFS_FT_REG_FILE;
2889                 type_recovered = 1;
2890         }
2891
2892         ret = reset_nlink(trans, root, path, rec);
2893         if (ret < 0) {
2894                 fprintf(stderr,
2895                         "Failed to reset nlink for inode %llu: %s\n",
2896                         rec->ino, strerror(-ret));
2897                 goto out;
2898         }
2899
2900         if (rec->found_link == 0) {
2901                 lost_found_ino = root->highest_inode;
2902                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2903                         ret = -EOVERFLOW;
2904                         goto out;
2905                 }
2906                 lost_found_ino++;
2907                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2908                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2909                                   mode);
2910                 if (ret < 0) {
2911                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2912                                 dir_name, strerror(-ret));
2913                         goto out;
2914                 }
2915                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2916                                      namebuf, namelen, type, NULL, 1);
2917                 /*
2918                  * Add ".INO" suffix several times to handle case where
2919                  * "FILENAME.INO" is already taken by another file.
2920                  */
2921                 while (ret == -EEXIST) {
2922                         /*
2923                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2924                          */
2925                         if (namelen + count_digits(rec->ino) + 1 >
2926                             BTRFS_NAME_LEN) {
2927                                 ret = -EFBIG;
2928                                 goto out;
2929                         }
2930                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2931                                  ".%llu", rec->ino);
2932                         namelen += count_digits(rec->ino) + 1;
2933                         ret = btrfs_add_link(trans, root, rec->ino,
2934                                              lost_found_ino, namebuf,
2935                                              namelen, type, NULL, 1);
2936                 }
2937                 if (ret < 0) {
2938                         fprintf(stderr,
2939                                 "Failed to link the inode %llu to %s dir: %s\n",
2940                                 rec->ino, dir_name, strerror(-ret));
2941                         goto out;
2942                 }
2943                 /*
2944                  * Just increase the found_link, don't actually add the
2945                  * backref. This will make things easier and this inode
2946                  * record will be freed after the repair is done.
2947                  * So fsck will not report problem about this inode.
2948                  */
2949                 rec->found_link++;
2950                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2951                        namelen, namebuf, dir_name);
2952         }
2953         printf("Fixed the nlink of inode %llu\n", rec->ino);
2954 out:
2955         /*
2956          * Clear the flag anyway, or we will loop forever for the same inode
2957          * as it will not be removed from the bad inode list and the dead loop
2958          * happens.
2959          */
2960         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2961         btrfs_release_path(path);
2962         return ret;
2963 }
2964
2965 /*
2966  * Check if there is any normal(reg or prealloc) file extent for given
2967  * ino.
2968  * This is used to determine the file type when neither its dir_index/item or
2969  * inode_item exists.
2970  *
2971  * This will *NOT* report error, if any error happens, just consider it does
2972  * not have any normal file extent.
2973  */
2974 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2975 {
2976         struct btrfs_path path;
2977         struct btrfs_key key;
2978         struct btrfs_key found_key;
2979         struct btrfs_file_extent_item *fi;
2980         u8 type;
2981         int ret = 0;
2982
2983         btrfs_init_path(&path);
2984         key.objectid = ino;
2985         key.type = BTRFS_EXTENT_DATA_KEY;
2986         key.offset = 0;
2987
2988         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2989         if (ret < 0) {
2990                 ret = 0;
2991                 goto out;
2992         }
2993         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2994                 ret = btrfs_next_leaf(root, &path);
2995                 if (ret) {
2996                         ret = 0;
2997                         goto out;
2998                 }
2999         }
3000         while (1) {
3001                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3002                                       path.slots[0]);
3003                 if (found_key.objectid != ino ||
3004                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3005                         break;
3006                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3007                                     struct btrfs_file_extent_item);
3008                 type = btrfs_file_extent_type(path.nodes[0], fi);
3009                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3010                         ret = 1;
3011                         goto out;
3012                 }
3013         }
3014 out:
3015         btrfs_release_path(&path);
3016         return ret;
3017 }
3018
3019 static u32 btrfs_type_to_imode(u8 type)
3020 {
3021         static u32 imode_by_btrfs_type[] = {
3022                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3023                 [BTRFS_FT_DIR]          = S_IFDIR,
3024                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3025                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3026                 [BTRFS_FT_FIFO]         = S_IFIFO,
3027                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3028                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3029         };
3030
3031         return imode_by_btrfs_type[(type)];
3032 }
3033
3034 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3035                                 struct btrfs_root *root,
3036                                 struct btrfs_path *path,
3037                                 struct inode_record *rec)
3038 {
3039         u8 filetype;
3040         u32 mode = 0700;
3041         int type_recovered = 0;
3042         int ret = 0;
3043
3044         printf("Trying to rebuild inode:%llu\n", rec->ino);
3045
3046         type_recovered = !find_file_type(rec, &filetype);
3047
3048         /*
3049          * Try to determine inode type if type not found.
3050          *
3051          * For found regular file extent, it must be FILE.
3052          * For found dir_item/index, it must be DIR.
3053          *
3054          * For undetermined one, use FILE as fallback.
3055          *
3056          * TODO:
3057          * 1. If found backref(inode_index/item is already handled) to it,
3058          *    it must be DIR.
3059          *    Need new inode-inode ref structure to allow search for that.
3060          */
3061         if (!type_recovered) {
3062                 if (rec->found_file_extent &&
3063                     find_normal_file_extent(root, rec->ino)) {
3064                         type_recovered = 1;
3065                         filetype = BTRFS_FT_REG_FILE;
3066                 } else if (rec->found_dir_item) {
3067                         type_recovered = 1;
3068                         filetype = BTRFS_FT_DIR;
3069                 } else if (!list_empty(&rec->orphan_extents)) {
3070                         type_recovered = 1;
3071                         filetype = BTRFS_FT_REG_FILE;
3072                 } else{
3073                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3074                                rec->ino);
3075                         type_recovered = 1;
3076                         filetype = BTRFS_FT_REG_FILE;
3077                 }
3078         }
3079
3080         ret = btrfs_new_inode(trans, root, rec->ino,
3081                               mode | btrfs_type_to_imode(filetype));
3082         if (ret < 0)
3083                 goto out;
3084
3085         /*
3086          * Here inode rebuild is done, we only rebuild the inode item,
3087          * don't repair the nlink(like move to lost+found).
3088          * That is the job of nlink repair.
3089          *
3090          * We just fill the record and return
3091          */
3092         rec->found_dir_item = 1;
3093         rec->imode = mode | btrfs_type_to_imode(filetype);
3094         rec->nlink = 0;
3095         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3096         /* Ensure the inode_nlinks repair function will be called */
3097         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3098 out:
3099         return ret;
3100 }
3101
3102 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3103                                       struct btrfs_root *root,
3104                                       struct btrfs_path *path,
3105                                       struct inode_record *rec)
3106 {
3107         struct orphan_data_extent *orphan;
3108         struct orphan_data_extent *tmp;
3109         int ret = 0;
3110
3111         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3112                 /*
3113                  * Check for conflicting file extents
3114                  *
3115                  * Here we don't know whether the extents is compressed or not,
3116                  * so we can only assume it not compressed nor data offset,
3117                  * and use its disk_len as extent length.
3118                  */
3119                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3120                                        orphan->offset, orphan->disk_len, 0);
3121                 btrfs_release_path(path);
3122                 if (ret < 0)
3123                         goto out;
3124                 if (!ret) {
3125                         fprintf(stderr,
3126                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3127                                 orphan->disk_bytenr, orphan->disk_len);
3128                         ret = btrfs_free_extent(trans,
3129                                         root->fs_info->extent_root,
3130                                         orphan->disk_bytenr, orphan->disk_len,
3131                                         0, root->objectid, orphan->objectid,
3132                                         orphan->offset);
3133                         if (ret < 0)
3134                                 goto out;
3135                 }
3136                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3137                                 orphan->offset, orphan->disk_bytenr,
3138                                 orphan->disk_len, orphan->disk_len);
3139                 if (ret < 0)
3140                         goto out;
3141
3142                 /* Update file size info */
3143                 rec->found_size += orphan->disk_len;
3144                 if (rec->found_size == rec->nbytes)
3145                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3146
3147                 /* Update the file extent hole info too */
3148                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3149                                            orphan->disk_len);
3150                 if (ret < 0)
3151                         goto out;
3152                 if (RB_EMPTY_ROOT(&rec->holes))
3153                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3154
3155                 list_del(&orphan->list);
3156                 free(orphan);
3157         }
3158         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3159 out:
3160         return ret;
3161 }
3162
3163 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3164                                         struct btrfs_root *root,
3165                                         struct btrfs_path *path,
3166                                         struct inode_record *rec)
3167 {
3168         struct rb_node *node;
3169         struct file_extent_hole *hole;
3170         int found = 0;
3171         int ret = 0;
3172
3173         node = rb_first(&rec->holes);
3174
3175         while (node) {
3176                 found = 1;
3177                 hole = rb_entry(node, struct file_extent_hole, node);
3178                 ret = btrfs_punch_hole(trans, root, rec->ino,
3179                                        hole->start, hole->len);
3180                 if (ret < 0)
3181                         goto out;
3182                 ret = del_file_extent_hole(&rec->holes, hole->start,
3183                                            hole->len);
3184                 if (ret < 0)
3185                         goto out;
3186                 if (RB_EMPTY_ROOT(&rec->holes))
3187                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3188                 node = rb_first(&rec->holes);
3189         }
3190         /* special case for a file losing all its file extent */
3191         if (!found) {
3192                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3193                                        round_up(rec->isize, root->sectorsize));
3194                 if (ret < 0)
3195                         goto out;
3196         }
3197         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3198                rec->ino, root->objectid);
3199 out:
3200         return ret;
3201 }
3202
3203 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3204 {
3205         struct btrfs_trans_handle *trans;
3206         struct btrfs_path path;
3207         int ret = 0;
3208
3209         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3210                              I_ERR_NO_ORPHAN_ITEM |
3211                              I_ERR_LINK_COUNT_WRONG |
3212                              I_ERR_NO_INODE_ITEM |
3213                              I_ERR_FILE_EXTENT_ORPHAN |
3214                              I_ERR_FILE_EXTENT_DISCOUNT|
3215                              I_ERR_FILE_NBYTES_WRONG)))
3216                 return rec->errors;
3217
3218         /*
3219          * For nlink repair, it may create a dir and add link, so
3220          * 2 for parent(256)'s dir_index and dir_item
3221          * 2 for lost+found dir's inode_item and inode_ref
3222          * 1 for the new inode_ref of the file
3223          * 2 for lost+found dir's dir_index and dir_item for the file
3224          */
3225         trans = btrfs_start_transaction(root, 7);
3226         if (IS_ERR(trans))
3227                 return PTR_ERR(trans);
3228
3229         btrfs_init_path(&path);
3230         if (rec->errors & I_ERR_NO_INODE_ITEM)
3231                 ret = repair_inode_no_item(trans, root, &path, rec);
3232         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3233                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3234         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3235                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3236         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3237                 ret = repair_inode_isize(trans, root, &path, rec);
3238         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3239                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3240         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3241                 ret = repair_inode_nlinks(trans, root, &path, rec);
3242         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3243                 ret = repair_inode_nbytes(trans, root, &path, rec);
3244         btrfs_commit_transaction(trans, root);
3245         btrfs_release_path(&path);
3246         return ret;
3247 }
3248
/*
 * Verify (and, when the global 'repair' flag is set, try to fix) all inode
 * records collected in @inode_cache for @root.
 *
 * The function
 *  - returns 0 right away for a root whose root item has zero refs,
 *  - raises root->highest_inode to the ino of the last record in the cache,
 *  - in repair mode runs repair_inode_backrefs() over all records in up to
 *    two stages; if any of that reported something, every record is freed
 *    in a third stage and the collected error is returned,
 *  - checks the root directory record, recreating the root dir in repair
 *    mode when the record is missing,
 *  - finally drains @inode_cache, freeing every record and printing the ones
 *    that still carry errors.
 *
 * Returns 0 if nothing was counted as an error, -1 if at least one error was
 * counted, -EAGAIN when the repair changed something and the records were
 * dropped, or another negative value from repair_inode_backrefs() /
 * btrfs_start_transaction().
 */
static int check_inode_recs(struct btrfs_root *root,
                            struct cache_tree *inode_cache)
{
        struct cache_extent *cache;
        struct ptr_node *node;
        struct inode_record *rec;
        struct inode_backref *backref;
        int stage = 0;
        int ret = 0;
        int err = 0;
        u64 error = 0;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);

        /* Nothing to check for a root without refs; only warn about records */
        if (btrfs_root_refs(&root->root_item) == 0) {
                if (!cache_tree_empty(inode_cache))
                        fprintf(stderr, "warning line %d\n", __LINE__);
                return 0;
        }

        /*
         * We need to record the highest inode number for later 'lost+found'
         * dir creation.
         * We must select an ino not used/referred by any existing inode, or
         * 'lost+found' ino may be a missing ino in a corrupted leaf,
         * this may cause 'lost+found' dir has wrong nlinks.
         */
        cache = last_cache_extent(inode_cache);
        if (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                if (rec->ino > root->highest_inode)
                        root->highest_inode = rec->ino;
        }

        /*
         * We need to repair backrefs first because we could change some of the
         * errors in the inode recs.
         *
         * We also need to go through and delete invalid backrefs first and then
         * add the correct ones second.  We do this because we may get EEXIST
         * when adding back the correct index because we hadn't yet deleted the
         * invalid index.
         *
         * For example, if we were missing a dir index then the directories
         * isize would be wrong, so if we fixed the isize to what we thought it
         * would be and then fixed the backref we'd still have a invalid fs, so
         * we need to add back the dir index and then check to see if the isize
         * is still wrong.
         */
        while (stage < 3) {
                stage++;
                /* Stage 3 (drop all records) only runs when err was set */
                if (stage == 3 && !err)
                        break;

                cache = search_cache_extent(inode_cache, 0);
                while (repair && cache) {
                        node = container_of(cache, struct ptr_node, cache);
                        rec = node->data;
                        /* Advance first: the current node may be freed below */
                        cache = next_cache_extent(cache);

                        /* Need to free everything up and rescan */
                        if (stage == 3) {
                                remove_cache_extent(inode_cache, &node->cache);
                                free(node);
                                free_inode_rec(rec);
                                continue;
                        }

                        if (list_empty(&rec->backrefs))
                                continue;

                        /* The last argument is non-zero only in stage 1 */
                        ret = repair_inode_backrefs(root, rec, inode_cache,
                                                    stage == 1);
                        if (ret < 0) {
                                /* Hard failure: jump straight to stage 3 */
                                err = ret;
                                stage = 2;
                                break;
                        } if (ret > 0) {
                                /* A positive result is turned into -EAGAIN */
                                err = -EAGAIN;
                        }
                }
        }
        if (err)
                return err;

        /* Look up the root directory record */
        rec = get_inode_rec(inode_cache, root_dirid, 0);
        BUG_ON(IS_ERR(rec));
        if (rec) {
                ret = check_root_dir(rec);
                if (ret) {
                        fprintf(stderr, "root %llu root dir %llu error\n",
                                (unsigned long long)root->root_key.objectid,
                                (unsigned long long)root_dirid);
                        print_inode_error(root, rec);
                        error++;
                }
        } else {
                if (repair) {
                        struct btrfs_trans_handle *trans;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                return err;
                        }

                        fprintf(stderr,
                                "root %llu missing its root dir, recreating\n",
                                (unsigned long long)root->objectid);

                        ret = btrfs_make_root_dir(trans, root, root_dirid);
                        BUG_ON(ret);

                        /* -EAGAIN is returned after the root dir is recreated */
                        btrfs_commit_transaction(trans, root);
                        return -EAGAIN;
                }

                fprintf(stderr, "root %llu root dir %llu not found\n",
                        (unsigned long long)root->root_key.objectid,
                        (unsigned long long)root_dirid);
        }

        /* Drain the cache: every record is removed and freed in this loop */
        while (1) {
                cache = search_cache_extent(inode_cache, 0);
                if (!cache)
                        break;
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                /* The root dir and the orphan objectid are not reported here */
                if (rec->ino == root_dirid ||
                    rec->ino == BTRFS_ORPHAN_OBJECTID) {
                        free_inode_rec(rec);
                        continue;
                }

                /* Re-check the orphan item; a 0 result clears the error bit */
                if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
                        ret = check_orphan_item(root, rec->ino);
                        if (ret == 0)
                                rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
                        if (can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                }

                if (!rec->found_inode_item)
                        rec->errors |= I_ERR_NO_INODE_ITEM;
                if (rec->found_link != rec->nlink)
                        rec->errors |= I_ERR_LINK_COUNT_WRONG;
                if (repair) {
                        ret = try_repair_inode(root, rec);
                        if (ret == 0 && can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                        /*
                         * ret is reset here, so in repair mode the check
                         * below never counts this record as an error.
                         */
                        ret = 0;
                }

                if (!(repair && ret == 0))
                        error++;
                print_inode_error(root, rec);
                list_for_each_entry(backref, &rec->backrefs, list) {
                        if (!backref->found_dir_item)
                                backref->errors |= REF_ERR_NO_DIR_ITEM;
                        if (!backref->found_dir_index)
                                backref->errors |= REF_ERR_NO_DIR_INDEX;
                        if (!backref->found_inode_ref)
                                backref->errors |= REF_ERR_NO_INODE_REF;
                        /* No newline here; print_ref_error() follows directly */
                        fprintf(stderr, "\tunresolved ref dir %llu index %llu"
                                " namelen %u name %s filetype %d errors %x",
                                (unsigned long long)backref->dir,
                                (unsigned long long)backref->index,
                                backref->namelen, backref->name,
                                backref->filetype, backref->errors);
                        print_ref_error(backref->errors);
                }
                free_inode_rec(rec);
        }
        return (error > 0) ? -1 : 0;
}
3430
3431 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3432                                         u64 objectid)
3433 {
3434         struct cache_extent *cache;
3435         struct root_record *rec = NULL;
3436         int ret;
3437
3438         cache = lookup_cache_extent(root_cache, objectid, 1);
3439         if (cache) {
3440                 rec = container_of(cache, struct root_record, cache);
3441         } else {
3442                 rec = calloc(1, sizeof(*rec));
3443                 if (!rec)
3444                         return ERR_PTR(-ENOMEM);
3445                 rec->objectid = objectid;
3446                 INIT_LIST_HEAD(&rec->backrefs);
3447                 rec->cache.start = objectid;
3448                 rec->cache.size = 1;
3449
3450                 ret = insert_cache_extent(root_cache, &rec->cache);
3451                 if (ret)
3452                         return ERR_PTR(-EEXIST);
3453         }
3454         return rec;
3455 }
3456
3457 static struct root_backref *get_root_backref(struct root_record *rec,
3458                                              u64 ref_root, u64 dir, u64 index,
3459                                              const char *name, int namelen)
3460 {
3461         struct root_backref *backref;
3462
3463         list_for_each_entry(backref, &rec->backrefs, list) {
3464                 if (backref->ref_root != ref_root || backref->dir != dir ||
3465                     backref->namelen != namelen)
3466                         continue;
3467                 if (memcmp(name, backref->name, namelen))
3468                         continue;
3469                 return backref;
3470         }
3471
3472         backref = calloc(1, sizeof(*backref) + namelen + 1);
3473         if (!backref)
3474                 return NULL;
3475         backref->ref_root = ref_root;
3476         backref->dir = dir;
3477         backref->index = index;
3478         backref->namelen = namelen;
3479         memcpy(backref->name, name, namelen);
3480         backref->name[namelen] = '\0';
3481         list_add_tail(&backref->list, &rec->backrefs);
3482         return backref;
3483 }
3484
3485 static void free_root_record(struct cache_extent *cache)
3486 {
3487         struct root_record *rec;
3488         struct root_backref *backref;
3489
3490         rec = container_of(cache, struct root_record, cache);
3491         while (!list_empty(&rec->backrefs)) {
3492                 backref = to_root_backref(rec->backrefs.next);
3493                 list_del(&backref->list);
3494                 free(backref);
3495         }
3496
3497         free(rec);
3498 }
3499
3500 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3501
3502 static int add_root_backref(struct cache_tree *root_cache,
3503                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3504                             const char *name, int namelen,
3505                             int item_type, int errors)
3506 {
3507         struct root_record *rec;
3508         struct root_backref *backref;
3509
3510         rec = get_root_rec(root_cache, root_id);
3511         BUG_ON(IS_ERR(rec));
3512         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3513         BUG_ON(!backref);
3514
3515         backref->errors |= errors;
3516
3517         if (item_type != BTRFS_DIR_ITEM_KEY) {
3518                 if (backref->found_dir_index || backref->found_back_ref ||
3519                     backref->found_forward_ref) {
3520                         if (backref->index != index)
3521                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3522                 } else {
3523                         backref->index = index;
3524                 }
3525         }
3526
3527         if (item_type == BTRFS_DIR_ITEM_KEY) {
3528                 if (backref->found_forward_ref)
3529                         rec->found_ref++;
3530                 backref->found_dir_item = 1;
3531         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3532                 backref->found_dir_index = 1;
3533         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3534                 if (backref->found_forward_ref)
3535                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3536                 else if (backref->found_dir_item)
3537                         rec->found_ref++;
3538                 backref->found_forward_ref = 1;
3539         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3540                 if (backref->found_back_ref)
3541                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3542                 backref->found_back_ref = 1;
3543         } else {
3544                 BUG_ON(1);
3545         }
3546
3547         if (backref->found_forward_ref && backref->found_dir_item)
3548                 backref->reachable = 1;
3549         return 0;
3550 }
3551
3552 static int merge_root_recs(struct btrfs_root *root,
3553                            struct cache_tree *src_cache,
3554                            struct cache_tree *dst_cache)
3555 {
3556         struct cache_extent *cache;
3557         struct ptr_node *node;
3558         struct inode_record *rec;
3559         struct inode_backref *backref;
3560         int ret = 0;
3561
3562         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3563                 free_inode_recs_tree(src_cache);
3564                 return 0;
3565         }
3566
3567         while (1) {
3568                 cache = search_cache_extent(src_cache, 0);
3569                 if (!cache)
3570                         break;
3571                 node = container_of(cache, struct ptr_node, cache);
3572                 rec = node->data;
3573                 remove_cache_extent(src_cache, &node->cache);
3574                 free(node);
3575
3576                 ret = is_child_root(root, root->objectid, rec->ino);
3577                 if (ret < 0)
3578                         break;
3579                 else if (ret == 0)
3580                         goto skip;
3581
3582                 list_for_each_entry(backref, &rec->backrefs, list) {
3583                         BUG_ON(backref->found_inode_ref);
3584                         if (backref->found_dir_item)
3585                                 add_root_backref(dst_cache, rec->ino,
3586                                         root->root_key.objectid, backref->dir,
3587                                         backref->index, backref->name,
3588                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3589                                         backref->errors);
3590                         if (backref->found_dir_index)
3591                                 add_root_backref(dst_cache, rec->ino,
3592                                         root->root_key.objectid, backref->dir,
3593                                         backref->index, backref->name,
3594                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3595                                         backref->errors);
3596                 }
3597 skip:
3598                 free_inode_rec(rec);
3599         }
3600         if (ret < 0)
3601                 return ret;
3602         return 0;
3603 }
3604
3605 static int check_root_refs(struct btrfs_root *root,
3606                            struct cache_tree *root_cache)
3607 {
3608         struct root_record *rec;
3609         struct root_record *ref_root;
3610         struct root_backref *backref;
3611         struct cache_extent *cache;
3612         int loop = 1;
3613         int ret;
3614         int error;
3615         int errors = 0;
3616
3617         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3618         BUG_ON(IS_ERR(rec));
3619         rec->found_ref = 1;
3620
3621         /* fixme: this can not detect circular references */
3622         while (loop) {
3623                 loop = 0;
3624                 cache = search_cache_extent(root_cache, 0);
3625                 while (1) {
3626                         if (!cache)
3627                                 break;
3628                         rec = container_of(cache, struct root_record, cache);
3629                         cache = next_cache_extent(cache);
3630
3631                         if (rec->found_ref == 0)
3632                                 continue;
3633
3634                         list_for_each_entry(backref, &rec->backrefs, list) {
3635                                 if (!backref->reachable)
3636                                         continue;
3637
3638                                 ref_root = get_root_rec(root_cache,
3639                                                         backref->ref_root);
3640                                 BUG_ON(IS_ERR(ref_root));
3641                                 if (ref_root->found_ref > 0)
3642                                         continue;
3643
3644                                 backref->reachable = 0;
3645                                 rec->found_ref--;
3646                                 if (rec->found_ref == 0)
3647                                         loop = 1;
3648                         }
3649                 }
3650         }
3651
3652         cache = search_cache_extent(root_cache, 0);
3653         while (1) {
3654                 if (!cache)
3655                         break;
3656                 rec = container_of(cache, struct root_record, cache);
3657                 cache = next_cache_extent(cache);
3658
3659                 if (rec->found_ref == 0 &&
3660                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3661                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3662                         ret = check_orphan_item(root->fs_info->tree_root,
3663                                                 rec->objectid);
3664                         if (ret == 0)
3665                                 continue;
3666
3667                         /*
3668                          * If we don't have a root item then we likely just have
3669                          * a dir item in a snapshot for this root but no actual
3670                          * ref key or anything so it's meaningless.
3671                          */
3672                         if (!rec->found_root_item)
3673                                 continue;
3674                         errors++;
3675                         fprintf(stderr, "fs tree %llu not referenced\n",
3676                                 (unsigned long long)rec->objectid);
3677                 }
3678
3679                 error = 0;
3680                 if (rec->found_ref > 0 && !rec->found_root_item)
3681                         error = 1;
3682                 list_for_each_entry(backref, &rec->backrefs, list) {
3683                         if (!backref->found_dir_item)
3684                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3685                         if (!backref->found_dir_index)
3686                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3687                         if (!backref->found_back_ref)
3688                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3689                         if (!backref->found_forward_ref)
3690                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3691                         if (backref->reachable && backref->errors)
3692                                 error = 1;
3693                 }
3694                 if (!error)
3695                         continue;
3696
3697                 errors++;
3698                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3699                         (unsigned long long)rec->objectid, rec->found_ref,
3700                          rec->found_root_item ? "" : "not found");
3701
3702                 list_for_each_entry(backref, &rec->backrefs, list) {
3703                         if (!backref->reachable)
3704                                 continue;
3705                         if (!backref->errors && rec->found_root_item)
3706                                 continue;
3707                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3708                                 " index %llu namelen %u name %s errors %x\n",
3709                                 (unsigned long long)backref->ref_root,
3710                                 (unsigned long long)backref->dir,
3711                                 (unsigned long long)backref->index,
3712                                 backref->namelen, backref->name,
3713                                 backref->errors);
3714                         print_ref_error(backref->errors);
3715                 }
3716         }
3717         return errors > 0 ? 1 : 0;
3718 }
3719
3720 static int process_root_ref(struct extent_buffer *eb, int slot,
3721                             struct btrfs_key *key,
3722                             struct cache_tree *root_cache)
3723 {
3724         u64 dirid;
3725         u64 index;
3726         u32 len;
3727         u32 name_len;
3728         struct btrfs_root_ref *ref;
3729         char namebuf[BTRFS_NAME_LEN];
3730         int error;
3731
3732         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3733
3734         dirid = btrfs_root_ref_dirid(eb, ref);
3735         index = btrfs_root_ref_sequence(eb, ref);
3736         name_len = btrfs_root_ref_name_len(eb, ref);
3737
3738         if (name_len <= BTRFS_NAME_LEN) {
3739                 len = name_len;
3740                 error = 0;
3741         } else {
3742                 len = BTRFS_NAME_LEN;
3743                 error = REF_ERR_NAME_TOO_LONG;
3744         }
3745         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3746
3747         if (key->type == BTRFS_ROOT_REF_KEY) {
3748                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3749                                  index, namebuf, len, key->type, error);
3750         } else {
3751                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3752                                  index, namebuf, len, key->type, error);
3753         }
3754         return 0;
3755 }
3756
3757 static void free_corrupt_block(struct cache_extent *cache)
3758 {
3759         struct btrfs_corrupt_block *corrupt;
3760
3761         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3762         free(corrupt);
3763 }
3764
3765 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3766
3767 /*
3768  * Repair the btree of the given root.
3769  *
3770  * The fix is to remove the node key in corrupt_blocks cache_tree.
3771  * and rebalance the tree.
3772  * After the fix, the btree should be writeable.
3773  */
3774 static int repair_btree(struct btrfs_root *root,
3775                         struct cache_tree *corrupt_blocks)
3776 {
3777         struct btrfs_trans_handle *trans;
3778         struct btrfs_path path;
3779         struct btrfs_corrupt_block *corrupt;
3780         struct cache_extent *cache;
3781         struct btrfs_key key;
3782         u64 offset;
3783         int level;
3784         int ret = 0;
3785
3786         if (cache_tree_empty(corrupt_blocks))
3787                 return 0;
3788
3789         trans = btrfs_start_transaction(root, 1);
3790         if (IS_ERR(trans)) {
3791                 ret = PTR_ERR(trans);
3792                 fprintf(stderr, "Error starting transaction: %s\n",
3793                         strerror(-ret));
3794                 return ret;
3795         }
3796         btrfs_init_path(&path);
3797         cache = first_cache_extent(corrupt_blocks);
3798         while (cache) {
3799                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3800                                        cache);
3801                 level = corrupt->level;
3802                 path.lowest_level = level;
3803                 key.objectid = corrupt->key.objectid;
3804                 key.type = corrupt->key.type;
3805                 key.offset = corrupt->key.offset;
3806
3807                 /*
3808                  * Here we don't want to do any tree balance, since it may
3809                  * cause a balance with corrupted brother leaf/node,
3810                  * so ins_len set to 0 here.
3811                  * Balance will be done after all corrupt node/leaf is deleted.
3812                  */
3813                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3814                 if (ret < 0)
3815                         goto out;
3816                 offset = btrfs_node_blockptr(path.nodes[level],
3817                                              path.slots[level]);
3818
3819                 /* Remove the ptr */
3820                 ret = btrfs_del_ptr(trans, root, &path, level,
3821                                     path.slots[level]);
3822                 if (ret < 0)
3823                         goto out;
3824                 /*
3825                  * Remove the corresponding extent
3826                  * return value is not concerned.
3827                  */
3828                 btrfs_release_path(&path);
3829                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3830                                         0, root->root_key.objectid,
3831                                         level - 1, 0);
3832                 cache = next_cache_extent(cache);
3833         }
3834
3835         /* Balance the btree using btrfs_search_slot() */
3836         cache = first_cache_extent(corrupt_blocks);
3837         while (cache) {
3838                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3839                                        cache);
3840                 memcpy(&key, &corrupt->key, sizeof(key));
3841                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3842                 if (ret < 0)
3843                         goto out;
3844                 /* return will always >0 since it won't find the item */
3845                 ret = 0;
3846                 btrfs_release_path(&path);
3847                 cache = next_cache_extent(cache);
3848         }
3849 out:
3850         btrfs_commit_transaction(trans, root);
3851         btrfs_release_path(&path);
3852         return ret;
3853 }
3854
3855 static int check_fs_root(struct btrfs_root *root,
3856                          struct cache_tree *root_cache,
3857                          struct walk_control *wc)
3858 {
3859         int ret = 0;
3860         int err = 0;
3861         int wret;
3862         int level;
3863         struct btrfs_path path;
3864         struct shared_node root_node;
3865         struct root_record *rec;
3866         struct btrfs_root_item *root_item = &root->root_item;
3867         struct cache_tree corrupt_blocks;
3868         struct orphan_data_extent *orphan;
3869         struct orphan_data_extent *tmp;
3870         enum btrfs_tree_block_status status;
3871         struct node_refs nrefs;
3872
3873         /*
3874          * Reuse the corrupt_block cache tree to record corrupted tree block
3875          *
3876          * Unlike the usage in extent tree check, here we do it in a per
3877          * fs/subvol tree base.
3878          */
3879         cache_tree_init(&corrupt_blocks);
3880         root->fs_info->corrupt_blocks = &corrupt_blocks;
3881
3882         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3883                 rec = get_root_rec(root_cache, root->root_key.objectid);
3884                 BUG_ON(IS_ERR(rec));
3885                 if (btrfs_root_refs(root_item) > 0)
3886                         rec->found_root_item = 1;
3887         }
3888
3889         btrfs_init_path(&path);
3890         memset(&root_node, 0, sizeof(root_node));
3891         cache_tree_init(&root_node.root_cache);
3892         cache_tree_init(&root_node.inode_cache);
3893         memset(&nrefs, 0, sizeof(nrefs));
3894
3895         /* Move the orphan extent record to corresponding inode_record */
3896         list_for_each_entry_safe(orphan, tmp,
3897                                  &root->orphan_data_extents, list) {
3898                 struct inode_record *inode;
3899
3900                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3901                                       1);
3902                 BUG_ON(IS_ERR(inode));
3903                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3904                 list_move(&orphan->list, &inode->orphan_extents);
3905         }
3906
3907         level = btrfs_header_level(root->node);
3908         memset(wc->nodes, 0, sizeof(wc->nodes));
3909         wc->nodes[level] = &root_node;
3910         wc->active_node = level;
3911         wc->root_level = level;
3912
3913         /* We may not have checked the root block, lets do that now */
3914         if (btrfs_is_leaf(root->node))
3915                 status = btrfs_check_leaf(root, NULL, root->node);
3916         else
3917                 status = btrfs_check_node(root, NULL, root->node);
3918         if (status != BTRFS_TREE_BLOCK_CLEAN)
3919                 return -EIO;
3920
3921         if (btrfs_root_refs(root_item) > 0 ||
3922             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3923                 path.nodes[level] = root->node;
3924                 extent_buffer_get(root->node);
3925                 path.slots[level] = 0;
3926         } else {
3927                 struct btrfs_key key;
3928                 struct btrfs_disk_key found_key;
3929
3930                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3931                 level = root_item->drop_level;
3932                 path.lowest_level = level;
3933                 if (level > btrfs_header_level(root->node) ||
3934                     level >= BTRFS_MAX_LEVEL) {
3935                         error("ignoring invalid drop level: %u", level);
3936                         goto skip_walking;
3937                 }
3938                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3939                 if (wret < 0)
3940                         goto skip_walking;
3941                 btrfs_node_key(path.nodes[level], &found_key,
3942                                 path.slots[level]);
3943                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3944                                         sizeof(found_key)));
3945         }
3946
3947         while (1) {
3948                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3949                 if (wret < 0)
3950                         ret = wret;
3951                 if (wret != 0)
3952                         break;
3953
3954                 wret = walk_up_tree(root, &path, wc, &level);
3955                 if (wret < 0)
3956                         ret = wret;
3957                 if (wret != 0)
3958                         break;
3959         }
3960 skip_walking:
3961         btrfs_release_path(&path);
3962
3963         if (!cache_tree_empty(&corrupt_blocks)) {
3964                 struct cache_extent *cache;
3965                 struct btrfs_corrupt_block *corrupt;
3966
3967                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3968                        root->root_key.objectid);
3969                 cache = first_cache_extent(&corrupt_blocks);
3970                 while (cache) {
3971                         corrupt = container_of(cache,
3972                                                struct btrfs_corrupt_block,
3973                                                cache);
3974                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3975                                cache->start, corrupt->level,
3976                                corrupt->key.objectid, corrupt->key.type,
3977                                corrupt->key.offset);
3978                         cache = next_cache_extent(cache);
3979                 }
3980                 if (repair) {
3981                         printf("Try to repair the btree for root %llu\n",
3982                                root->root_key.objectid);
3983                         ret = repair_btree(root, &corrupt_blocks);
3984                         if (ret < 0)
3985                                 fprintf(stderr, "Failed to repair btree: %s\n",
3986                                         strerror(-ret));
3987                         if (!ret)
3988                                 printf("Btree for root %llu is fixed\n",
3989                                        root->root_key.objectid);
3990                 }
3991         }
3992
3993         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3994         if (err < 0)
3995                 ret = err;
3996
3997         if (root_node.current) {
3998                 root_node.current->checked = 1;
3999                 maybe_free_inode_rec(&root_node.inode_cache,
4000                                 root_node.current);
4001         }
4002
4003         err = check_inode_recs(root, &root_node.inode_cache);
4004         if (!ret)
4005                 ret = err;
4006
4007         free_corrupt_blocks_tree(&corrupt_blocks);
4008         root->fs_info->corrupt_blocks = NULL;
4009         free_orphan_data_extents(&root->orphan_data_extents);
4010         return ret;
4011 }
4012
4013 static int fs_root_objectid(u64 objectid)
4014 {
4015         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4016             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4017                 return 1;
4018         return is_fstree(objectid);
4019 }
4020
4021 static int check_fs_roots(struct btrfs_root *root,
4022                           struct cache_tree *root_cache)
4023 {
4024         struct btrfs_path path;
4025         struct btrfs_key key;
4026         struct walk_control wc;
4027         struct extent_buffer *leaf, *tree_node;
4028         struct btrfs_root *tmp_root;
4029         struct btrfs_root *tree_root = root->fs_info->tree_root;
4030         int ret;
4031         int err = 0;
4032
4033         if (ctx.progress_enabled) {
4034                 ctx.tp = TASK_FS_ROOTS;
4035                 task_start(ctx.info);
4036         }
4037
4038         /*
4039          * Just in case we made any changes to the extent tree that weren't
4040          * reflected into the free space cache yet.
4041          */
4042         if (repair)
4043                 reset_cached_block_groups(root->fs_info);
4044         memset(&wc, 0, sizeof(wc));
4045         cache_tree_init(&wc.shared);
4046         btrfs_init_path(&path);
4047
4048 again:
4049         key.offset = 0;
4050         key.objectid = 0;
4051         key.type = BTRFS_ROOT_ITEM_KEY;
4052         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4053         if (ret < 0) {
4054                 err = 1;
4055                 goto out;
4056         }
4057         tree_node = tree_root->node;
4058         while (1) {
4059                 if (tree_node != tree_root->node) {
4060                         free_root_recs_tree(root_cache);
4061                         btrfs_release_path(&path);
4062                         goto again;
4063                 }
4064                 leaf = path.nodes[0];
4065                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4066                         ret = btrfs_next_leaf(tree_root, &path);
4067                         if (ret) {
4068                                 if (ret < 0)
4069                                         err = 1;
4070                                 break;
4071                         }
4072                         leaf = path.nodes[0];
4073                 }
4074                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4075                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4076                     fs_root_objectid(key.objectid)) {
4077                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4078                                 tmp_root = btrfs_read_fs_root_no_cache(
4079                                                 root->fs_info, &key);
4080                         } else {
4081                                 key.offset = (u64)-1;
4082                                 tmp_root = btrfs_read_fs_root(
4083                                                 root->fs_info, &key);
4084                         }
4085                         if (IS_ERR(tmp_root)) {
4086                                 err = 1;
4087                                 goto next;
4088                         }
4089                         ret = check_fs_root(tmp_root, root_cache, &wc);
4090                         if (ret == -EAGAIN) {
4091                                 free_root_recs_tree(root_cache);
4092                                 btrfs_release_path(&path);
4093                                 goto again;
4094                         }
4095                         if (ret)
4096                                 err = 1;
4097                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4098                                 btrfs_free_fs_root(tmp_root);
4099                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4100                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4101                         process_root_ref(leaf, path.slots[0], &key,
4102                                          root_cache);
4103                 }
4104 next:
4105                 path.slots[0]++;
4106         }
4107 out:
4108         btrfs_release_path(&path);
4109         if (err)
4110                 free_extent_cache_tree(&wc.shared);
4111         if (!cache_tree_empty(&wc.shared))
4112                 fprintf(stderr, "warning line %d\n", __LINE__);
4113
4114         task_stop(ctx.info);
4115
4116         return err;
4117 }
4118
4119 /*
4120  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4121  * INODE_REF/INODE_EXTREF match.
4122  *
4123  * @root:       the root of the fs/file tree
4124  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4125  * @key:        the key of the DIR_ITEM/DIR_INDEX
4126  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4127  *              distinguish root_dir between normal dir/file
4128  * @name:       the name in the INODE_REF/INODE_EXTREF
4129  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4130  * @mode:       the st_mode of INODE_ITEM
4131  *
4132  * Return 0 if no error occurred.
4133  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4134  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4135  * dir/file.
4136  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4137  * not match for normal dir/file.
4138  */
4139 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4140                          struct btrfs_key *key, u64 index, char *name,
4141                          u32 namelen, u32 mode)
4142 {
4143         struct btrfs_path path;
4144         struct extent_buffer *node;
4145         struct btrfs_dir_item *di;
4146         struct btrfs_key location;
4147         char namebuf[BTRFS_NAME_LEN] = {0};
4148         u32 total;
4149         u32 cur = 0;
4150         u32 len;
4151         u32 name_len;
4152         u32 data_len;
4153         u8 filetype;
4154         int slot;
4155         int ret;
4156
4157         btrfs_init_path(&path);
4158         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4159         if (ret < 0) {
4160                 ret = DIR_ITEM_MISSING;
4161                 goto out;
4162         }
4163
4164         /* Process root dir and goto out*/
4165         if (index == 0) {
4166                 if (ret == 0) {
4167                         ret = ROOT_DIR_ERROR;
4168                         error(
4169                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4170                                 root->objectid,
4171                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4172                                         "REF" : "EXTREF",
4173                                 ref_key->objectid, ref_key->offset,
4174                                 key->type == BTRFS_DIR_ITEM_KEY ?
4175                                         "DIR_ITEM" : "DIR_INDEX");
4176                 } else {
4177                         ret = 0;
4178                 }
4179
4180                 goto out;
4181         }
4182
4183         /* Process normal file/dir */
4184         if (ret > 0) {
4185                 ret = DIR_ITEM_MISSING;
4186                 error(
4187                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4188                         root->objectid,
4189                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4190                         ref_key->objectid, ref_key->offset,
4191                         key->type == BTRFS_DIR_ITEM_KEY ?
4192                                 "DIR_ITEM" : "DIR_INDEX",
4193                         key->objectid, key->offset, namelen, name,
4194                         imode_to_type(mode));
4195                 goto out;
4196         }
4197
4198         /* Check whether inode_id/filetype/name match */
4199         node = path.nodes[0];
4200         slot = path.slots[0];
4201         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4202         total = btrfs_item_size_nr(node, slot);
4203         while (cur < total) {
4204                 ret = DIR_ITEM_MISMATCH;
4205                 name_len = btrfs_dir_name_len(node, di);
4206                 data_len = btrfs_dir_data_len(node, di);
4207
4208                 btrfs_dir_item_key_to_cpu(node, di, &location);
4209                 if (location.objectid != ref_key->objectid ||
4210                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4211                     location.offset != 0)
4212                         goto next;
4213
4214                 filetype = btrfs_dir_type(node, di);
4215                 if (imode_to_type(mode) != filetype)
4216                         goto next;
4217
4218                 if (name_len <= BTRFS_NAME_LEN) {
4219                         len = name_len;
4220                 } else {
4221                         len = BTRFS_NAME_LEN;
4222                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4223                         root->objectid,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                         "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, name_len);
4227                 }
4228                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4229                 if (len != namelen || strncmp(namebuf, name, len))
4230                         goto next;
4231
4232                 ret = 0;
4233                 goto out;
4234 next:
4235                 len = sizeof(*di) + name_len + data_len;
4236                 di = (struct btrfs_dir_item *)((char *)di + len);
4237                 cur += len;
4238         }
4239         if (ret == DIR_ITEM_MISMATCH)
4240                 error(
4241                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4242                         root->objectid,
4243                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4244                         ref_key->objectid, ref_key->offset,
4245                         key->type == BTRFS_DIR_ITEM_KEY ?
4246                                 "DIR_ITEM" : "DIR_INDEX",
4247                         key->objectid, key->offset, namelen, name,
4248                         imode_to_type(mode));
4249 out:
4250         btrfs_release_path(&path);
4251         return ret;
4252 }
4253
4254 /*
4255  * Traverse the given INODE_REF and call find_dir_item() to find related
4256  * DIR_ITEM/DIR_INDEX.
4257  *
4258  * @root:       the root of the fs/file tree
4259  * @ref_key:    the key of the INODE_REF
4260  * @refs:       the count of INODE_REF
4261  * @mode:       the st_mode of INODE_ITEM
4262  *
4263  * Return 0 if no error occurred.
4264  */
4265 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4266                            struct extent_buffer *node, int slot, u64 *refs,
4267                            int mode)
4268 {
4269         struct btrfs_key key;
4270         struct btrfs_inode_ref *ref;
4271         char namebuf[BTRFS_NAME_LEN] = {0};
4272         u32 total;
4273         u32 cur = 0;
4274         u32 len;
4275         u32 name_len;
4276         u64 index;
4277         int ret, err = 0;
4278
4279         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4280         total = btrfs_item_size_nr(node, slot);
4281
4282 next:
4283         /* Update inode ref count */
4284         (*refs)++;
4285
4286         index = btrfs_inode_ref_index(node, ref);
4287         name_len = btrfs_inode_ref_name_len(node, ref);
4288         if (name_len <= BTRFS_NAME_LEN) {
4289                 len = name_len;
4290         } else {
4291                 len = BTRFS_NAME_LEN;
4292                 warning("root %llu INODE_REF[%llu %llu] name too long",
4293                         root->objectid, ref_key->objectid, ref_key->offset);
4294         }
4295
4296         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4297
4298         /* Check root dir ref name */
4299         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4300                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4301                       root->objectid, ref_key->objectid, ref_key->offset,
4302                       namebuf);
4303                 err |= ROOT_DIR_ERROR;
4304         }
4305
4306         /* Find related DIR_INDEX */
4307         key.objectid = ref_key->offset;
4308         key.type = BTRFS_DIR_INDEX_KEY;
4309         key.offset = index;
4310         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4311         err |= ret;
4312
4313         /* Find related dir_item */
4314         key.objectid = ref_key->offset;
4315         key.type = BTRFS_DIR_ITEM_KEY;
4316         key.offset = btrfs_name_hash(namebuf, len);
4317         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4318         err |= ret;
4319
4320         len = sizeof(*ref) + name_len;
4321         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4322         cur += len;
4323         if (cur < total)
4324                 goto next;
4325
4326         return err;
4327 }
4328
4329 /*
4330  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4331  * DIR_ITEM/DIR_INDEX.
4332  *
4333  * @root:       the root of the fs/file tree
4334  * @ref_key:    the key of the INODE_EXTREF
4335  * @refs:       the count of INODE_EXTREF
4336  * @mode:       the st_mode of INODE_ITEM
4337  *
4338  * Return 0 if no error occurred.
4339  */
4340 static int check_inode_extref(struct btrfs_root *root,
4341                               struct btrfs_key *ref_key,
4342                               struct extent_buffer *node, int slot, u64 *refs,
4343                               int mode)
4344 {
4345         struct btrfs_key key;
4346         struct btrfs_inode_extref *extref;
4347         char namebuf[BTRFS_NAME_LEN] = {0};
4348         u32 total;
4349         u32 cur = 0;
4350         u32 len;
4351         u32 name_len;
4352         u64 index;
4353         u64 parent;
4354         int ret;
4355         int err = 0;
4356
4357         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4358         total = btrfs_item_size_nr(node, slot);
4359
4360 next:
4361         /* update inode ref count */
4362         (*refs)++;
4363         name_len = btrfs_inode_extref_name_len(node, extref);
4364         index = btrfs_inode_extref_index(node, extref);
4365         parent = btrfs_inode_extref_parent(node, extref);
4366         if (name_len <= BTRFS_NAME_LEN) {
4367                 len = name_len;
4368         } else {
4369                 len = BTRFS_NAME_LEN;
4370                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4371                         root->objectid, ref_key->objectid, ref_key->offset);
4372         }
4373         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4374
4375         /* Check root dir ref name */
4376         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4377                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4378                       root->objectid, ref_key->objectid, ref_key->offset,
4379                       namebuf);
4380                 err |= ROOT_DIR_ERROR;
4381         }
4382
4383         /* find related dir_index */
4384         key.objectid = parent;
4385         key.type = BTRFS_DIR_INDEX_KEY;
4386         key.offset = index;
4387         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4388         err |= ret;
4389
4390         /* find related dir_item */
4391         key.objectid = parent;
4392         key.type = BTRFS_DIR_ITEM_KEY;
4393         key.offset = btrfs_name_hash(namebuf, len);
4394         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4395         err |= ret;
4396
4397         len = sizeof(*extref) + name_len;
4398         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4399         cur += len;
4400
4401         if (cur < total)
4402                 goto next;
4403
4404         return err;
4405 }
4406
4407 /*
4408  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4409  * DIR_ITEM/DIR_INDEX match.
4410  *
4411  * @root:       the root of the fs/file tree
4412  * @key:        the key of the INODE_REF/INODE_EXTREF
4413  * @name:       the name in the INODE_REF/INODE_EXTREF
4414  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4415  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4416  * to (u64)-1
4417  * @ext_ref:    the EXTENDED_IREF feature
4418  *
4419  * Return 0 if no error occurred.
4420  * Return >0 for error bitmap
4421  */
4422 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4423                           char *name, int namelen, u64 index,
4424                           unsigned int ext_ref)
4425 {
4426         struct btrfs_path path;
4427         struct btrfs_inode_ref *ref;
4428         struct btrfs_inode_extref *extref;
4429         struct extent_buffer *node;
4430         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4431         u32 total;
4432         u32 cur = 0;
4433         u32 len;
4434         u32 ref_namelen;
4435         u64 ref_index;
4436         u64 parent;
4437         u64 dir_id;
4438         int slot;
4439         int ret;
4440
4441         btrfs_init_path(&path);
4442         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4443         if (ret) {
4444                 ret = INODE_REF_MISSING;
4445                 goto extref;
4446         }
4447
4448         node = path.nodes[0];
4449         slot = path.slots[0];
4450
4451         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4452         total = btrfs_item_size_nr(node, slot);
4453
4454         /* Iterate all entry of INODE_REF */
4455         while (cur < total) {
4456                 ret = INODE_REF_MISSING;
4457
4458                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4459                 ref_index = btrfs_inode_ref_index(node, ref);
4460                 if (index != (u64)-1 && index != ref_index)
4461                         goto next_ref;
4462
4463                 if (ref_namelen <= BTRFS_NAME_LEN) {
4464                         len = ref_namelen;
4465                 } else {
4466                         len = BTRFS_NAME_LEN;
4467                         warning("root %llu INODE %s[%llu %llu] name too long",
4468                                 root->objectid,
4469                                 key->type == BTRFS_INODE_REF_KEY ?
4470                                         "REF" : "EXTREF",
4471                                 key->objectid, key->offset);
4472                 }
4473                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4474                                    len);
4475
4476                 if (len != namelen || strncmp(ref_namebuf, name, len))
4477                         goto next_ref;
4478
4479                 ret = 0;
4480                 goto out;
4481 next_ref:
4482                 len = sizeof(*ref) + ref_namelen;
4483                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4484                 cur += len;
4485         }
4486
4487 extref:
4488         /* Skip if not support EXTENDED_IREF feature */
4489         if (!ext_ref)
4490                 goto out;
4491
4492         btrfs_release_path(&path);
4493         btrfs_init_path(&path);
4494
4495         dir_id = key->offset;
4496         key->type = BTRFS_INODE_EXTREF_KEY;
4497         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4498
4499         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4500         if (ret) {
4501                 ret = INODE_REF_MISSING;
4502                 goto out;
4503         }
4504
4505         node = path.nodes[0];
4506         slot = path.slots[0];
4507
4508         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4509         cur = 0;
4510         total = btrfs_item_size_nr(node, slot);
4511
4512         /* Iterate all entry of INODE_EXTREF */
4513         while (cur < total) {
4514                 ret = INODE_REF_MISSING;
4515
4516                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4517                 ref_index = btrfs_inode_extref_index(node, extref);
4518                 parent = btrfs_inode_extref_parent(node, extref);
4519                 if (index != (u64)-1 && index != ref_index)
4520                         goto next_extref;
4521
4522                 if (parent != dir_id)
4523                         goto next_extref;
4524
4525                 if (ref_namelen <= BTRFS_NAME_LEN) {
4526                         len = ref_namelen;
4527                 } else {
4528                         len = BTRFS_NAME_LEN;
4529                         warning("root %llu INODE %s[%llu %llu] name too long",
4530                                 root->objectid,
4531                                 key->type == BTRFS_INODE_REF_KEY ?
4532                                         "REF" : "EXTREF",
4533                                 key->objectid, key->offset);
4534                 }
4535                 read_extent_buffer(node, ref_namebuf,
4536                                    (unsigned long)(extref + 1), len);
4537
4538                 if (len != namelen || strncmp(ref_namebuf, name, len))
4539                         goto next_extref;
4540
4541                 ret = 0;
4542                 goto out;
4543
4544 next_extref:
4545                 len = sizeof(*extref) + ref_namelen;
4546                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4547                 cur += len;
4548
4549         }
4550 out:
4551         btrfs_release_path(&path);
4552         return ret;
4553 }
4554
4555 /*
4556  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4557  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4558  *
4559  * @root:       the root of the fs/file tree
4560  * @key:        the key of the DIR_ITEM/DIR_INDEX
4561  * @size:       the st_size of the INODE_ITEM
4562  * @ext_ref:    the EXTENDED_IREF feature
4563  *
4564  * Return 0 if no error occurred.
4565  */
4566 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4567                           struct extent_buffer *node, int slot, u64 *size,
4568                           unsigned int ext_ref)
4569 {
4570         struct btrfs_dir_item *di;
4571         struct btrfs_inode_item *ii;
4572         struct btrfs_path path;
4573         struct btrfs_key location;
4574         char namebuf[BTRFS_NAME_LEN] = {0};
4575         u32 total;
4576         u32 cur = 0;
4577         u32 len;
4578         u32 name_len;
4579         u32 data_len;
4580         u8 filetype;
4581         u32 mode;
4582         u64 index;
4583         int ret;
4584         int err = 0;
4585
4586         /*
4587          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4588          * ignore index check.
4589          */
4590         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4591
4592         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4593         total = btrfs_item_size_nr(node, slot);
4594
4595         while (cur < total) {
4596                 data_len = btrfs_dir_data_len(node, di);
4597                 if (data_len)
4598                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4599                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4600                               "DIR_ITEM" : "DIR_INDEX",
4601                               key->objectid, key->offset, data_len);
4602
4603                 name_len = btrfs_dir_name_len(node, di);
4604                 if (name_len <= BTRFS_NAME_LEN) {
4605                         len = name_len;
4606                 } else {
4607                         len = BTRFS_NAME_LEN;
4608                         warning("root %llu %s[%llu %llu] name too long",
4609                                 root->objectid,
4610                                 key->type == BTRFS_DIR_ITEM_KEY ?
4611                                 "DIR_ITEM" : "DIR_INDEX",
4612                                 key->objectid, key->offset);
4613                 }
4614                 (*size) += name_len;
4615
4616                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4617                 filetype = btrfs_dir_type(node, di);
4618
4619                 btrfs_init_path(&path);
4620                 btrfs_dir_item_key_to_cpu(node, di, &location);
4621
4622                 /* Ignore related ROOT_ITEM check */
4623                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4624                         goto next;
4625
4626                 /* Check relative INODE_ITEM(existence/filetype) */
4627                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4628                 if (ret) {
4629                         err |= INODE_ITEM_MISSING;
4630                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4631                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4632                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4633                               key->offset, location.objectid, name_len,
4634                               namebuf, filetype);
4635                         goto next;
4636                 }
4637
4638                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4639                                     struct btrfs_inode_item);
4640                 mode = btrfs_inode_mode(path.nodes[0], ii);
4641
4642                 if (imode_to_type(mode) != filetype) {
4643                         err |= INODE_ITEM_MISMATCH;
4644                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4645                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4646                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4647                               key->offset, name_len, namebuf, filetype);
4648                 }
4649
4650                 /* Check relative INODE_REF/INODE_EXTREF */
4651                 location.type = BTRFS_INODE_REF_KEY;
4652                 location.offset = key->objectid;
4653                 ret = find_inode_ref(root, &location, namebuf, len,
4654                                        index, ext_ref);
4655                 err |= ret;
4656                 if (ret & INODE_REF_MISSING)
4657                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4658                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4659                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4660                               key->offset, name_len, namebuf, filetype);
4661
4662 next:
4663                 btrfs_release_path(&path);
4664                 len = sizeof(*di) + name_len + data_len;
4665                 di = (struct btrfs_dir_item *)((char *)di + len);
4666                 cur += len;
4667
4668                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4669                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4670                               root->objectid, key->objectid, key->offset);
4671                         break;
4672                 }
4673         }
4674
4675         return err;
4676 }
4677
4678 /*
4679  * Check file extent datasum/hole, update the size of the file extents,
4680  * check and update the last offset of the file extent.
4681  *
4682  * @root:       the root of fs/file tree.
4683  * @fkey:       the key of the file extent.
4684  * @nodatasum:  INODE_NODATASUM feature.
4685  * @size:       the sum of all EXTENT_DATA items size for this inode.
4686  * @end:        the offset of the last extent.
4687  *
4688  * Return 0 if no error occurred.
4689  */
4690 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4691                              struct extent_buffer *node, int slot,
4692                              unsigned int nodatasum, u64 *size, u64 *end)
4693 {
4694         struct btrfs_file_extent_item *fi;
4695         u64 disk_bytenr;
4696         u64 disk_num_bytes;
4697         u64 extent_num_bytes;
4698         u64 found;
4699         unsigned int extent_type;
4700         unsigned int is_hole;
4701         int ret;
4702         int err = 0;
4703
4704         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4705
4706         extent_type = btrfs_file_extent_type(node, fi);
4707         /* Skip if file extent is inline */
4708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4709                 struct btrfs_item *e = btrfs_item_nr(slot);
4710                 u32 item_inline_len;
4711
4712                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4713                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4714                 if (extent_num_bytes == 0 ||
4715                     extent_num_bytes != item_inline_len)
4716                         err |= FILE_EXTENT_ERROR;
4717                 *size += extent_num_bytes;
4718                 return err;
4719         }
4720
4721         /* Check extent type */
4722         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4723                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4724                 err |= FILE_EXTENT_ERROR;
4725                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4726                       root->objectid, fkey->objectid, fkey->offset);
4727                 return err;
4728         }
4729
4730         /* Check REG_EXTENT/PREALLOC_EXTENT */
4731         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4732         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4733         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4734         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4735
4736         /* Check EXTENT_DATA datasum */
4737         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4738         if (found > 0 && nodatasum) {
4739                 err |= ODD_CSUM_ITEM;
4740                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4741                       root->objectid, fkey->objectid, fkey->offset);
4742         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4743                    !is_hole &&
4744                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4745                 err |= CSUM_ITEM_MISSING;
4746                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4747                       root->objectid, fkey->objectid, fkey->offset);
4748         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4749                 err |= ODD_CSUM_ITEM;
4750                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         }
4753
4754         /* Check EXTENT_DATA hole */
4755         if (no_holes && is_hole) {
4756                 err |= FILE_EXTENT_ERROR;
4757                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4758                       root->objectid, fkey->objectid, fkey->offset);
4759         } else if (!no_holes && *end != fkey->offset) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         }
4764
4765         *end += extent_num_bytes;
4766         if (!is_hole)
4767                 *size += extent_num_bytes;
4768
4769         return err;
4770 }
4771
4772 /*
4773  * Check INODE_ITEM and related ITEMs (the same inode number)
4774  * 1. check link count
4775  * 2. check inode ref/extref
4776  * 3. check dir item/index
4777  *
4778  * @ext_ref:    the EXTENDED_IREF feature
4779  *
4780  * Return 0 if no error occurred.
4781  * Return >0 for error or hit the traversal is done(by error bitmap)
4782  */
4783 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4784                             unsigned int ext_ref)
4785 {
4786         struct extent_buffer *node;
4787         struct btrfs_inode_item *ii;
4788         struct btrfs_key key;
4789         u64 inode_id;
4790         u32 mode;
4791         u64 nlink;
4792         u64 nbytes;
4793         u64 isize;
4794         u64 size = 0;
4795         u64 refs = 0;
4796         u64 extent_end = 0;
4797         u64 extent_size = 0;
4798         unsigned int dir;
4799         unsigned int nodatasum;
4800         int slot;
4801         int ret;
4802         int err = 0;
4803
4804         node = path->nodes[0];
4805         slot = path->slots[0];
4806
4807         btrfs_item_key_to_cpu(node, &key, slot);
4808         inode_id = key.objectid;
4809
4810         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4811                 ret = btrfs_next_item(root, path);
4812                 if (ret > 0)
4813                         err |= LAST_ITEM;
4814                 return err;
4815         }
4816
4817         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4818         isize = btrfs_inode_size(node, ii);
4819         nbytes = btrfs_inode_nbytes(node, ii);
4820         mode = btrfs_inode_mode(node, ii);
4821         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4822         nlink = btrfs_inode_nlink(node, ii);
4823         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4824
4825         while (1) {
4826                 ret = btrfs_next_item(root, path);
4827                 if (ret < 0) {
4828                         /* out will fill 'err' rusing current statistics */
4829                         goto out;
4830                 } else if (ret > 0) {
4831                         err |= LAST_ITEM;
4832                         goto out;
4833                 }
4834
4835                 node = path->nodes[0];
4836                 slot = path->slots[0];
4837                 btrfs_item_key_to_cpu(node, &key, slot);
4838                 if (key.objectid != inode_id)
4839                         goto out;
4840
4841                 switch (key.type) {
4842                 case BTRFS_INODE_REF_KEY:
4843                         ret = check_inode_ref(root, &key, node, slot, &refs,
4844                                               mode);
4845                         err |= ret;
4846                         break;
4847                 case BTRFS_INODE_EXTREF_KEY:
4848                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4849                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4850                                         root->objectid, key.objectid,
4851                                         key.offset);
4852                         ret = check_inode_extref(root, &key, node, slot, &refs,
4853                                                  mode);
4854                         err |= ret;
4855                         break;
4856                 case BTRFS_DIR_ITEM_KEY:
4857                 case BTRFS_DIR_INDEX_KEY:
4858                         if (!dir) {
4859                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4860                                         root->objectid, inode_id,
4861                                         imode_to_type(mode), key.objectid,
4862                                         key.offset);
4863                         }
4864                         ret = check_dir_item(root, &key, node, slot, &size,
4865                                              ext_ref);
4866                         err |= ret;
4867                         break;
4868                 case BTRFS_EXTENT_DATA_KEY:
4869                         if (dir) {
4870                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4871                                         root->objectid, inode_id, key.objectid,
4872                                         key.offset);
4873                         }
4874                         ret = check_file_extent(root, &key, node, slot,
4875                                                 nodatasum, &extent_size,
4876                                                 &extent_end);
4877                         err |= ret;
4878                         break;
4879                 case BTRFS_XATTR_ITEM_KEY:
4880                         break;
4881                 default:
4882                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4883                               key.objectid, key.type, key.offset);
4884                 }
4885         }
4886
4887 out:
4888         /* verify INODE_ITEM nlink/isize/nbytes */
4889         if (dir) {
4890                 if (nlink != 1) {
4891                         err |= LINK_COUNT_ERROR;
4892                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4893                               root->objectid, inode_id, nlink);
4894                 }
4895
4896                 /*
4897                  * Just a warning, as dir inode nbytes is just an
4898                  * instructive value.
4899                  */
4900                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4901                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4902                                 root->objectid, inode_id, root->nodesize);
4903                 }
4904
4905                 if (isize != size) {
4906                         err |= ISIZE_ERROR;
4907                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4908                               root->objectid, inode_id, isize, size);
4909                 }
4910         } else {
4911                 if (nlink != refs) {
4912                         err |= LINK_COUNT_ERROR;
4913                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4914                               root->objectid, inode_id, nlink, refs);
4915                 } else if (!nlink) {
4916                         err |= ORPHAN_ITEM;
4917                 }
4918
4919                 if (!nbytes && !no_holes && extent_end < isize) {
4920                         err |= NBYTES_ERROR;
4921                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4922                               root->objectid, inode_id, isize);
4923                 }
4924
4925                 if (nbytes != extent_size) {
4926                         err |= NBYTES_ERROR;
4927                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4928                               root->objectid, inode_id, nbytes, extent_size);
4929                 }
4930         }
4931
4932         return err;
4933 }
4934
4935 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4936 {
4937         struct btrfs_path *path;
4938         struct btrfs_key key;
4939         int err = 0;
4940         int ret;
4941
4942         path = btrfs_alloc_path();
4943         if (!path)
4944                 return -ENOMEM;
4945         key.objectid = 256;
4946         key.type = BTRFS_INODE_ITEM_KEY;
4947         key.offset = 0;
4948
4949         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4950         if (ret < 0)
4951                 goto out;
4952         if (ret > 0) {
4953                 ret = 0;
4954                 err |= INODE_ITEM_MISSING;
4955         }
4956
4957         err |= check_inode_item(root, path, ext_ref);
4958         err &= ~LAST_ITEM;
4959         if (err && !ret)
4960                 ret = -EIO;
4961 out:
4962         btrfs_free_path(path);
4963         return ret;
4964 }
4965
4966 /*
4967  * Iterate all item on the tree and call check_inode_item() to check.
4968  *
4969  * @root:       the root of the tree to be checked.
4970  * @ext_ref:    the EXTENDED_IREF feature
4971  *
4972  * Return 0 if no error found.
4973  * Return <0 for error.
4974  */
4975 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4976 {
4977         struct btrfs_path *path;
4978         struct node_refs nrefs;
4979         struct btrfs_root_item *root_item = &root->root_item;
4980         int ret, wret;
4981         int level;
4982
4983         /*
4984          * We need to manually check the first inode item(256)
4985          * As the following traversal function will only start from
4986          * the first inode item in the leaf, if inode item(256) is missing
4987          * we will just skip it forever.
4988          */
4989         ret = check_fs_first_inode(root, ext_ref);
4990         if (ret < 0)
4991                 return ret;
4992
4993         path = btrfs_alloc_path();
4994         if (!path)
4995                 return -ENOMEM;
4996
4997         memset(&nrefs, 0, sizeof(nrefs));
4998         level = btrfs_header_level(root->node);
4999
5000         if (btrfs_root_refs(root_item) > 0 ||
5001             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5002                 path->nodes[level] = root->node;
5003                 path->slots[level] = 0;
5004                 extent_buffer_get(root->node);
5005         } else {
5006                 struct btrfs_key key;
5007
5008                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5009                 level = root_item->drop_level;
5010                 path->lowest_level = level;
5011                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5012                 if (ret < 0)
5013                         goto out;
5014                 ret = 0;
5015         }
5016
5017         while (1) {
5018                 wret = walk_down_tree_v2(root, path, &level, &nrefs, ext_ref);
5019                 if (wret < 0)
5020                         ret = wret;
5021                 if (wret != 0)
5022                         break;
5023
5024                 wret = walk_up_tree_v2(root, path, &level);
5025                 if (wret < 0)
5026                         ret = wret;
5027                 if (wret != 0)
5028                         break;
5029         }
5030
5031 out:
5032         btrfs_free_path(path);
5033         return ret;
5034 }
5035
5036 /*
5037  * Find the relative ref for root_ref and root_backref.
5038  *
5039  * @root:       the root of the root tree.
5040  * @ref_key:    the key of the root ref.
5041  *
5042  * Return 0 if no error occurred.
5043  */
5044 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5045                           struct extent_buffer *node, int slot)
5046 {
5047         struct btrfs_path path;
5048         struct btrfs_key key;
5049         struct btrfs_root_ref *ref;
5050         struct btrfs_root_ref *backref;
5051         char ref_name[BTRFS_NAME_LEN] = {0};
5052         char backref_name[BTRFS_NAME_LEN] = {0};
5053         u64 ref_dirid;
5054         u64 ref_seq;
5055         u32 ref_namelen;
5056         u64 backref_dirid;
5057         u64 backref_seq;
5058         u32 backref_namelen;
5059         u32 len;
5060         int ret;
5061         int err = 0;
5062
5063         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5064         ref_dirid = btrfs_root_ref_dirid(node, ref);
5065         ref_seq = btrfs_root_ref_sequence(node, ref);
5066         ref_namelen = btrfs_root_ref_name_len(node, ref);
5067
5068         if (ref_namelen <= BTRFS_NAME_LEN) {
5069                 len = ref_namelen;
5070         } else {
5071                 len = BTRFS_NAME_LEN;
5072                 warning("%s[%llu %llu] ref_name too long",
5073                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5074                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5075                         ref_key->offset);
5076         }
5077         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5078
5079         /* Find relative root_ref */
5080         key.objectid = ref_key->offset;
5081         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5082         key.offset = ref_key->objectid;
5083
5084         btrfs_init_path(&path);
5085         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5086         if (ret) {
5087                 err |= ROOT_REF_MISSING;
5088                 error("%s[%llu %llu] couldn't find relative ref",
5089                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5090                       "ROOT_REF" : "ROOT_BACKREF",
5091                       ref_key->objectid, ref_key->offset);
5092                 goto out;
5093         }
5094
5095         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5096                                  struct btrfs_root_ref);
5097         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5098         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5099         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5100
5101         if (backref_namelen <= BTRFS_NAME_LEN) {
5102                 len = backref_namelen;
5103         } else {
5104                 len = BTRFS_NAME_LEN;
5105                 warning("%s[%llu %llu] ref_name too long",
5106                         key.type == BTRFS_ROOT_REF_KEY ?
5107                         "ROOT_REF" : "ROOT_BACKREF",
5108                         key.objectid, key.offset);
5109         }
5110         read_extent_buffer(path.nodes[0], backref_name,
5111                            (unsigned long)(backref + 1), len);
5112
5113         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5114             ref_namelen != backref_namelen ||
5115             strncmp(ref_name, backref_name, len)) {
5116                 err |= ROOT_REF_MISMATCH;
5117                 error("%s[%llu %llu] mismatch relative ref",
5118                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5119                       "ROOT_REF" : "ROOT_BACKREF",
5120                       ref_key->objectid, ref_key->offset);
5121         }
5122 out:
5123         btrfs_release_path(&path);
5124         return err;
5125 }
5126
5127 /*
5128  * Check all fs/file tree in low_memory mode.
5129  *
5130  * 1. for fs tree root item, call check_fs_root_v2()
5131  * 2. for fs tree root ref/backref, call check_root_ref()
5132  *
5133  * Return 0 if no error occurred.
5134  */
5135 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5136 {
5137         struct btrfs_root *tree_root = fs_info->tree_root;
5138         struct btrfs_root *cur_root = NULL;
5139         struct btrfs_path *path;
5140         struct btrfs_key key;
5141         struct extent_buffer *node;
5142         unsigned int ext_ref;
5143         int slot;
5144         int ret;
5145         int err = 0;
5146
5147         ext_ref = btrfs_fs_incompat(fs_info,
5148                                     BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF);
5149
5150         path = btrfs_alloc_path();
5151         if (!path)
5152                 return -ENOMEM;
5153
5154         key.objectid = BTRFS_FS_TREE_OBJECTID;
5155         key.offset = 0;
5156         key.type = BTRFS_ROOT_ITEM_KEY;
5157
5158         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
5159         if (ret < 0) {
5160                 err = ret;
5161                 goto out;
5162         } else if (ret > 0) {
5163                 err = -ENOENT;
5164                 goto out;
5165         }
5166
5167         while (1) {
5168                 node = path->nodes[0];
5169                 slot = path->slots[0];
5170                 btrfs_item_key_to_cpu(node, &key, slot);
5171                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5172                         goto out;
5173                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5174                     fs_root_objectid(key.objectid)) {
5175                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5176                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5177                                                                        &key);
5178                         } else {
5179                                 key.offset = (u64)-1;
5180                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5181                         }
5182
5183                         if (IS_ERR(cur_root)) {
5184                                 error("Fail to read fs/subvol tree: %lld",
5185                                       key.objectid);
5186                                 err = -EIO;
5187                                 goto next;
5188                         }
5189
5190                         ret = check_fs_root_v2(cur_root, ext_ref);
5191                         err |= ret;
5192
5193                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5194                                 btrfs_free_fs_root(cur_root);
5195                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5196                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5197                         ret = check_root_ref(tree_root, &key, node, slot);
5198                         err |= ret;
5199                 }
5200 next:
5201                 ret = btrfs_next_item(tree_root, path);
5202                 if (ret > 0)
5203                         goto out;
5204                 if (ret < 0) {
5205                         err = ret;
5206                         goto out;
5207                 }
5208         }
5209
5210 out:
5211         btrfs_free_path(path);
5212         return err;
5213 }
5214
5215 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5216 {
5217         struct list_head *cur = rec->backrefs.next;
5218         struct extent_backref *back;
5219         struct tree_backref *tback;
5220         struct data_backref *dback;
5221         u64 found = 0;
5222         int err = 0;
5223
5224         while(cur != &rec->backrefs) {
5225                 back = to_extent_backref(cur);
5226                 cur = cur->next;
5227                 if (!back->found_extent_tree) {
5228                         err = 1;
5229                         if (!print_errs)
5230                                 goto out;
5231                         if (back->is_data) {
5232                                 dback = to_data_backref(back);
5233                                 fprintf(stderr, "Backref %llu %s %llu"
5234                                         " owner %llu offset %llu num_refs %lu"
5235                                         " not found in extent tree\n",
5236                                         (unsigned long long)rec->start,
5237                                         back->full_backref ?
5238                                         "parent" : "root",
5239                                         back->full_backref ?
5240                                         (unsigned long long)dback->parent:
5241                                         (unsigned long long)dback->root,
5242                                         (unsigned long long)dback->owner,
5243                                         (unsigned long long)dback->offset,
5244                                         (unsigned long)dback->num_refs);
5245                         } else {
5246                                 tback = to_tree_backref(back);
5247                                 fprintf(stderr, "Backref %llu parent %llu"
5248                                         " root %llu not found in extent tree\n",
5249                                         (unsigned long long)rec->start,
5250                                         (unsigned long long)tback->parent,
5251                                         (unsigned long long)tback->root);
5252                         }
5253                 }
5254                 if (!back->is_data && !back->found_ref) {
5255                         err = 1;
5256                         if (!print_errs)
5257                                 goto out;
5258                         tback = to_tree_backref(back);
5259                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5260                                 (unsigned long long)rec->start,
5261                                 back->full_backref ? "parent" : "root",
5262                                 back->full_backref ?
5263                                 (unsigned long long)tback->parent :
5264                                 (unsigned long long)tback->root, back);
5265                 }
5266                 if (back->is_data) {
5267                         dback = to_data_backref(back);
5268                         if (dback->found_ref != dback->num_refs) {
5269                                 err = 1;
5270                                 if (!print_errs)
5271                                         goto out;
5272                                 fprintf(stderr, "Incorrect local backref count"
5273                                         " on %llu %s %llu owner %llu"
5274                                         " offset %llu found %u wanted %u back %p\n",
5275                                         (unsigned long long)rec->start,
5276                                         back->full_backref ?
5277                                         "parent" : "root",
5278                                         back->full_backref ?
5279                                         (unsigned long long)dback->parent:
5280                                         (unsigned long long)dback->root,
5281                                         (unsigned long long)dback->owner,
5282                                         (unsigned long long)dback->offset,
5283                                         dback->found_ref, dback->num_refs, back);
5284                         }
5285                         if (dback->disk_bytenr != rec->start) {
5286                                 err = 1;
5287                                 if (!print_errs)
5288                                         goto out;
5289                                 fprintf(stderr, "Backref disk bytenr does not"
5290                                         " match extent record, bytenr=%llu, "
5291                                         "ref bytenr=%llu\n",
5292                                         (unsigned long long)rec->start,
5293                                         (unsigned long long)dback->disk_bytenr);
5294                         }
5295
5296                         if (dback->bytes != rec->nr) {
5297                                 err = 1;
5298                                 if (!print_errs)
5299                                         goto out;
5300                                 fprintf(stderr, "Backref bytes do not match "
5301                                         "extent backref, bytenr=%llu, ref "
5302                                         "bytes=%llu, backref bytes=%llu\n",
5303                                         (unsigned long long)rec->start,
5304                                         (unsigned long long)rec->nr,
5305                                         (unsigned long long)dback->bytes);
5306                         }
5307                 }
5308                 if (!back->is_data) {
5309                         found += 1;
5310                 } else {
5311                         dback = to_data_backref(back);
5312                         found += dback->found_ref;
5313                 }
5314         }
5315         if (found != rec->refs) {
5316                 err = 1;
5317                 if (!print_errs)
5318                         goto out;
5319                 fprintf(stderr, "Incorrect global backref count "
5320                         "on %llu found %llu wanted %llu\n",
5321                         (unsigned long long)rec->start,
5322                         (unsigned long long)found,
5323                         (unsigned long long)rec->refs);
5324         }
5325 out:
5326         return err;
5327 }
5328
5329 static int free_all_extent_backrefs(struct extent_record *rec)
5330 {
5331         struct extent_backref *back;
5332         struct list_head *cur;
5333         while (!list_empty(&rec->backrefs)) {
5334                 cur = rec->backrefs.next;
5335                 back = to_extent_backref(cur);
5336                 list_del(cur);
5337                 free(back);
5338         }
5339         return 0;
5340 }
5341
5342 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5343                                      struct cache_tree *extent_cache)
5344 {
5345         struct cache_extent *cache;
5346         struct extent_record *rec;
5347
5348         while (1) {
5349                 cache = first_cache_extent(extent_cache);
5350                 if (!cache)
5351                         break;
5352                 rec = container_of(cache, struct extent_record, cache);
5353                 remove_cache_extent(extent_cache, cache);
5354                 free_all_extent_backrefs(rec);
5355                 free(rec);
5356         }
5357 }
5358
5359 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5360                                  struct extent_record *rec)
5361 {
5362         if (rec->content_checked && rec->owner_ref_checked &&
5363             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5364             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5365             !rec->bad_full_backref && !rec->crossing_stripes &&
5366             !rec->wrong_chunk_type) {
5367                 remove_cache_extent(extent_cache, &rec->cache);
5368                 free_all_extent_backrefs(rec);
5369                 list_del_init(&rec->list);
5370                 free(rec);
5371         }
5372         return 0;
5373 }
5374
5375 static int check_owner_ref(struct btrfs_root *root,
5376                             struct extent_record *rec,
5377                             struct extent_buffer *buf)
5378 {
5379         struct extent_backref *node;
5380         struct tree_backref *back;
5381         struct btrfs_root *ref_root;
5382         struct btrfs_key key;
5383         struct btrfs_path path;
5384         struct extent_buffer *parent;
5385         int level;
5386         int found = 0;
5387         int ret;
5388
5389         list_for_each_entry(node, &rec->backrefs, list) {
5390                 if (node->is_data)
5391                         continue;
5392                 if (!node->found_ref)
5393                         continue;
5394                 if (node->full_backref)
5395                         continue;
5396                 back = to_tree_backref(node);
5397                 if (btrfs_header_owner(buf) == back->root)
5398                         return 0;
5399         }
5400         BUG_ON(rec->is_root);
5401
5402         /* try to find the block by search corresponding fs tree */
5403         key.objectid = btrfs_header_owner(buf);
5404         key.type = BTRFS_ROOT_ITEM_KEY;
5405         key.offset = (u64)-1;
5406
5407         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5408         if (IS_ERR(ref_root))
5409                 return 1;
5410
5411         level = btrfs_header_level(buf);
5412         if (level == 0)
5413                 btrfs_item_key_to_cpu(buf, &key, 0);
5414         else
5415                 btrfs_node_key_to_cpu(buf, &key, 0);
5416
5417         btrfs_init_path(&path);
5418         path.lowest_level = level + 1;
5419         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5420         if (ret < 0)
5421                 return 0;
5422
5423         parent = path.nodes[level + 1];
5424         if (parent && buf->start == btrfs_node_blockptr(parent,
5425                                                         path.slots[level + 1]))
5426                 found = 1;
5427
5428         btrfs_release_path(&path);
5429         return found ? 0 : 1;
5430 }
5431
5432 static int is_extent_tree_record(struct extent_record *rec)
5433 {
5434         struct list_head *cur = rec->backrefs.next;
5435         struct extent_backref *node;
5436         struct tree_backref *back;
5437         int is_extent = 0;
5438
5439         while(cur != &rec->backrefs) {
5440                 node = to_extent_backref(cur);
5441                 cur = cur->next;
5442                 if (node->is_data)
5443                         return 0;
5444                 back = to_tree_backref(node);
5445                 if (node->full_backref)
5446                         return 0;
5447                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5448                         is_extent = 1;
5449         }
5450         return is_extent;
5451 }
5452
5453
5454 static int record_bad_block_io(struct btrfs_fs_info *info,
5455                                struct cache_tree *extent_cache,
5456                                u64 start, u64 len)
5457 {
5458         struct extent_record *rec;
5459         struct cache_extent *cache;
5460         struct btrfs_key key;
5461
5462         cache = lookup_cache_extent(extent_cache, start, len);
5463         if (!cache)
5464                 return 0;
5465
5466         rec = container_of(cache, struct extent_record, cache);
5467         if (!is_extent_tree_record(rec))
5468                 return 0;
5469
5470         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5471         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5472 }
5473
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (non-zero header level): the two btrfs_key_ptr entries are swapped
 * in place.  If @slot is 0 the new first key is propagated with
 * btrfs_fixup_low_keys().
 *
 * Leaf: the data bytes, offsets, sizes and keys of the two items are
 * exchanged, using two temporary heap buffers.  path->slots[0] is
 * overwritten in the process and is left at @slot + 1.
 *
 * Returns 0 on success, -ENOMEM if a temporary buffer can't be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                /* The first key of the node changed, tell the parents */
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Remember key, offset and size of both items */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                /*
                 * NOTE(review): fix_item_offset() addresses item data as
                 * btrfs_leaf_data(buf) + offset, while the calls below pass
                 * the raw item offset - confirm which base
                 * read/write_extent_buffer expect here.
                 */
                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write copies with the *other* item's
                 * size.  If the sizes differ, the larger length is read out
                 * of the smaller scratch buffer - confirm whether equal
                 * sizes are guaranteed by the callers.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset/size fields of the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Exchange the keys; the setter works on path->slots[0] */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5541
5542 static int fix_key_order(struct btrfs_trans_handle *trans,
5543                          struct btrfs_root *root,
5544                          struct btrfs_path *path)
5545 {
5546         struct extent_buffer *buf;
5547         struct btrfs_key k1, k2;
5548         int i;
5549         int level = path->lowest_level;
5550         int ret = -EIO;
5551
5552         buf = path->nodes[level];
5553         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5554                 if (level) {
5555                         btrfs_node_key_to_cpu(buf, &k1, i);
5556                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5557                 } else {
5558                         btrfs_item_key_to_cpu(buf, &k1, i);
5559                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5560                 }
5561                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5562                         continue;
5563                 ret = swap_values(root, path, buf, i);
5564                 if (ret)
5565                         break;
5566                 btrfs_mark_buffer_dirty(buf);
5567                 i = 0;
5568         }
5569         return ret;
5570 }
5571
5572 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5573                              struct btrfs_root *root,
5574                              struct btrfs_path *path,
5575                              struct extent_buffer *buf, int slot)
5576 {
5577         struct btrfs_key key;
5578         int nritems = btrfs_header_nritems(buf);
5579
5580         btrfs_item_key_to_cpu(buf, &key, slot);
5581
5582         /* These are all the keys we can deal with missing. */
5583         if (key.type != BTRFS_DIR_INDEX_KEY &&
5584             key.type != BTRFS_EXTENT_ITEM_KEY &&
5585             key.type != BTRFS_METADATA_ITEM_KEY &&
5586             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5587             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5588                 return -1;
5589
5590         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5591                (unsigned long long)key.objectid, key.type,
5592                (unsigned long long)key.offset, slot, buf->start);
5593         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5594                               btrfs_item_nr_offset(slot + 1),
5595                               sizeof(struct btrfs_item) *
5596                               (nritems - slot - 1));
5597         btrfs_set_header_nritems(buf, nritems - 1);
5598         if (slot == 0) {
5599                 struct btrfs_disk_key disk_key;
5600
5601                 btrfs_item_key(buf, &disk_key, 0);
5602                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5603         }
5604         btrfs_mark_buffer_dirty(buf);
5605         return 0;
5606 }
5607
5608 static int fix_item_offset(struct btrfs_trans_handle *trans,
5609                            struct btrfs_root *root,
5610                            struct btrfs_path *path)
5611 {
5612         struct extent_buffer *buf;
5613         int i;
5614         int ret = 0;
5615
5616         /* We should only get this for leaves */
5617         BUG_ON(path->lowest_level);
5618         buf = path->nodes[0];
5619 again:
5620         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5621                 unsigned int shift = 0, offset;
5622
5623                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5624                     BTRFS_LEAF_DATA_SIZE(root)) {
5625                         if (btrfs_item_end_nr(buf, i) >
5626                             BTRFS_LEAF_DATA_SIZE(root)) {
5627                                 ret = delete_bogus_item(trans, root, path,
5628                                                         buf, i);
5629                                 if (!ret)
5630                                         goto again;
5631                                 fprintf(stderr, "item is off the end of the "
5632                                         "leaf, can't fix\n");
5633                                 ret = -EIO;
5634                                 break;
5635                         }
5636                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5637                                 btrfs_item_end_nr(buf, i);
5638                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5639                            btrfs_item_offset_nr(buf, i - 1)) {
5640                         if (btrfs_item_end_nr(buf, i) >
5641                             btrfs_item_offset_nr(buf, i - 1)) {
5642                                 ret = delete_bogus_item(trans, root, path,
5643                                                         buf, i);
5644                                 if (!ret)
5645                                         goto again;
5646                                 fprintf(stderr, "items overlap, can't fix\n");
5647                                 ret = -EIO;
5648                                 break;
5649                         }
5650                         shift = btrfs_item_offset_nr(buf, i - 1) -
5651                                 btrfs_item_end_nr(buf, i);
5652                 }
5653                 if (!shift)
5654                         continue;
5655
5656                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5657                        i, shift, (unsigned long long)buf->start);
5658                 offset = btrfs_item_offset_nr(buf, i);
5659                 memmove_extent_buffer(buf,
5660                                       btrfs_leaf_data(buf) + offset + shift,
5661                                       btrfs_leaf_data(buf) + offset,
5662                                       btrfs_item_size_nr(buf, i));
5663                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5664                                       offset + shift);
5665                 btrfs_mark_buffer_dirty(buf);
5666         }
5667
5668         /*
5669          * We may have moved things, in which case we want to exit so we don't
5670          * write those changes out.  Once we have proper abort functionality in
5671          * progs this can be changed to something nicer.
5672          */
5673         BUG_ON(ret);
5674         return ret;
5675 }
5676
5677 /*
5678  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5679  * then just return -EIO.
5680  */
5681 static int try_to_fix_bad_block(struct btrfs_root *root,
5682                                 struct extent_buffer *buf,
5683                                 enum btrfs_tree_block_status status)
5684 {
5685         struct btrfs_trans_handle *trans;
5686         struct ulist *roots;
5687         struct ulist_node *node;
5688         struct btrfs_root *search_root;
5689         struct btrfs_path path;
5690         struct ulist_iterator iter;
5691         struct btrfs_key root_key, key;
5692         int ret;
5693
5694         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5695             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5696                 return -EIO;
5697
5698         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5699         if (ret)
5700                 return -EIO;
5701
5702         btrfs_init_path(&path);
5703         ULIST_ITER_INIT(&iter);
5704         while ((node = ulist_next(roots, &iter))) {
5705                 root_key.objectid = node->val;
5706                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5707                 root_key.offset = (u64)-1;
5708
5709                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5710                 if (IS_ERR(root)) {
5711                         ret = -EIO;
5712                         break;
5713                 }
5714
5715
5716                 trans = btrfs_start_transaction(search_root, 0);
5717                 if (IS_ERR(trans)) {
5718                         ret = PTR_ERR(trans);
5719                         break;
5720                 }
5721
5722                 path.lowest_level = btrfs_header_level(buf);
5723                 path.skip_check_block = 1;
5724                 if (path.lowest_level)
5725                         btrfs_node_key_to_cpu(buf, &key, 0);
5726                 else
5727                         btrfs_item_key_to_cpu(buf, &key, 0);
5728                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5729                 if (ret) {
5730                         ret = -EIO;
5731                         btrfs_commit_transaction(trans, search_root);
5732                         break;
5733                 }
5734                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5735                         ret = fix_key_order(trans, search_root, &path);
5736                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5737                         ret = fix_item_offset(trans, search_root, &path);
5738                 if (ret) {
5739                         btrfs_commit_transaction(trans, search_root);
5740                         break;
5741                 }
5742                 btrfs_release_path(&path);
5743                 btrfs_commit_transaction(trans, search_root);
5744         }
5745         ulist_free(roots);
5746         btrfs_release_path(&path);
5747         return ret;
5748 }
5749
5750 static int check_block(struct btrfs_root *root,
5751                        struct cache_tree *extent_cache,
5752                        struct extent_buffer *buf, u64 flags)
5753 {
5754         struct extent_record *rec;
5755         struct cache_extent *cache;
5756         struct btrfs_key key;
5757         enum btrfs_tree_block_status status;
5758         int ret = 0;
5759         int level;
5760
5761         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5762         if (!cache)
5763                 return 1;
5764         rec = container_of(cache, struct extent_record, cache);
5765         rec->generation = btrfs_header_generation(buf);
5766
5767         level = btrfs_header_level(buf);
5768         if (btrfs_header_nritems(buf) > 0) {
5769
5770                 if (level == 0)
5771                         btrfs_item_key_to_cpu(buf, &key, 0);
5772                 else
5773                         btrfs_node_key_to_cpu(buf, &key, 0);
5774
5775                 rec->info_objectid = key.objectid;
5776         }
5777         rec->info_level = level;
5778
5779         if (btrfs_is_leaf(buf))
5780                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5781         else
5782                 status = btrfs_check_node(root, &rec->parent_key, buf);
5783
5784         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5785                 if (repair)
5786                         status = try_to_fix_bad_block(root, buf, status);
5787                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5788                         ret = -EIO;
5789                         fprintf(stderr, "bad block %llu\n",
5790                                 (unsigned long long)buf->start);
5791                 } else {
5792                         /*
5793                          * Signal to callers we need to start the scan over
5794                          * again since we'll have cowed blocks.
5795                          */
5796                         ret = -EAGAIN;
5797                 }
5798         } else {
5799                 rec->content_checked = 1;
5800                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5801                         rec->owner_ref_checked = 1;
5802                 else {
5803                         ret = check_owner_ref(root, rec, buf);
5804                         if (!ret)
5805                                 rec->owner_ref_checked = 1;
5806                 }
5807         }
5808         if (!ret)
5809                 maybe_free_extent_rec(extent_cache, rec);
5810         return ret;
5811 }
5812
5813 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5814                                                 u64 parent, u64 root)
5815 {
5816         struct list_head *cur = rec->backrefs.next;
5817         struct extent_backref *node;
5818         struct tree_backref *back;
5819
5820         while(cur != &rec->backrefs) {
5821                 node = to_extent_backref(cur);
5822                 cur = cur->next;
5823                 if (node->is_data)
5824                         continue;
5825                 back = to_tree_backref(node);
5826                 if (parent > 0) {
5827                         if (!node->full_backref)
5828                                 continue;
5829                         if (parent == back->parent)
5830                                 return back;
5831                 } else {
5832                         if (node->full_backref)
5833                                 continue;
5834                         if (back->root == root)
5835                                 return back;
5836                 }
5837         }
5838         return NULL;
5839 }
5840
5841 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5842                                                 u64 parent, u64 root)
5843 {
5844         struct tree_backref *ref = malloc(sizeof(*ref));
5845
5846         if (!ref)
5847                 return NULL;
5848         memset(&ref->node, 0, sizeof(ref->node));
5849         if (parent > 0) {
5850                 ref->parent = parent;
5851                 ref->node.full_backref = 1;
5852         } else {
5853                 ref->root = root;
5854                 ref->node.full_backref = 0;
5855         }
5856         list_add_tail(&ref->node.list, &rec->backrefs);
5857
5858         return ref;
5859 }
5860
5861 static struct data_backref *find_data_backref(struct extent_record *rec,
5862                                                 u64 parent, u64 root,
5863                                                 u64 owner, u64 offset,
5864                                                 int found_ref,
5865                                                 u64 disk_bytenr, u64 bytes)
5866 {
5867         struct list_head *cur = rec->backrefs.next;
5868         struct extent_backref *node;
5869         struct data_backref *back;
5870
5871         while(cur != &rec->backrefs) {
5872                 node = to_extent_backref(cur);
5873                 cur = cur->next;
5874                 if (!node->is_data)
5875                         continue;
5876                 back = to_data_backref(node);
5877                 if (parent > 0) {
5878                         if (!node->full_backref)
5879                                 continue;
5880                         if (parent == back->parent)
5881                                 return back;
5882                 } else {
5883                         if (node->full_backref)
5884                                 continue;
5885                         if (back->root == root && back->owner == owner &&
5886                             back->offset == offset) {
5887                                 if (found_ref && node->found_ref &&
5888                                     (back->bytes != bytes ||
5889                                     back->disk_bytenr != disk_bytenr))
5890                                         continue;
5891                                 return back;
5892                         }
5893                 }
5894         }
5895         return NULL;
5896 }
5897
5898 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5899                                                 u64 parent, u64 root,
5900                                                 u64 owner, u64 offset,
5901                                                 u64 max_size)
5902 {
5903         struct data_backref *ref = malloc(sizeof(*ref));
5904
5905         if (!ref)
5906                 return NULL;
5907         memset(&ref->node, 0, sizeof(ref->node));
5908         ref->node.is_data = 1;
5909
5910         if (parent > 0) {
5911                 ref->parent = parent;
5912                 ref->owner = 0;
5913                 ref->offset = 0;
5914                 ref->node.full_backref = 1;
5915         } else {
5916                 ref->root = root;
5917                 ref->owner = owner;
5918                 ref->offset = offset;
5919                 ref->node.full_backref = 0;
5920         }
5921         ref->bytes = max_size;
5922         ref->found_ref = 0;
5923         ref->num_refs = 0;
5924         list_add_tail(&ref->node.list, &rec->backrefs);
5925         if (max_size > rec->max_size)
5926                 rec->max_size = max_size;
5927         return ref;
5928 }
5929
5930 /* Check if the type of extent matches with its chunk */
5931 static void check_extent_type(struct extent_record *rec)
5932 {
5933         struct btrfs_block_group_cache *bg_cache;
5934
5935         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5936         if (!bg_cache)
5937                 return;
5938
5939         /* data extent, check chunk directly*/
5940         if (!rec->metadata) {
5941                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5942                         rec->wrong_chunk_type = 1;
5943                 return;
5944         }
5945
5946         /* metadata extent, check the obvious case first */
5947         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5948                                  BTRFS_BLOCK_GROUP_METADATA))) {
5949                 rec->wrong_chunk_type = 1;
5950                 return;
5951         }
5952
5953         /*
5954          * Check SYSTEM extent, as it's also marked as metadata, we can only
5955          * make sure it's a SYSTEM extent by its backref
5956          */
5957         if (!list_empty(&rec->backrefs)) {
5958                 struct extent_backref *node;
5959                 struct tree_backref *tback;
5960                 u64 bg_type;
5961
5962                 node = to_extent_backref(rec->backrefs.next);
5963                 if (node->is_data) {
5964                         /* tree block shouldn't have data backref */
5965                         rec->wrong_chunk_type = 1;
5966                         return;
5967                 }
5968                 tback = container_of(node, struct tree_backref, node);
5969
5970                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5971                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5972                 else
5973                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5974                 if (!(bg_cache->flags & bg_type))
5975                         rec->wrong_chunk_type = 1;
5976         }
5977 }
5978
5979 /*
5980  * Allocate a new extent record, fill default values from @tmpl and insert int
5981  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5982  * the cache, otherwise it fails.
5983  */
5984 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5985                 struct extent_record *tmpl)
5986 {
5987         struct extent_record *rec;
5988         int ret = 0;
5989
5990         rec = malloc(sizeof(*rec));
5991         if (!rec)
5992                 return -ENOMEM;
5993         rec->start = tmpl->start;
5994         rec->max_size = tmpl->max_size;
5995         rec->nr = max(tmpl->nr, tmpl->max_size);
5996         rec->found_rec = tmpl->found_rec;
5997         rec->content_checked = tmpl->content_checked;
5998         rec->owner_ref_checked = tmpl->owner_ref_checked;
5999         rec->num_duplicates = 0;
6000         rec->metadata = tmpl->metadata;
6001         rec->flag_block_full_backref = FLAG_UNSET;
6002         rec->bad_full_backref = 0;
6003         rec->crossing_stripes = 0;
6004         rec->wrong_chunk_type = 0;
6005         rec->is_root = tmpl->is_root;
6006         rec->refs = tmpl->refs;
6007         rec->extent_item_refs = tmpl->extent_item_refs;
6008         rec->parent_generation = tmpl->parent_generation;
6009         INIT_LIST_HEAD(&rec->backrefs);
6010         INIT_LIST_HEAD(&rec->dups);
6011         INIT_LIST_HEAD(&rec->list);
6012         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6013         rec->cache.start = tmpl->start;
6014         rec->cache.size = tmpl->nr;
6015         ret = insert_cache_extent(extent_cache, &rec->cache);
6016         if (ret) {
6017                 free(rec);
6018                 return ret;
6019         }
6020         bytes_used += rec->nr;
6021
6022         if (tmpl->metadata)
6023                 rec->crossing_stripes = check_crossing_stripes(global_info,
6024                                 rec->start, global_info->tree_root->nodesize);
6025         check_extent_type(rec);
6026         return ret;
6027 }
6028
6029 /*
6030  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6031  * some are hints:
6032  * - refs              - if found, increase refs
6033  * - is_root           - if found, set
6034  * - content_checked   - if found, set
6035  * - owner_ref_checked - if found, set
6036  *
6037  * If not found, create a new one, initialize and insert.
6038  */
6039 static int add_extent_rec(struct cache_tree *extent_cache,
6040                 struct extent_record *tmpl)
6041 {
6042         struct extent_record *rec;
6043         struct cache_extent *cache;
6044         int ret = 0;
6045         int dup = 0;
6046
6047         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6048         if (cache) {
6049                 rec = container_of(cache, struct extent_record, cache);
6050                 if (tmpl->refs)
6051                         rec->refs++;
6052                 if (rec->nr == 1)
6053                         rec->nr = max(tmpl->nr, tmpl->max_size);
6054
6055                 /*
6056                  * We need to make sure to reset nr to whatever the extent
6057                  * record says was the real size, this way we can compare it to
6058                  * the backrefs.
6059                  */
6060                 if (tmpl->found_rec) {
6061                         if (tmpl->start != rec->start || rec->found_rec) {
6062                                 struct extent_record *tmp;
6063
6064                                 dup = 1;
6065                                 if (list_empty(&rec->list))
6066                                         list_add_tail(&rec->list,
6067                                                       &duplicate_extents);
6068
6069                                 /*
6070                                  * We have to do this song and dance in case we
6071                                  * find an extent record that falls inside of
6072                                  * our current extent record but does not have
6073                                  * the same objectid.
6074                                  */
6075                                 tmp = malloc(sizeof(*tmp));
6076                                 if (!tmp)
6077                                         return -ENOMEM;
6078                                 tmp->start = tmpl->start;
6079                                 tmp->max_size = tmpl->max_size;
6080                                 tmp->nr = tmpl->nr;
6081                                 tmp->found_rec = 1;
6082                                 tmp->metadata = tmpl->metadata;
6083                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6084                                 INIT_LIST_HEAD(&tmp->list);
6085                                 list_add_tail(&tmp->list, &rec->dups);
6086                                 rec->num_duplicates++;
6087                         } else {
6088                                 rec->nr = tmpl->nr;
6089                                 rec->found_rec = 1;
6090                         }
6091                 }
6092
6093                 if (tmpl->extent_item_refs && !dup) {
6094                         if (rec->extent_item_refs) {
6095                                 fprintf(stderr, "block %llu rec "
6096                                         "extent_item_refs %llu, passed %llu\n",
6097                                         (unsigned long long)tmpl->start,
6098                                         (unsigned long long)
6099                                                         rec->extent_item_refs,
6100                                         (unsigned long long)tmpl->extent_item_refs);
6101                         }
6102                         rec->extent_item_refs = tmpl->extent_item_refs;
6103                 }
6104                 if (tmpl->is_root)
6105                         rec->is_root = 1;
6106                 if (tmpl->content_checked)
6107                         rec->content_checked = 1;
6108                 if (tmpl->owner_ref_checked)
6109                         rec->owner_ref_checked = 1;
6110                 memcpy(&rec->parent_key, &tmpl->parent_key,
6111                                 sizeof(tmpl->parent_key));
6112                 if (tmpl->parent_generation)
6113                         rec->parent_generation = tmpl->parent_generation;
6114                 if (rec->max_size < tmpl->max_size)
6115                         rec->max_size = tmpl->max_size;
6116
6117                 /*
6118                  * A metadata extent can't cross stripe_len boundary, otherwise
6119                  * kernel scrub won't be able to handle it.
6120                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6121                  * it.
6122                  */
6123                 if (tmpl->metadata)
6124                         rec->crossing_stripes = check_crossing_stripes(
6125                                         global_info, rec->start,
6126                                         global_info->tree_root->nodesize);
6127                 check_extent_type(rec);
6128                 maybe_free_extent_rec(extent_cache, rec);
6129                 return ret;
6130         }
6131
6132         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6133
6134         return ret;
6135 }
6136
6137 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6138                             u64 parent, u64 root, int found_ref)
6139 {
6140         struct extent_record *rec;
6141         struct tree_backref *back;
6142         struct cache_extent *cache;
6143         int ret;
6144
6145         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6146         if (!cache) {
6147                 struct extent_record tmpl;
6148
6149                 memset(&tmpl, 0, sizeof(tmpl));
6150                 tmpl.start = bytenr;
6151                 tmpl.nr = 1;
6152                 tmpl.metadata = 1;
6153
6154                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6155                 if (ret)
6156                         return ret;
6157
6158                 /* really a bug in cache_extent implement now */
6159                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6160                 if (!cache)
6161                         return -ENOENT;
6162         }
6163
6164         rec = container_of(cache, struct extent_record, cache);
6165         if (rec->start != bytenr) {
6166                 /*
6167                  * Several cause, from unaligned bytenr to over lapping extents
6168                  */
6169                 return -EEXIST;
6170         }
6171
6172         back = find_tree_backref(rec, parent, root);
6173         if (!back) {
6174                 back = alloc_tree_backref(rec, parent, root);
6175                 if (!back)
6176                         return -ENOMEM;
6177         }
6178
6179         if (found_ref) {
6180                 if (back->node.found_ref) {
6181                         fprintf(stderr, "Extent back ref already exists "
6182                                 "for %llu parent %llu root %llu \n",
6183                                 (unsigned long long)bytenr,
6184                                 (unsigned long long)parent,
6185                                 (unsigned long long)root);
6186                 }
6187                 back->node.found_ref = 1;
6188         } else {
6189                 if (back->node.found_extent_tree) {
6190                         fprintf(stderr, "Extent back ref already exists "
6191                                 "for %llu parent %llu root %llu \n",
6192                                 (unsigned long long)bytenr,
6193                                 (unsigned long long)parent,
6194                                 (unsigned long long)root);
6195                 }
6196                 back->node.found_extent_tree = 1;
6197         }
6198         check_extent_type(rec);
6199         maybe_free_extent_rec(extent_cache, rec);
6200         return 0;
6201 }
6202
6203 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6204                             u64 parent, u64 root, u64 owner, u64 offset,
6205                             u32 num_refs, int found_ref, u64 max_size)
6206 {
6207         struct extent_record *rec;
6208         struct data_backref *back;
6209         struct cache_extent *cache;
6210         int ret;
6211
6212         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6213         if (!cache) {
6214                 struct extent_record tmpl;
6215
6216                 memset(&tmpl, 0, sizeof(tmpl));
6217                 tmpl.start = bytenr;
6218                 tmpl.nr = 1;
6219                 tmpl.max_size = max_size;
6220
6221                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6222                 if (ret)
6223                         return ret;
6224
6225                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6226                 if (!cache)
6227                         abort();
6228         }
6229
6230         rec = container_of(cache, struct extent_record, cache);
6231         if (rec->max_size < max_size)
6232                 rec->max_size = max_size;
6233
6234         /*
6235          * If found_ref is set then max_size is the real size and must match the
6236          * existing refs.  So if we have already found a ref then we need to
6237          * make sure that this ref matches the existing one, otherwise we need
6238          * to add a new backref so we can notice that the backrefs don't match
6239          * and we need to figure out who is telling the truth.  This is to
6240          * account for that awful fsync bug I introduced where we'd end up with
6241          * a btrfs_file_extent_item that would have its length include multiple
6242          * prealloc extents or point inside of a prealloc extent.
6243          */
6244         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6245                                  bytenr, max_size);
6246         if (!back) {
6247                 back = alloc_data_backref(rec, parent, root, owner, offset,
6248                                           max_size);
6249                 BUG_ON(!back);
6250         }
6251
6252         if (found_ref) {
6253                 BUG_ON(num_refs != 1);
6254                 if (back->node.found_ref)
6255                         BUG_ON(back->bytes != max_size);
6256                 back->node.found_ref = 1;
6257                 back->found_ref += 1;
6258                 back->bytes = max_size;
6259                 back->disk_bytenr = bytenr;
6260                 rec->refs += 1;
6261                 rec->content_checked = 1;
6262                 rec->owner_ref_checked = 1;
6263         } else {
6264                 if (back->node.found_extent_tree) {
6265                         fprintf(stderr, "Extent back ref already exists "
6266                                 "for %llu parent %llu root %llu "
6267                                 "owner %llu offset %llu num_refs %lu\n",
6268                                 (unsigned long long)bytenr,
6269                                 (unsigned long long)parent,
6270                                 (unsigned long long)root,
6271                                 (unsigned long long)owner,
6272                                 (unsigned long long)offset,
6273                                 (unsigned long)num_refs);
6274                 }
6275                 back->num_refs = num_refs;
6276                 back->node.found_extent_tree = 1;
6277         }
6278         maybe_free_extent_rec(extent_cache, rec);
6279         return 0;
6280 }
6281
6282 static int add_pending(struct cache_tree *pending,
6283                        struct cache_tree *seen, u64 bytenr, u32 size)
6284 {
6285         int ret;
6286         ret = add_cache_extent(seen, bytenr, size);
6287         if (ret)
6288                 return ret;
6289         add_cache_extent(pending, bytenr, size);
6290         return 0;
6291 }
6292
6293 static int pick_next_pending(struct cache_tree *pending,
6294                         struct cache_tree *reada,
6295                         struct cache_tree *nodes,
6296                         u64 last, struct block_info *bits, int bits_nr,
6297                         int *reada_bits)
6298 {
6299         unsigned long node_start = last;
6300         struct cache_extent *cache;
6301         int ret;
6302
6303         cache = search_cache_extent(reada, 0);
6304         if (cache) {
6305                 bits[0].start = cache->start;
6306                 bits[0].size = cache->size;
6307                 *reada_bits = 1;
6308                 return 1;
6309         }
6310         *reada_bits = 0;
6311         if (node_start > 32768)
6312                 node_start -= 32768;
6313
6314         cache = search_cache_extent(nodes, node_start);
6315         if (!cache)
6316                 cache = search_cache_extent(nodes, 0);
6317
6318         if (!cache) {
6319                  cache = search_cache_extent(pending, 0);
6320                  if (!cache)
6321                          return 0;
6322                  ret = 0;
6323                  do {
6324                          bits[ret].start = cache->start;
6325                          bits[ret].size = cache->size;
6326                          cache = next_cache_extent(cache);
6327                          ret++;
6328                  } while (cache && ret < bits_nr);
6329                  return ret;
6330         }
6331
6332         ret = 0;
6333         do {
6334                 bits[ret].start = cache->start;
6335                 bits[ret].size = cache->size;
6336                 cache = next_cache_extent(cache);
6337                 ret++;
6338         } while (cache && ret < bits_nr);
6339
6340         if (bits_nr - ret > 8) {
6341                 u64 lookup = bits[0].start + bits[0].size;
6342                 struct cache_extent *next;
6343                 next = search_cache_extent(pending, lookup);
6344                 while(next) {
6345                         if (next->start - lookup > 32768)
6346                                 break;
6347                         bits[ret].start = next->start;
6348                         bits[ret].size = next->size;
6349                         lookup = next->start + next->size;
6350                         ret++;
6351                         if (ret == bits_nr)
6352                                 break;
6353                         next = next_cache_extent(next);
6354                         if (!next)
6355                                 break;
6356                 }
6357         }
6358         return ret;
6359 }
6360
6361 static void free_chunk_record(struct cache_extent *cache)
6362 {
6363         struct chunk_record *rec;
6364
6365         rec = container_of(cache, struct chunk_record, cache);
6366         list_del_init(&rec->list);
6367         list_del_init(&rec->dextents);
6368         free(rec);
6369 }
6370
6371 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6372 {
6373         cache_tree_free_extents(chunk_cache, free_chunk_record);
6374 }
6375
6376 static void free_device_record(struct rb_node *node)
6377 {
6378         struct device_record *rec;
6379
6380         rec = container_of(node, struct device_record, node);
6381         free(rec);
6382 }
6383
6384 FREE_RB_BASED_TREE(device_cache, free_device_record);
6385
6386 int insert_block_group_record(struct block_group_tree *tree,
6387                               struct block_group_record *bg_rec)
6388 {
6389         int ret;
6390
6391         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6392         if (ret)
6393                 return ret;
6394
6395         list_add_tail(&bg_rec->list, &tree->block_groups);
6396         return 0;
6397 }
6398
6399 static void free_block_group_record(struct cache_extent *cache)
6400 {
6401         struct block_group_record *rec;
6402
6403         rec = container_of(cache, struct block_group_record, cache);
6404         list_del_init(&rec->list);
6405         free(rec);
6406 }
6407
6408 void free_block_group_tree(struct block_group_tree *tree)
6409 {
6410         cache_tree_free_extents(&tree->tree, free_block_group_record);
6411 }
6412
6413 int insert_device_extent_record(struct device_extent_tree *tree,
6414                                 struct device_extent_record *de_rec)
6415 {
6416         int ret;
6417
6418         /*
6419          * Device extent is a bit different from the other extents, because
6420          * the extents which belong to the different devices may have the
6421          * same start and size, so we need use the special extent cache
6422          * search/insert functions.
6423          */
6424         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6425         if (ret)
6426                 return ret;
6427
6428         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6429         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6430         return 0;
6431 }
6432
6433 static void free_device_extent_record(struct cache_extent *cache)
6434 {
6435         struct device_extent_record *rec;
6436
6437         rec = container_of(cache, struct device_extent_record, cache);
6438         if (!list_empty(&rec->chunk_list))
6439                 list_del_init(&rec->chunk_list);
6440         if (!list_empty(&rec->device_list))
6441                 list_del_init(&rec->device_list);
6442         free(rec);
6443 }
6444
6445 void free_device_extent_tree(struct device_extent_tree *tree)
6446 {
6447         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6448 }
6449
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref; anything else is recorded as a data backref
 * carrying the item's ref count.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6470
6471 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6472                                             struct btrfs_key *key,
6473                                             int slot)
6474 {
6475         struct btrfs_chunk *ptr;
6476         struct chunk_record *rec;
6477         int num_stripes, i;
6478
6479         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6480         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6481
6482         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6483         if (!rec) {
6484                 fprintf(stderr, "memory allocation failed\n");
6485                 exit(-1);
6486         }
6487
6488         INIT_LIST_HEAD(&rec->list);
6489         INIT_LIST_HEAD(&rec->dextents);
6490         rec->bg_rec = NULL;
6491
6492         rec->cache.start = key->offset;
6493         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6494
6495         rec->generation = btrfs_header_generation(leaf);
6496
6497         rec->objectid = key->objectid;
6498         rec->type = key->type;
6499         rec->offset = key->offset;
6500
6501         rec->length = rec->cache.size;
6502         rec->owner = btrfs_chunk_owner(leaf, ptr);
6503         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6504         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6505         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6506         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6507         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6508         rec->num_stripes = num_stripes;
6509         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6510
6511         for (i = 0; i < rec->num_stripes; ++i) {
6512                 rec->stripes[i].devid =
6513                         btrfs_stripe_devid_nr(leaf, ptr, i);
6514                 rec->stripes[i].offset =
6515                         btrfs_stripe_offset_nr(leaf, ptr, i);
6516                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6517                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6518                                 BTRFS_UUID_SIZE);
6519         }
6520
6521         return rec;
6522 }
6523
6524 static int process_chunk_item(struct cache_tree *chunk_cache,
6525                               struct btrfs_key *key, struct extent_buffer *eb,
6526                               int slot)
6527 {
6528         struct chunk_record *rec;
6529         struct btrfs_chunk *chunk;
6530         int ret = 0;
6531
6532         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6533         /*
6534          * Do extra check for this chunk item,
6535          *
6536          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6537          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6538          * and owner<->key_type check.
6539          */
6540         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6541                                       key->offset);
6542         if (ret < 0) {
6543                 error("chunk(%llu, %llu) is not valid, ignore it",
6544                       key->offset, btrfs_chunk_length(eb, chunk));
6545                 return 0;
6546         }
6547         rec = btrfs_new_chunk_record(eb, key, slot);
6548         ret = insert_cache_extent(chunk_cache, &rec->cache);
6549         if (ret) {
6550                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6551                         rec->offset, rec->length);
6552                 free(rec);
6553         }
6554
6555         return ret;
6556 }
6557
6558 static int process_device_item(struct rb_root *dev_cache,
6559                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6560 {
6561         struct btrfs_dev_item *ptr;
6562         struct device_record *rec;
6563         int ret = 0;
6564
6565         ptr = btrfs_item_ptr(eb,
6566                 slot, struct btrfs_dev_item);
6567
6568         rec = malloc(sizeof(*rec));
6569         if (!rec) {
6570                 fprintf(stderr, "memory allocation failed\n");
6571                 return -ENOMEM;
6572         }
6573
6574         rec->devid = key->offset;
6575         rec->generation = btrfs_header_generation(eb);
6576
6577         rec->objectid = key->objectid;
6578         rec->type = key->type;
6579         rec->offset = key->offset;
6580
6581         rec->devid = btrfs_device_id(eb, ptr);
6582         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6583         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6584
6585         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6586         if (ret) {
6587                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6588                 free(rec);
6589         }
6590
6591         return ret;
6592 }
6593
6594 struct block_group_record *
6595 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6596                              int slot)
6597 {
6598         struct btrfs_block_group_item *ptr;
6599         struct block_group_record *rec;
6600
6601         rec = calloc(1, sizeof(*rec));
6602         if (!rec) {
6603                 fprintf(stderr, "memory allocation failed\n");
6604                 exit(-1);
6605         }
6606
6607         rec->cache.start = key->objectid;
6608         rec->cache.size = key->offset;
6609
6610         rec->generation = btrfs_header_generation(leaf);
6611
6612         rec->objectid = key->objectid;
6613         rec->type = key->type;
6614         rec->offset = key->offset;
6615
6616         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6617         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6618
6619         INIT_LIST_HEAD(&rec->list);
6620
6621         return rec;
6622 }
6623
6624 static int process_block_group_item(struct block_group_tree *block_group_cache,
6625                                     struct btrfs_key *key,
6626                                     struct extent_buffer *eb, int slot)
6627 {
6628         struct block_group_record *rec;
6629         int ret = 0;
6630
6631         rec = btrfs_new_block_group_record(eb, key, slot);
6632         ret = insert_block_group_record(block_group_cache, rec);
6633         if (ret) {
6634                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6635                         rec->objectid, rec->offset);
6636                 free(rec);
6637         }
6638
6639         return ret;
6640 }
6641
6642 struct device_extent_record *
6643 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6644                                struct btrfs_key *key, int slot)
6645 {
6646         struct device_extent_record *rec;
6647         struct btrfs_dev_extent *ptr;
6648
6649         rec = calloc(1, sizeof(*rec));
6650         if (!rec) {
6651                 fprintf(stderr, "memory allocation failed\n");
6652                 exit(-1);
6653         }
6654
6655         rec->cache.objectid = key->objectid;
6656         rec->cache.start = key->offset;
6657
6658         rec->generation = btrfs_header_generation(leaf);
6659
6660         rec->objectid = key->objectid;
6661         rec->type = key->type;
6662         rec->offset = key->offset;
6663
6664         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6665         rec->chunk_objecteid =
6666                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6667         rec->chunk_offset =
6668                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6669         rec->length = btrfs_dev_extent_length(leaf, ptr);
6670         rec->cache.size = rec->length;
6671
6672         INIT_LIST_HEAD(&rec->chunk_list);
6673         INIT_LIST_HEAD(&rec->device_list);
6674
6675         return rec;
6676 }
6677
6678 static int
6679 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6680                            struct btrfs_key *key, struct extent_buffer *eb,
6681                            int slot)
6682 {
6683         struct device_extent_record *rec;
6684         int ret;
6685
6686         rec = btrfs_new_device_extent_record(eb, key, slot);
6687         ret = insert_device_extent_record(dev_extent_cache, rec);
6688         if (ret) {
6689                 fprintf(stderr,
6690                         "Device extent[%llu, %llu, %llu] existed.\n",
6691                         rec->objectid, rec->offset, rec->length);
6692                 free(rec);
6693         }
6694
6695         return ret;
6696 }
6697
6698 static int process_extent_item(struct btrfs_root *root,
6699                                struct cache_tree *extent_cache,
6700                                struct extent_buffer *eb, int slot)
6701 {
6702         struct btrfs_extent_item *ei;
6703         struct btrfs_extent_inline_ref *iref;
6704         struct btrfs_extent_data_ref *dref;
6705         struct btrfs_shared_data_ref *sref;
6706         struct btrfs_key key;
6707         struct extent_record tmpl;
6708         unsigned long end;
6709         unsigned long ptr;
6710         int ret;
6711         int type;
6712         u32 item_size = btrfs_item_size_nr(eb, slot);
6713         u64 refs = 0;
6714         u64 offset;
6715         u64 num_bytes;
6716         int metadata = 0;
6717
6718         btrfs_item_key_to_cpu(eb, &key, slot);
6719
6720         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6721                 metadata = 1;
6722                 num_bytes = root->nodesize;
6723         } else {
6724                 num_bytes = key.offset;
6725         }
6726
6727         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6728                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6729                       key.objectid, root->sectorsize);
6730                 return -EIO;
6731         }
6732         if (item_size < sizeof(*ei)) {
6733 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6734                 struct btrfs_extent_item_v0 *ei0;
6735                 BUG_ON(item_size != sizeof(*ei0));
6736                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6737                 refs = btrfs_extent_refs_v0(eb, ei0);
6738 #else
6739                 BUG();
6740 #endif
6741                 memset(&tmpl, 0, sizeof(tmpl));
6742                 tmpl.start = key.objectid;
6743                 tmpl.nr = num_bytes;
6744                 tmpl.extent_item_refs = refs;
6745                 tmpl.metadata = metadata;
6746                 tmpl.found_rec = 1;
6747                 tmpl.max_size = num_bytes;
6748
6749                 return add_extent_rec(extent_cache, &tmpl);
6750         }
6751
6752         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6753         refs = btrfs_extent_refs(eb, ei);
6754         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6755                 metadata = 1;
6756         else
6757                 metadata = 0;
6758         if (metadata && num_bytes != root->nodesize) {
6759                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6760                       num_bytes, root->nodesize);
6761                 return -EIO;
6762         }
6763         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6764                 error("ignore invalid data extent, length %llu is not aligned to %u",
6765                       num_bytes, root->sectorsize);
6766                 return -EIO;
6767         }
6768
6769         memset(&tmpl, 0, sizeof(tmpl));
6770         tmpl.start = key.objectid;
6771         tmpl.nr = num_bytes;
6772         tmpl.extent_item_refs = refs;
6773         tmpl.metadata = metadata;
6774         tmpl.found_rec = 1;
6775         tmpl.max_size = num_bytes;
6776         add_extent_rec(extent_cache, &tmpl);
6777
6778         ptr = (unsigned long)(ei + 1);
6779         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6780             key.type == BTRFS_EXTENT_ITEM_KEY)
6781                 ptr += sizeof(struct btrfs_tree_block_info);
6782
6783         end = (unsigned long)ei + item_size;
6784         while (ptr < end) {
6785                 iref = (struct btrfs_extent_inline_ref *)ptr;
6786                 type = btrfs_extent_inline_ref_type(eb, iref);
6787                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6788                 switch (type) {
6789                 case BTRFS_TREE_BLOCK_REF_KEY:
6790                         ret = add_tree_backref(extent_cache, key.objectid,
6791                                         0, offset, 0);
6792                         if (ret < 0)
6793                                 error("add_tree_backref failed: %s",
6794                                       strerror(-ret));
6795                         break;
6796                 case BTRFS_SHARED_BLOCK_REF_KEY:
6797                         ret = add_tree_backref(extent_cache, key.objectid,
6798                                         offset, 0, 0);
6799                         if (ret < 0)
6800                                 error("add_tree_backref failed: %s",
6801                                       strerror(-ret));
6802                         break;
6803                 case BTRFS_EXTENT_DATA_REF_KEY:
6804                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6805                         add_data_backref(extent_cache, key.objectid, 0,
6806                                         btrfs_extent_data_ref_root(eb, dref),
6807                                         btrfs_extent_data_ref_objectid(eb,
6808                                                                        dref),
6809                                         btrfs_extent_data_ref_offset(eb, dref),
6810                                         btrfs_extent_data_ref_count(eb, dref),
6811                                         0, num_bytes);
6812                         break;
6813                 case BTRFS_SHARED_DATA_REF_KEY:
6814                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6815                         add_data_backref(extent_cache, key.objectid, offset,
6816                                         0, 0, 0,
6817                                         btrfs_shared_data_ref_count(eb, sref),
6818                                         0, num_bytes);
6819                         break;
6820                 default:
6821                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6822                                 key.objectid, key.type, num_bytes);
6823                         goto out;
6824                 }
6825                 ptr += btrfs_extent_inline_ref_size(type);
6826         }
6827         WARN_ON(ptr > end);
6828 out:
6829         return 0;
6830 }
6831
6832 static int check_cache_range(struct btrfs_root *root,
6833                              struct btrfs_block_group_cache *cache,
6834                              u64 offset, u64 bytes)
6835 {
6836         struct btrfs_free_space *entry;
6837         u64 *logical;
6838         u64 bytenr;
6839         int stripe_len;
6840         int i, nr, ret;
6841
6842         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6843                 bytenr = btrfs_sb_offset(i);
6844                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6845                                        cache->key.objectid, bytenr, 0,
6846                                        &logical, &nr, &stripe_len);
6847                 if (ret)
6848                         return ret;
6849
6850                 while (nr--) {
6851                         if (logical[nr] + stripe_len <= offset)
6852                                 continue;
6853                         if (offset + bytes <= logical[nr])
6854                                 continue;
6855                         if (logical[nr] == offset) {
6856                                 if (stripe_len >= bytes) {
6857                                         free(logical);
6858                                         return 0;
6859                                 }
6860                                 bytes -= stripe_len;
6861                                 offset += stripe_len;
6862                         } else if (logical[nr] < offset) {
6863                                 if (logical[nr] + stripe_len >=
6864                                     offset + bytes) {
6865                                         free(logical);
6866                                         return 0;
6867                                 }
6868                                 bytes = (offset + bytes) -
6869                                         (logical[nr] + stripe_len);
6870                                 offset = logical[nr] + stripe_len;
6871                         } else {
6872                                 /*
6873                                  * Could be tricky, the super may land in the
6874                                  * middle of the area we're checking.  First
6875                                  * check the easiest case, it's at the end.
6876                                  */
6877                                 if (logical[nr] + stripe_len >=
6878                                     bytes + offset) {
6879                                         bytes = logical[nr] - offset;
6880                                         continue;
6881                                 }
6882
6883                                 /* Check the left side */
6884                                 ret = check_cache_range(root, cache,
6885                                                         offset,
6886                                                         logical[nr] - offset);
6887                                 if (ret) {
6888                                         free(logical);
6889                                         return ret;
6890                                 }
6891
6892                                 /* Now we continue with the right side */
6893                                 bytes = (offset + bytes) -
6894                                         (logical[nr] + stripe_len);
6895                                 offset = logical[nr] + stripe_len;
6896                         }
6897                 }
6898
6899                 free(logical);
6900         }
6901
6902         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6903         if (!entry) {
6904                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6905                         offset, offset+bytes);
6906                 return -EINVAL;
6907         }
6908
6909         if (entry->offset != offset) {
6910                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6911                         entry->offset);
6912                 return -EINVAL;
6913         }
6914
6915         if (entry->bytes != bytes) {
6916                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6917                         bytes, entry->bytes, offset);
6918                 return -EINVAL;
6919         }
6920
6921         unlink_free_space(cache->free_space_ctl, entry);
6922         free(entry);
6923         return 0;
6924 }
6925
6926 static int verify_space_cache(struct btrfs_root *root,
6927                               struct btrfs_block_group_cache *cache)
6928 {
6929         struct btrfs_path path;
6930         struct extent_buffer *leaf;
6931         struct btrfs_key key;
6932         u64 last;
6933         int ret = 0;
6934
6935         root = root->fs_info->extent_root;
6936
6937         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6938
6939         btrfs_init_path(&path);
6940         key.objectid = last;
6941         key.offset = 0;
6942         key.type = BTRFS_EXTENT_ITEM_KEY;
6943         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6944         if (ret < 0)
6945                 goto out;
6946         ret = 0;
6947         while (1) {
6948                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6949                         ret = btrfs_next_leaf(root, &path);
6950                         if (ret < 0)
6951                                 goto out;
6952                         if (ret > 0) {
6953                                 ret = 0;
6954                                 break;
6955                         }
6956                 }
6957                 leaf = path.nodes[0];
6958                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6959                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6960                         break;
6961                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6962                     key.type != BTRFS_METADATA_ITEM_KEY) {
6963                         path.slots[0]++;
6964                         continue;
6965                 }
6966
6967                 if (last == key.objectid) {
6968                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6969                                 last = key.objectid + key.offset;
6970                         else
6971                                 last = key.objectid + root->nodesize;
6972                         path.slots[0]++;
6973                         continue;
6974                 }
6975
6976                 ret = check_cache_range(root, cache, last,
6977                                         key.objectid - last);
6978                 if (ret)
6979                         break;
6980                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6981                         last = key.objectid + key.offset;
6982                 else
6983                         last = key.objectid + root->nodesize;
6984                 path.slots[0]++;
6985         }
6986
6987         if (last < cache->key.objectid + cache->key.offset)
6988                 ret = check_cache_range(root, cache, last,
6989                                         cache->key.objectid +
6990                                         cache->key.offset - last);
6991
6992 out:
6993         btrfs_release_path(&path);
6994
6995         if (!ret &&
6996             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6997                 fprintf(stderr, "There are still entries left in the space "
6998                         "cache\n");
6999                 ret = -EINVAL;
7000         }
7001
7002         return ret;
7003 }
7004
7005 static int check_space_cache(struct btrfs_root *root)
7006 {
7007         struct btrfs_block_group_cache *cache;
7008         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7009         int ret;
7010         int error = 0;
7011
7012         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7013             btrfs_super_generation(root->fs_info->super_copy) !=
7014             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7015                 printf("cache and super generation don't match, space cache "
7016                        "will be invalidated\n");
7017                 return 0;
7018         }
7019
7020         if (ctx.progress_enabled) {
7021                 ctx.tp = TASK_FREE_SPACE;
7022                 task_start(ctx.info);
7023         }
7024
7025         while (1) {
7026                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7027                 if (!cache)
7028                         break;
7029
7030                 start = cache->key.objectid + cache->key.offset;
7031                 if (!cache->free_space_ctl) {
7032                         if (btrfs_init_free_space_ctl(cache,
7033                                                       root->sectorsize)) {
7034                                 ret = -ENOMEM;
7035                                 break;
7036                         }
7037                 } else {
7038                         btrfs_remove_free_space_cache(cache);
7039                 }
7040
7041                 if (btrfs_fs_compat_ro(root->fs_info,
7042                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
7043                         ret = exclude_super_stripes(root, cache);
7044                         if (ret) {
7045                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7046                                         strerror(-ret));
7047                                 error++;
7048                                 continue;
7049                         }
7050                         ret = load_free_space_tree(root->fs_info, cache);
7051                         free_excluded_extents(root, cache);
7052                         if (ret < 0) {
7053                                 fprintf(stderr, "could not load free space tree: %s\n",
7054                                         strerror(-ret));
7055                                 error++;
7056                                 continue;
7057                         }
7058                         error += ret;
7059                 } else {
7060                         ret = load_free_space_cache(root->fs_info, cache);
7061                         if (!ret)
7062                                 continue;
7063                 }
7064
7065                 ret = verify_space_cache(root, cache);
7066                 if (ret) {
7067                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7068                                 cache->key.objectid);
7069                         error++;
7070                 }
7071         }
7072
7073         task_stop(ctx.info);
7074
7075         return error ? -EINVAL : 0;
7076 }
7077
7078 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7079                         u64 num_bytes, unsigned long leaf_offset,
7080                         struct extent_buffer *eb) {
7081
7082         u64 offset = 0;
7083         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7084         char *data;
7085         unsigned long csum_offset;
7086         u32 csum;
7087         u32 csum_expected;
7088         u64 read_len;
7089         u64 data_checked = 0;
7090         u64 tmp;
7091         int ret = 0;
7092         int mirror;
7093         int num_copies;
7094
7095         if (num_bytes % root->sectorsize)
7096                 return -EINVAL;
7097
7098         data = malloc(num_bytes);
7099         if (!data)
7100                 return -ENOMEM;
7101
7102         while (offset < num_bytes) {
7103                 mirror = 0;
7104 again:
7105                 read_len = num_bytes - offset;
7106                 /* read as much space once a time */
7107                 ret = read_extent_data(root, data + offset,
7108                                 bytenr + offset, &read_len, mirror);
7109                 if (ret)
7110                         goto out;
7111                 data_checked = 0;
7112                 /* verify every 4k data's checksum */
7113                 while (data_checked < read_len) {
7114                         csum = ~(u32)0;
7115                         tmp = offset + data_checked;
7116
7117                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7118                                                csum, root->sectorsize);
7119                         btrfs_csum_final(csum, (u8 *)&csum);
7120
7121                         csum_offset = leaf_offset +
7122                                  tmp / root->sectorsize * csum_size;
7123                         read_extent_buffer(eb, (char *)&csum_expected,
7124                                            csum_offset, csum_size);
7125                         /* try another mirror */
7126                         if (csum != csum_expected) {
7127                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7128                                                 mirror, bytenr + tmp,
7129                                                 csum, csum_expected);
7130                                 num_copies = btrfs_num_copies(
7131                                                 &root->fs_info->mapping_tree,
7132                                                 bytenr, num_bytes);
7133                                 if (mirror < num_copies - 1) {
7134                                         mirror += 1;
7135                                         goto again;
7136                                 }
7137                         }
7138                         data_checked += root->sectorsize;
7139                 }
7140                 offset += read_len;
7141         }
7142 out:
7143         free(data);
7144         return ret;
7145 }
7146
7147 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7148                                u64 num_bytes)
7149 {
7150         struct btrfs_path path;
7151         struct extent_buffer *leaf;
7152         struct btrfs_key key;
7153         int ret;
7154
7155         btrfs_init_path(&path);
7156         key.objectid = bytenr;
7157         key.type = BTRFS_EXTENT_ITEM_KEY;
7158         key.offset = (u64)-1;
7159
7160 again:
7161         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7162                                 0, 0);
7163         if (ret < 0) {
7164                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7165                 btrfs_release_path(&path);
7166                 return ret;
7167         } else if (ret) {
7168                 if (path.slots[0] > 0) {
7169                         path.slots[0]--;
7170                 } else {
7171                         ret = btrfs_prev_leaf(root, &path);
7172                         if (ret < 0) {
7173                                 goto out;
7174                         } else if (ret > 0) {
7175                                 ret = 0;
7176                                 goto out;
7177                         }
7178                 }
7179         }
7180
7181         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7182
7183         /*
7184          * Block group items come before extent items if they have the same
7185          * bytenr, so walk back one more just in case.  Dear future traveller,
7186          * first congrats on mastering time travel.  Now if it's not too much
7187          * trouble could you go back to 2006 and tell Chris to make the
7188          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7189          * EXTENT_ITEM_KEY please?
7190          */
7191         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7192                 if (path.slots[0] > 0) {
7193                         path.slots[0]--;
7194                 } else {
7195                         ret = btrfs_prev_leaf(root, &path);
7196                         if (ret < 0) {
7197                                 goto out;
7198                         } else if (ret > 0) {
7199                                 ret = 0;
7200                                 goto out;
7201                         }
7202                 }
7203                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7204         }
7205
7206         while (num_bytes) {
7207                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7208                         ret = btrfs_next_leaf(root, &path);
7209                         if (ret < 0) {
7210                                 fprintf(stderr, "Error going to next leaf "
7211                                         "%d\n", ret);
7212                                 btrfs_release_path(&path);
7213                                 return ret;
7214                         } else if (ret) {
7215                                 break;
7216                         }
7217                 }
7218                 leaf = path.nodes[0];
7219                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7220                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7221                         path.slots[0]++;
7222                         continue;
7223                 }
7224                 if (key.objectid + key.offset < bytenr) {
7225                         path.slots[0]++;
7226                         continue;
7227                 }
7228                 if (key.objectid > bytenr + num_bytes)
7229                         break;
7230
7231                 if (key.objectid == bytenr) {
7232                         if (key.offset >= num_bytes) {
7233                                 num_bytes = 0;
7234                                 break;
7235                         }
7236                         num_bytes -= key.offset;
7237                         bytenr += key.offset;
7238                 } else if (key.objectid < bytenr) {
7239                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7240                                 num_bytes = 0;
7241                                 break;
7242                         }
7243                         num_bytes = (bytenr + num_bytes) -
7244                                 (key.objectid + key.offset);
7245                         bytenr = key.objectid + key.offset;
7246                 } else {
7247                         if (key.objectid + key.offset < bytenr + num_bytes) {
7248                                 u64 new_start = key.objectid + key.offset;
7249                                 u64 new_bytes = bytenr + num_bytes - new_start;
7250
7251                                 /*
7252                                  * Weird case, the extent is in the middle of
7253                                  * our range, we'll have to search one side
7254                                  * and then the other.  Not sure if this happens
7255                                  * in real life, but no harm in coding it up
7256                                  * anyway just in case.
7257                                  */
7258                                 btrfs_release_path(&path);
7259                                 ret = check_extent_exists(root, new_start,
7260                                                           new_bytes);
7261                                 if (ret) {
7262                                         fprintf(stderr, "Right section didn't "
7263                                                 "have a record\n");
7264                                         break;
7265                                 }
7266                                 num_bytes = key.objectid - bytenr;
7267                                 goto again;
7268                         }
7269                         num_bytes = key.objectid - bytenr;
7270                 }
7271                 path.slots[0]++;
7272         }
7273         ret = 0;
7274
7275 out:
7276         if (num_bytes && !ret) {
7277                 fprintf(stderr, "There are no extents for csum range "
7278                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7279                 ret = 1;
7280         }
7281
7282         btrfs_release_path(&path);
7283         return ret;
7284 }
7285
7286 static int check_csums(struct btrfs_root *root)
7287 {
7288         struct btrfs_path path;
7289         struct extent_buffer *leaf;
7290         struct btrfs_key key;
7291         u64 offset = 0, num_bytes = 0;
7292         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7293         int errors = 0;
7294         int ret;
7295         u64 data_len;
7296         unsigned long leaf_offset;
7297
7298         root = root->fs_info->csum_root;
7299         if (!extent_buffer_uptodate(root->node)) {
7300                 fprintf(stderr, "No valid csum tree found\n");
7301                 return -ENOENT;
7302         }
7303
7304         btrfs_init_path(&path);
7305         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7306         key.type = BTRFS_EXTENT_CSUM_KEY;
7307         key.offset = 0;
7308         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7309         if (ret < 0) {
7310                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7311                 btrfs_release_path(&path);
7312                 return ret;
7313         }
7314
7315         if (ret > 0 && path.slots[0])
7316                 path.slots[0]--;
7317         ret = 0;
7318
7319         while (1) {
7320                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7321                         ret = btrfs_next_leaf(root, &path);
7322                         if (ret < 0) {
7323                                 fprintf(stderr, "Error going to next leaf "
7324                                         "%d\n", ret);
7325                                 break;
7326                         }
7327                         if (ret)
7328                                 break;
7329                 }
7330                 leaf = path.nodes[0];
7331
7332                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7333                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7334                         path.slots[0]++;
7335                         continue;
7336                 }
7337
7338                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7339                               csum_size) * root->sectorsize;
7340                 if (!check_data_csum)
7341                         goto skip_csum_check;
7342                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7343                 ret = check_extent_csums(root, key.offset, data_len,
7344                                          leaf_offset, leaf);
7345                 if (ret)
7346                         break;
7347 skip_csum_check:
7348                 if (!num_bytes) {
7349                         offset = key.offset;
7350                 } else if (key.offset != offset + num_bytes) {
7351                         ret = check_extent_exists(root, offset, num_bytes);
7352                         if (ret) {
7353                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7354                                         "there is no extent record\n",
7355                                         offset, offset+num_bytes);
7356                                 errors++;
7357                         }
7358                         offset = key.offset;
7359                         num_bytes = 0;
7360                 }
7361                 num_bytes += data_len;
7362                 path.slots[0]++;
7363         }
7364
7365         btrfs_release_path(&path);
7366         return errors;
7367 }
7368
7369 static int is_dropped_key(struct btrfs_key *key,
7370                           struct btrfs_key *drop_key) {
7371         if (key->objectid < drop_key->objectid)
7372                 return 1;
7373         else if (key->objectid == drop_key->objectid) {
7374                 if (key->type < drop_key->type)
7375                         return 1;
7376                 else if (key->type == drop_key->type) {
7377                         if (key->offset < drop_key->offset)
7378                                 return 1;
7379                 }
7380         }
7381         return 0;
7382 }
7383
7384 /*
7385  * Here are the rules for FULL_BACKREF.
7386  *
7387  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7388  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7389  *      FULL_BACKREF set.
7390  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7391  *    if it happened after the relocation occurred since we'll have dropped the
7392  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7393  *    have no real way to know for sure.
7394  *
7395  * We process the blocks one root at a time, and we start from the lowest root
7396  * objectid and go to the highest.  So we can just lookup the owner backref for
7397  * the record and if we don't find it then we know it doesn't exist and we have
7398  * a FULL BACKREF.
7399  *
7400  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7401  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7402  * be set or not and then we can check later once we've gathered all the refs.
7403  */
7404 static int calc_extent_flag(struct btrfs_root *root,
7405                            struct cache_tree *extent_cache,
7406                            struct extent_buffer *buf,
7407                            struct root_item_record *ri,
7408                            u64 *flags)
7409 {
7410         struct extent_record *rec;
7411         struct cache_extent *cache;
7412         struct tree_backref *tback;
7413         u64 owner = 0;
7414
7415         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7416         /* we have added this extent before */
7417         if (!cache)
7418                 return -ENOENT;
7419
7420         rec = container_of(cache, struct extent_record, cache);
7421
7422         /*
7423          * Except file/reloc tree, we can not have
7424          * FULL BACKREF MODE
7425          */
7426         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7427                 goto normal;
7428         /*
7429          * root node
7430          */
7431         if (buf->start == ri->bytenr)
7432                 goto normal;
7433
7434         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7435                 goto full_backref;
7436
7437         owner = btrfs_header_owner(buf);
7438         if (owner == ri->objectid)
7439                 goto normal;
7440
7441         tback = find_tree_backref(rec, 0, owner);
7442         if (!tback)
7443                 goto full_backref;
7444 normal:
7445         *flags = 0;
7446         if (rec->flag_block_full_backref != FLAG_UNSET &&
7447             rec->flag_block_full_backref != 0)
7448                 rec->bad_full_backref = 1;
7449         return 0;
7450 full_backref:
7451         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7452         if (rec->flag_block_full_backref != FLAG_UNSET &&
7453             rec->flag_block_full_backref != 1)
7454                 rec->bad_full_backref = 1;
7455         return 0;
7456 }
7457
7458 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7459 {
7460         fprintf(stderr, "Invalid key type(");
7461         print_key_type(stderr, 0, key_type);
7462         fprintf(stderr, ") found in root(");
7463         print_objectid(stderr, rootid, 0);
7464         fprintf(stderr, ")\n");
7465 }
7466
7467 /*
7468  * Check if the key is valid with its extent buffer.
7469  *
7470  * This is a early check in case invalid key exists in a extent buffer
7471  * This is not comprehensive yet, but should prevent wrong key/item passed
7472  * further
7473  */
7474 static int check_type_with_root(u64 rootid, u8 key_type)
7475 {
7476         switch (key_type) {
7477         /* Only valid in chunk tree */
7478         case BTRFS_DEV_ITEM_KEY:
7479         case BTRFS_CHUNK_ITEM_KEY:
7480                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7481                         goto err;
7482                 break;
7483         /* valid in csum and log tree */
7484         case BTRFS_CSUM_TREE_OBJECTID:
7485                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7486                       is_fstree(rootid)))
7487                         goto err;
7488                 break;
7489         case BTRFS_EXTENT_ITEM_KEY:
7490         case BTRFS_METADATA_ITEM_KEY:
7491         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7492                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7493                         goto err;
7494                 break;
7495         case BTRFS_ROOT_ITEM_KEY:
7496                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7497                         goto err;
7498                 break;
7499         case BTRFS_DEV_EXTENT_KEY:
7500                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7501                         goto err;
7502                 break;
7503         }
7504         return 0;
7505 err:
7506         report_mismatch_key_root(key_type, rootid);
7507         return -EINVAL;
7508 }
7509
7510 static int run_next_block(struct btrfs_root *root,
7511                           struct block_info *bits,
7512                           int bits_nr,
7513                           u64 *last,
7514                           struct cache_tree *pending,
7515                           struct cache_tree *seen,
7516                           struct cache_tree *reada,
7517                           struct cache_tree *nodes,
7518                           struct cache_tree *extent_cache,
7519                           struct cache_tree *chunk_cache,
7520                           struct rb_root *dev_cache,
7521                           struct block_group_tree *block_group_cache,
7522                           struct device_extent_tree *dev_extent_cache,
7523                           struct root_item_record *ri)
7524 {
7525         struct extent_buffer *buf;
7526         struct extent_record *rec = NULL;
7527         u64 bytenr;
7528         u32 size;
7529         u64 parent;
7530         u64 owner;
7531         u64 flags;
7532         u64 ptr;
7533         u64 gen = 0;
7534         int ret = 0;
7535         int i;
7536         int nritems;
7537         struct btrfs_key key;
7538         struct cache_extent *cache;
7539         int reada_bits;
7540
7541         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7542                                     bits_nr, &reada_bits);
7543         if (nritems == 0)
7544                 return 1;
7545
7546         if (!reada_bits) {
7547                 for(i = 0; i < nritems; i++) {
7548                         ret = add_cache_extent(reada, bits[i].start,
7549                                                bits[i].size);
7550                         if (ret == -EEXIST)
7551                                 continue;
7552
7553                         /* fixme, get the parent transid */
7554                         readahead_tree_block(root, bits[i].start,
7555                                              bits[i].size, 0);
7556                 }
7557         }
7558         *last = bits[0].start;
7559         bytenr = bits[0].start;
7560         size = bits[0].size;
7561
7562         cache = lookup_cache_extent(pending, bytenr, size);
7563         if (cache) {
7564                 remove_cache_extent(pending, cache);
7565                 free(cache);
7566         }
7567         cache = lookup_cache_extent(reada, bytenr, size);
7568         if (cache) {
7569                 remove_cache_extent(reada, cache);
7570                 free(cache);
7571         }
7572         cache = lookup_cache_extent(nodes, bytenr, size);
7573         if (cache) {
7574                 remove_cache_extent(nodes, cache);
7575                 free(cache);
7576         }
7577         cache = lookup_cache_extent(extent_cache, bytenr, size);
7578         if (cache) {
7579                 rec = container_of(cache, struct extent_record, cache);
7580                 gen = rec->parent_generation;
7581         }
7582
7583         /* fixme, get the real parent transid */
7584         buf = read_tree_block(root, bytenr, size, gen);
7585         if (!extent_buffer_uptodate(buf)) {
7586                 record_bad_block_io(root->fs_info,
7587                                     extent_cache, bytenr, size);
7588                 goto out;
7589         }
7590
7591         nritems = btrfs_header_nritems(buf);
7592
7593         flags = 0;
7594         if (!init_extent_tree) {
7595                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7596                                        btrfs_header_level(buf), 1, NULL,
7597                                        &flags);
7598                 if (ret < 0) {
7599                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7600                         if (ret < 0) {
7601                                 fprintf(stderr, "Couldn't calc extent flags\n");
7602                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7603                         }
7604                 }
7605         } else {
7606                 flags = 0;
7607                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7608                 if (ret < 0) {
7609                         fprintf(stderr, "Couldn't calc extent flags\n");
7610                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7611                 }
7612         }
7613
7614         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7615                 if (ri != NULL &&
7616                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7617                     ri->objectid == btrfs_header_owner(buf)) {
7618                         /*
7619                          * Ok we got to this block from it's original owner and
7620                          * we have FULL_BACKREF set.  Relocation can leave
7621                          * converted blocks over so this is altogether possible,
7622                          * however it's not possible if the generation > the
7623                          * last snapshot, so check for this case.
7624                          */
7625                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7626                             btrfs_header_generation(buf) > ri->last_snapshot) {
7627                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7628                                 rec->bad_full_backref = 1;
7629                         }
7630                 }
7631         } else {
7632                 if (ri != NULL &&
7633                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7634                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7635                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7636                         rec->bad_full_backref = 1;
7637                 }
7638         }
7639
7640         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7641                 rec->flag_block_full_backref = 1;
7642                 parent = bytenr;
7643                 owner = 0;
7644         } else {
7645                 rec->flag_block_full_backref = 0;
7646                 parent = 0;
7647                 owner = btrfs_header_owner(buf);
7648         }
7649
7650         ret = check_block(root, extent_cache, buf, flags);
7651         if (ret)
7652                 goto out;
7653
7654         if (btrfs_is_leaf(buf)) {
7655                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7656                 for (i = 0; i < nritems; i++) {
7657                         struct btrfs_file_extent_item *fi;
7658                         btrfs_item_key_to_cpu(buf, &key, i);
7659                         /*
7660                          * Check key type against the leaf owner.
7661                          * Could filter quite a lot of early error if
7662                          * owner is correct
7663                          */
7664                         if (check_type_with_root(btrfs_header_owner(buf),
7665                                                  key.type)) {
7666                                 fprintf(stderr, "ignoring invalid key\n");
7667                                 continue;
7668                         }
7669                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7670                                 process_extent_item(root, extent_cache, buf,
7671                                                     i);
7672                                 continue;
7673                         }
7674                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7675                                 process_extent_item(root, extent_cache, buf,
7676                                                     i);
7677                                 continue;
7678                         }
7679                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7680                                 total_csum_bytes +=
7681                                         btrfs_item_size_nr(buf, i);
7682                                 continue;
7683                         }
7684                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7685                                 process_chunk_item(chunk_cache, &key, buf, i);
7686                                 continue;
7687                         }
7688                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7689                                 process_device_item(dev_cache, &key, buf, i);
7690                                 continue;
7691                         }
7692                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7693                                 process_block_group_item(block_group_cache,
7694                                         &key, buf, i);
7695                                 continue;
7696                         }
7697                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7698                                 process_device_extent_item(dev_extent_cache,
7699                                         &key, buf, i);
7700                                 continue;
7701
7702                         }
7703                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7704 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7705                                 process_extent_ref_v0(extent_cache, buf, i);
7706 #else
7707                                 BUG();
7708 #endif
7709                                 continue;
7710                         }
7711
7712                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7713                                 ret = add_tree_backref(extent_cache,
7714                                                 key.objectid, 0, key.offset, 0);
7715                                 if (ret < 0)
7716                                         error("add_tree_backref failed: %s",
7717                                               strerror(-ret));
7718                                 continue;
7719                         }
7720                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7721                                 ret = add_tree_backref(extent_cache,
7722                                                 key.objectid, key.offset, 0, 0);
7723                                 if (ret < 0)
7724                                         error("add_tree_backref failed: %s",
7725                                               strerror(-ret));
7726                                 continue;
7727                         }
7728                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7729                                 struct btrfs_extent_data_ref *ref;
7730                                 ref = btrfs_item_ptr(buf, i,
7731                                                 struct btrfs_extent_data_ref);
7732                                 add_data_backref(extent_cache,
7733                                         key.objectid, 0,
7734                                         btrfs_extent_data_ref_root(buf, ref),
7735                                         btrfs_extent_data_ref_objectid(buf,
7736                                                                        ref),
7737                                         btrfs_extent_data_ref_offset(buf, ref),
7738                                         btrfs_extent_data_ref_count(buf, ref),
7739                                         0, root->sectorsize);
7740                                 continue;
7741                         }
7742                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7743                                 struct btrfs_shared_data_ref *ref;
7744                                 ref = btrfs_item_ptr(buf, i,
7745                                                 struct btrfs_shared_data_ref);
7746                                 add_data_backref(extent_cache,
7747                                         key.objectid, key.offset, 0, 0, 0,
7748                                         btrfs_shared_data_ref_count(buf, ref),
7749                                         0, root->sectorsize);
7750                                 continue;
7751                         }
7752                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7753                                 struct bad_item *bad;
7754
7755                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7756                                         continue;
7757                                 if (!owner)
7758                                         continue;
7759                                 bad = malloc(sizeof(struct bad_item));
7760                                 if (!bad)
7761                                         continue;
7762                                 INIT_LIST_HEAD(&bad->list);
7763                                 memcpy(&bad->key, &key,
7764                                        sizeof(struct btrfs_key));
7765                                 bad->root_id = owner;
7766                                 list_add_tail(&bad->list, &delete_items);
7767                                 continue;
7768                         }
7769                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7770                                 continue;
7771                         fi = btrfs_item_ptr(buf, i,
7772                                             struct btrfs_file_extent_item);
7773                         if (btrfs_file_extent_type(buf, fi) ==
7774                             BTRFS_FILE_EXTENT_INLINE)
7775                                 continue;
7776                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7777                                 continue;
7778
7779                         data_bytes_allocated +=
7780                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7781                         if (data_bytes_allocated < root->sectorsize) {
7782                                 abort();
7783                         }
7784                         data_bytes_referenced +=
7785                                 btrfs_file_extent_num_bytes(buf, fi);
7786                         add_data_backref(extent_cache,
7787                                 btrfs_file_extent_disk_bytenr(buf, fi),
7788                                 parent, owner, key.objectid, key.offset -
7789                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7790                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7791                 }
7792         } else {
7793                 int level;
7794                 struct btrfs_key first_key;
7795
7796                 first_key.objectid = 0;
7797
7798                 if (nritems > 0)
7799                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7800                 level = btrfs_header_level(buf);
7801                 for (i = 0; i < nritems; i++) {
7802                         struct extent_record tmpl;
7803
7804                         ptr = btrfs_node_blockptr(buf, i);
7805                         size = root->nodesize;
7806                         btrfs_node_key_to_cpu(buf, &key, i);
7807                         if (ri != NULL) {
7808                                 if ((level == ri->drop_level)
7809                                     && is_dropped_key(&key, &ri->drop_key)) {
7810                                         continue;
7811                                 }
7812                         }
7813
7814                         memset(&tmpl, 0, sizeof(tmpl));
7815                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7816                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7817                         tmpl.start = ptr;
7818                         tmpl.nr = size;
7819                         tmpl.refs = 1;
7820                         tmpl.metadata = 1;
7821                         tmpl.max_size = size;
7822                         ret = add_extent_rec(extent_cache, &tmpl);
7823                         if (ret < 0)
7824                                 goto out;
7825
7826                         ret = add_tree_backref(extent_cache, ptr, parent,
7827                                         owner, 1);
7828                         if (ret < 0) {
7829                                 error("add_tree_backref failed: %s",
7830                                       strerror(-ret));
7831                                 continue;
7832                         }
7833
7834                         if (level > 1) {
7835                                 add_pending(nodes, seen, ptr, size);
7836                         } else {
7837                                 add_pending(pending, seen, ptr, size);
7838                         }
7839                 }
7840                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7841                                       nritems) * sizeof(struct btrfs_key_ptr);
7842         }
7843         total_btree_bytes += buf->len;
7844         if (fs_root_objectid(btrfs_header_owner(buf)))
7845                 total_fs_tree_bytes += buf->len;
7846         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7847                 total_extent_tree_bytes += buf->len;
7848         if (!found_old_backref &&
7849             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7850             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7851             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7852                 found_old_backref = 1;
7853 out:
7854         free_extent_buffer(buf);
7855         return ret;
7856 }
7857
7858 static int add_root_to_pending(struct extent_buffer *buf,
7859                                struct cache_tree *extent_cache,
7860                                struct cache_tree *pending,
7861                                struct cache_tree *seen,
7862                                struct cache_tree *nodes,
7863                                u64 objectid)
7864 {
7865         struct extent_record tmpl;
7866         int ret;
7867
7868         if (btrfs_header_level(buf) > 0)
7869                 add_pending(nodes, seen, buf->start, buf->len);
7870         else
7871                 add_pending(pending, seen, buf->start, buf->len);
7872
7873         memset(&tmpl, 0, sizeof(tmpl));
7874         tmpl.start = buf->start;
7875         tmpl.nr = buf->len;
7876         tmpl.is_root = 1;
7877         tmpl.refs = 1;
7878         tmpl.metadata = 1;
7879         tmpl.max_size = buf->len;
7880         add_extent_rec(extent_cache, &tmpl);
7881
7882         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7883             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7884                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7885                                 0, 1);
7886         else
7887                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7888                                 1);
7889         return ret;
7890 }
7891
7892 /* as we fix the tree, we might be deleting blocks that
7893  * we're tracking for repair.  This hook makes sure we
7894  * remove any backrefs for blocks as we are fixing them.
7895  */
7896 static int free_extent_hook(struct btrfs_trans_handle *trans,
7897                             struct btrfs_root *root,
7898                             u64 bytenr, u64 num_bytes, u64 parent,
7899                             u64 root_objectid, u64 owner, u64 offset,
7900                             int refs_to_drop)
7901 {
7902         struct extent_record *rec;
7903         struct cache_extent *cache;
7904         int is_data;
7905         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7906
7907         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7908         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7909         if (!cache)
7910                 return 0;
7911
7912         rec = container_of(cache, struct extent_record, cache);
7913         if (is_data) {
7914                 struct data_backref *back;
7915                 back = find_data_backref(rec, parent, root_objectid, owner,
7916                                          offset, 1, bytenr, num_bytes);
7917                 if (!back)
7918                         goto out;
7919                 if (back->node.found_ref) {
7920                         back->found_ref -= refs_to_drop;
7921                         if (rec->refs)
7922                                 rec->refs -= refs_to_drop;
7923                 }
7924                 if (back->node.found_extent_tree) {
7925                         back->num_refs -= refs_to_drop;
7926                         if (rec->extent_item_refs)
7927                                 rec->extent_item_refs -= refs_to_drop;
7928                 }
7929                 if (back->found_ref == 0)
7930                         back->node.found_ref = 0;
7931                 if (back->num_refs == 0)
7932                         back->node.found_extent_tree = 0;
7933
7934                 if (!back->node.found_extent_tree && back->node.found_ref) {
7935                         list_del(&back->node.list);
7936                         free(back);
7937                 }
7938         } else {
7939                 struct tree_backref *back;
7940                 back = find_tree_backref(rec, parent, root_objectid);
7941                 if (!back)
7942                         goto out;
7943                 if (back->node.found_ref) {
7944                         if (rec->refs)
7945                                 rec->refs--;
7946                         back->node.found_ref = 0;
7947                 }
7948                 if (back->node.found_extent_tree) {
7949                         if (rec->extent_item_refs)
7950                                 rec->extent_item_refs--;
7951                         back->node.found_extent_tree = 0;
7952                 }
7953                 if (!back->node.found_extent_tree && back->node.found_ref) {
7954                         list_del(&back->node.list);
7955                         free(back);
7956                 }
7957         }
7958         maybe_free_extent_rec(extent_cache, rec);
7959 out:
7960         return 0;
7961 }
7962
7963 static int delete_extent_records(struct btrfs_trans_handle *trans,
7964                                  struct btrfs_root *root,
7965                                  struct btrfs_path *path,
7966                                  u64 bytenr, u64 new_len)
7967 {
7968         struct btrfs_key key;
7969         struct btrfs_key found_key;
7970         struct extent_buffer *leaf;
7971         int ret;
7972         int slot;
7973
7974
7975         key.objectid = bytenr;
7976         key.type = (u8)-1;
7977         key.offset = (u64)-1;
7978
7979         while(1) {
7980                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7981                                         &key, path, 0, 1);
7982                 if (ret < 0)
7983                         break;
7984
7985                 if (ret > 0) {
7986                         ret = 0;
7987                         if (path->slots[0] == 0)
7988                                 break;
7989                         path->slots[0]--;
7990                 }
7991                 ret = 0;
7992
7993                 leaf = path->nodes[0];
7994                 slot = path->slots[0];
7995
7996                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7997                 if (found_key.objectid != bytenr)
7998                         break;
7999
8000                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8001                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8002                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8003                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8004                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8005                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8006                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8007                         btrfs_release_path(path);
8008                         if (found_key.type == 0) {
8009                                 if (found_key.offset == 0)
8010                                         break;
8011                                 key.offset = found_key.offset - 1;
8012                                 key.type = found_key.type;
8013                         }
8014                         key.type = found_key.type - 1;
8015                         key.offset = (u64)-1;
8016                         continue;
8017                 }
8018
8019                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8020                         found_key.objectid, found_key.type, found_key.offset);
8021
8022                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8023                 if (ret)
8024                         break;
8025                 btrfs_release_path(path);
8026
8027                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8028                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8029                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8030                                 found_key.offset : root->nodesize;
8031
8032                         ret = btrfs_update_block_group(trans, root, bytenr,
8033                                                        bytes, 0, 0);
8034                         if (ret)
8035                                 break;
8036                 }
8037         }
8038
8039         btrfs_release_path(path);
8040         return ret;
8041 }
8042
8043 /*
8044  * for a single backref, this will allocate a new extent
8045  * and add the backref to it.
8046  */
8047 static int record_extent(struct btrfs_trans_handle *trans,
8048                          struct btrfs_fs_info *info,
8049                          struct btrfs_path *path,
8050                          struct extent_record *rec,
8051                          struct extent_backref *back,
8052                          int allocated, u64 flags)
8053 {
8054         int ret;
8055         struct btrfs_root *extent_root = info->extent_root;
8056         struct extent_buffer *leaf;
8057         struct btrfs_key ins_key;
8058         struct btrfs_extent_item *ei;
8059         struct data_backref *dback;
8060         struct btrfs_tree_block_info *bi;
8061
8062         if (!back->is_data)
8063                 rec->max_size = max_t(u64, rec->max_size,
8064                                     info->extent_root->nodesize);
8065
8066         if (!allocated) {
8067                 u32 item_size = sizeof(*ei);
8068
8069                 if (!back->is_data)
8070                         item_size += sizeof(*bi);
8071
8072                 ins_key.objectid = rec->start;
8073                 ins_key.offset = rec->max_size;
8074                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8075
8076                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8077                                         &ins_key, item_size);
8078                 if (ret)
8079                         goto fail;
8080
8081                 leaf = path->nodes[0];
8082                 ei = btrfs_item_ptr(leaf, path->slots[0],
8083                                     struct btrfs_extent_item);
8084
8085                 btrfs_set_extent_refs(leaf, ei, 0);
8086                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8087
8088                 if (back->is_data) {
8089                         btrfs_set_extent_flags(leaf, ei,
8090                                                BTRFS_EXTENT_FLAG_DATA);
8091                 } else {
8092                         struct btrfs_disk_key copy_key;;
8093
8094                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8095                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8096                                              sizeof(*bi));
8097
8098                         btrfs_set_disk_key_objectid(&copy_key,
8099                                                     rec->info_objectid);
8100                         btrfs_set_disk_key_type(&copy_key, 0);
8101                         btrfs_set_disk_key_offset(&copy_key, 0);
8102
8103                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8104                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8105
8106                         btrfs_set_extent_flags(leaf, ei,
8107                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8108                 }
8109
8110                 btrfs_mark_buffer_dirty(leaf);
8111                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8112                                                rec->max_size, 1, 0);
8113                 if (ret)
8114                         goto fail;
8115                 btrfs_release_path(path);
8116         }
8117
8118         if (back->is_data) {
8119                 u64 parent;
8120                 int i;
8121
8122                 dback = to_data_backref(back);
8123                 if (back->full_backref)
8124                         parent = dback->parent;
8125                 else
8126                         parent = 0;
8127
8128                 for (i = 0; i < dback->found_ref; i++) {
8129                         /* if parent != 0, we're doing a full backref
8130                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8131                          * just makes the backref allocator create a data
8132                          * backref
8133                          */
8134                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8135                                                    rec->start, rec->max_size,
8136                                                    parent,
8137                                                    dback->root,
8138                                                    parent ?
8139                                                    BTRFS_FIRST_FREE_OBJECTID :
8140                                                    dback->owner,
8141                                                    dback->offset);
8142                         if (ret)
8143                                 break;
8144                 }
8145                 fprintf(stderr, "adding new data backref"
8146                                 " on %llu %s %llu owner %llu"
8147                                 " offset %llu found %d\n",
8148                                 (unsigned long long)rec->start,
8149                                 back->full_backref ?
8150                                 "parent" : "root",
8151                                 back->full_backref ?
8152                                 (unsigned long long)parent :
8153                                 (unsigned long long)dback->root,
8154                                 (unsigned long long)dback->owner,
8155                                 (unsigned long long)dback->offset,
8156                                 dback->found_ref);
8157         } else {
8158                 u64 parent;
8159                 struct tree_backref *tback;
8160
8161                 tback = to_tree_backref(back);
8162                 if (back->full_backref)
8163                         parent = tback->parent;
8164                 else
8165                         parent = 0;
8166
8167                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8168                                            rec->start, rec->max_size,
8169                                            parent, tback->root, 0, 0);
8170                 fprintf(stderr, "adding new tree backref on "
8171                         "start %llu len %llu parent %llu root %llu\n",
8172                         rec->start, rec->max_size, parent, tback->root);
8173         }
8174 fail:
8175         btrfs_release_path(path);
8176         return ret;
8177 }
8178
8179 static struct extent_entry *find_entry(struct list_head *entries,
8180                                        u64 bytenr, u64 bytes)
8181 {
8182         struct extent_entry *entry = NULL;
8183
8184         list_for_each_entry(entry, entries, list) {
8185                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8186                         return entry;
8187         }
8188
8189         return NULL;
8190 }
8191
8192 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8193 {
8194         struct extent_entry *entry, *best = NULL, *prev = NULL;
8195
8196         list_for_each_entry(entry, entries, list) {
8197                 /*
8198                  * If there are as many broken entries as entries then we know
8199                  * not to trust this particular entry.
8200                  */
8201                 if (entry->broken == entry->count)
8202                         continue;
8203
8204                 /*
8205                  * Special case, when there are only two entries and 'best' is
8206                  * the first one
8207                  */
8208                 if (!prev) {
8209                         best = entry;
8210                         prev = entry;
8211                         continue;
8212                 }
8213
8214                 /*
8215                  * If our current entry == best then we can't be sure our best
8216                  * is really the best, so we need to keep searching.
8217                  */
8218                 if (best && best->count == entry->count) {
8219                         prev = entry;
8220                         best = NULL;
8221                         continue;
8222                 }
8223
8224                 /* Prev == entry, not good enough, have to keep searching */
8225                 if (!prev->broken && prev->count == entry->count)
8226                         continue;
8227
8228                 if (!best)
8229                         best = (prev->count > entry->count) ? prev : entry;
8230                 else if (best->count < entry->count)
8231                         best = entry;
8232                 prev = entry;
8233         }
8234
8235         return best;
8236 }
8237
8238 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8239                       struct data_backref *dback, struct extent_entry *entry)
8240 {
8241         struct btrfs_trans_handle *trans;
8242         struct btrfs_root *root;
8243         struct btrfs_file_extent_item *fi;
8244         struct extent_buffer *leaf;
8245         struct btrfs_key key;
8246         u64 bytenr, bytes;
8247         int ret, err;
8248
8249         key.objectid = dback->root;
8250         key.type = BTRFS_ROOT_ITEM_KEY;
8251         key.offset = (u64)-1;
8252         root = btrfs_read_fs_root(info, &key);
8253         if (IS_ERR(root)) {
8254                 fprintf(stderr, "Couldn't find root for our ref\n");
8255                 return -EINVAL;
8256         }
8257
8258         /*
8259          * The backref points to the original offset of the extent if it was
8260          * split, so we need to search down to the offset we have and then walk
8261          * forward until we find the backref we're looking for.
8262          */
8263         key.objectid = dback->owner;
8264         key.type = BTRFS_EXTENT_DATA_KEY;
8265         key.offset = dback->offset;
8266         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8267         if (ret < 0) {
8268                 fprintf(stderr, "Error looking up ref %d\n", ret);
8269                 return ret;
8270         }
8271
8272         while (1) {
8273                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8274                         ret = btrfs_next_leaf(root, path);
8275                         if (ret) {
8276                                 fprintf(stderr, "Couldn't find our ref, next\n");
8277                                 return -EINVAL;
8278                         }
8279                 }
8280                 leaf = path->nodes[0];
8281                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8282                 if (key.objectid != dback->owner ||
8283                     key.type != BTRFS_EXTENT_DATA_KEY) {
8284                         fprintf(stderr, "Couldn't find our ref, search\n");
8285                         return -EINVAL;
8286                 }
8287                 fi = btrfs_item_ptr(leaf, path->slots[0],
8288                                     struct btrfs_file_extent_item);
8289                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8290                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8291
8292                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8293                         break;
8294                 path->slots[0]++;
8295         }
8296
8297         btrfs_release_path(path);
8298
8299         trans = btrfs_start_transaction(root, 1);
8300         if (IS_ERR(trans))
8301                 return PTR_ERR(trans);
8302
8303         /*
8304          * Ok we have the key of the file extent we want to fix, now we can cow
8305          * down to the thing and fix it.
8306          */
8307         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8308         if (ret < 0) {
8309                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8310                         key.objectid, key.type, key.offset, ret);
8311                 goto out;
8312         }
8313         if (ret > 0) {
8314                 fprintf(stderr, "Well that's odd, we just found this key "
8315                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8316                         key.offset);
8317                 ret = -EINVAL;
8318                 goto out;
8319         }
8320         leaf = path->nodes[0];
8321         fi = btrfs_item_ptr(leaf, path->slots[0],
8322                             struct btrfs_file_extent_item);
8323
8324         if (btrfs_file_extent_compression(leaf, fi) &&
8325             dback->disk_bytenr != entry->bytenr) {
8326                 fprintf(stderr, "Ref doesn't match the record start and is "
8327                         "compressed, please take a btrfs-image of this file "
8328                         "system and send it to a btrfs developer so they can "
8329                         "complete this functionality for bytenr %Lu\n",
8330                         dback->disk_bytenr);
8331                 ret = -EINVAL;
8332                 goto out;
8333         }
8334
8335         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8336                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8337         } else if (dback->disk_bytenr > entry->bytenr) {
8338                 u64 off_diff, offset;
8339
8340                 off_diff = dback->disk_bytenr - entry->bytenr;
8341                 offset = btrfs_file_extent_offset(leaf, fi);
8342                 if (dback->disk_bytenr + offset +
8343                     btrfs_file_extent_num_bytes(leaf, fi) >
8344                     entry->bytenr + entry->bytes) {
8345                         fprintf(stderr, "Ref is past the entry end, please "
8346                                 "take a btrfs-image of this file system and "
8347                                 "send it to a btrfs developer, ref %Lu\n",
8348                                 dback->disk_bytenr);
8349                         ret = -EINVAL;
8350                         goto out;
8351                 }
8352                 offset += off_diff;
8353                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8354                 btrfs_set_file_extent_offset(leaf, fi, offset);
8355         } else if (dback->disk_bytenr < entry->bytenr) {
8356                 u64 offset;
8357
8358                 offset = btrfs_file_extent_offset(leaf, fi);
8359                 if (dback->disk_bytenr + offset < entry->bytenr) {
8360                         fprintf(stderr, "Ref is before the entry start, please"
8361                                 " take a btrfs-image of this file system and "
8362                                 "send it to a btrfs developer, ref %Lu\n",
8363                                 dback->disk_bytenr);
8364                         ret = -EINVAL;
8365                         goto out;
8366                 }
8367
8368                 offset += dback->disk_bytenr;
8369                 offset -= entry->bytenr;
8370                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8371                 btrfs_set_file_extent_offset(leaf, fi, offset);
8372         }
8373
8374         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8375
8376         /*
8377          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8378          * only do this if we aren't using compression, otherwise it's a
8379          * trickier case.
8380          */
8381         if (!btrfs_file_extent_compression(leaf, fi))
8382                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8383         else
8384                 printf("ram bytes may be wrong?\n");
8385         btrfs_mark_buffer_dirty(leaf);
8386 out:
8387         err = btrfs_commit_transaction(trans, root);
8388         btrfs_release_path(path);
8389         return ret ? ret : err;
8390 }
8391
8392 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8393                            struct extent_record *rec)
8394 {
8395         struct extent_backref *back;
8396         struct data_backref *dback;
8397         struct extent_entry *entry, *best = NULL;
8398         LIST_HEAD(entries);
8399         int nr_entries = 0;
8400         int broken_entries = 0;
8401         int ret = 0;
8402         short mismatch = 0;
8403
8404         /*
8405          * Metadata is easy and the backrefs should always agree on bytenr and
8406          * size, if not we've got bigger issues.
8407          */
8408         if (rec->metadata)
8409                 return 0;
8410
8411         list_for_each_entry(back, &rec->backrefs, list) {
8412                 if (back->full_backref || !back->is_data)
8413                         continue;
8414
8415                 dback = to_data_backref(back);
8416
8417                 /*
8418                  * We only pay attention to backrefs that we found a real
8419                  * backref for.
8420                  */
8421                 if (dback->found_ref == 0)
8422                         continue;
8423
8424                 /*
8425                  * For now we only catch when the bytes don't match, not the
8426                  * bytenr.  We can easily do this at the same time, but I want
8427                  * to have a fs image to test on before we just add repair
8428                  * functionality willy-nilly so we know we won't screw up the
8429                  * repair.
8430                  */
8431
8432                 entry = find_entry(&entries, dback->disk_bytenr,
8433                                    dback->bytes);
8434                 if (!entry) {
8435                         entry = malloc(sizeof(struct extent_entry));
8436                         if (!entry) {
8437                                 ret = -ENOMEM;
8438                                 goto out;
8439                         }
8440                         memset(entry, 0, sizeof(*entry));
8441                         entry->bytenr = dback->disk_bytenr;
8442                         entry->bytes = dback->bytes;
8443                         list_add_tail(&entry->list, &entries);
8444                         nr_entries++;
8445                 }
8446
8447                 /*
8448                  * If we only have on entry we may think the entries agree when
8449                  * in reality they don't so we have to do some extra checking.
8450                  */
8451                 if (dback->disk_bytenr != rec->start ||
8452                     dback->bytes != rec->nr || back->broken)
8453                         mismatch = 1;
8454
8455                 if (back->broken) {
8456                         entry->broken++;
8457                         broken_entries++;
8458                 }
8459
8460                 entry->count++;
8461         }
8462
8463         /* Yay all the backrefs agree, carry on good sir */
8464         if (nr_entries <= 1 && !mismatch)
8465                 goto out;
8466
8467         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8468                 "%Lu\n", rec->start);
8469
8470         /*
8471          * First we want to see if the backrefs can agree amongst themselves who
8472          * is right, so figure out which one of the entries has the highest
8473          * count.
8474          */
8475         best = find_most_right_entry(&entries);
8476
8477         /*
8478          * Ok so we may have an even split between what the backrefs think, so
8479          * this is where we use the extent ref to see what it thinks.
8480          */
8481         if (!best) {
8482                 entry = find_entry(&entries, rec->start, rec->nr);
8483                 if (!entry && (!broken_entries || !rec->found_rec)) {
8484                         fprintf(stderr, "Backrefs don't agree with each other "
8485                                 "and extent record doesn't agree with anybody,"
8486                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8487                                 rec->start, rec->nr);
8488                         ret = -EINVAL;
8489                         goto out;
8490                 } else if (!entry) {
8491                         /*
8492                          * Ok our backrefs were broken, we'll assume this is the
8493                          * correct value and add an entry for this range.
8494                          */
8495                         entry = malloc(sizeof(struct extent_entry));
8496                         if (!entry) {
8497                                 ret = -ENOMEM;
8498                                 goto out;
8499                         }
8500                         memset(entry, 0, sizeof(*entry));
8501                         entry->bytenr = rec->start;
8502                         entry->bytes = rec->nr;
8503                         list_add_tail(&entry->list, &entries);
8504                         nr_entries++;
8505                 }
8506                 entry->count++;
8507                 best = find_most_right_entry(&entries);
8508                 if (!best) {
8509                         fprintf(stderr, "Backrefs and extent record evenly "
8510                                 "split on who is right, this is going to "
8511                                 "require user input to fix bytenr %Lu bytes "
8512                                 "%Lu\n", rec->start, rec->nr);
8513                         ret = -EINVAL;
8514                         goto out;
8515                 }
8516         }
8517
8518         /*
8519          * I don't think this can happen currently as we'll abort() if we catch
8520          * this case higher up, but in case somebody removes that we still can't
8521          * deal with it properly here yet, so just bail out of that's the case.
8522          */
8523         if (best->bytenr != rec->start) {
8524                 fprintf(stderr, "Extent start and backref starts don't match, "
8525                         "please use btrfs-image on this file system and send "
8526                         "it to a btrfs developer so they can make fsck fix "
8527                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8528                         rec->start, rec->nr);
8529                 ret = -EINVAL;
8530                 goto out;
8531         }
8532
8533         /*
8534          * Ok great we all agreed on an extent record, let's go find the real
8535          * references and fix up the ones that don't match.
8536          */
8537         list_for_each_entry(back, &rec->backrefs, list) {
8538                 if (back->full_backref || !back->is_data)
8539                         continue;
8540
8541                 dback = to_data_backref(back);
8542
8543                 /*
8544                  * Still ignoring backrefs that don't have a real ref attached
8545                  * to them.
8546                  */
8547                 if (dback->found_ref == 0)
8548                         continue;
8549
8550                 if (dback->bytes == best->bytes &&
8551                     dback->disk_bytenr == best->bytenr)
8552                         continue;
8553
8554                 ret = repair_ref(info, path, dback, best);
8555                 if (ret)
8556                         goto out;
8557         }
8558
8559         /*
8560          * Ok we messed with the actual refs, which means we need to drop our
8561          * entire cache and go back and rescan.  I know this is a huge pain and
8562          * adds a lot of extra work, but it's the only way to be safe.  Once all
8563          * the backrefs agree we may not need to do anything to the extent
8564          * record itself.
8565          */
8566         ret = -EAGAIN;
8567 out:
8568         while (!list_empty(&entries)) {
8569                 entry = list_entry(entries.next, struct extent_entry, list);
8570                 list_del_init(&entry->list);
8571                 free(entry);
8572         }
8573         return ret;
8574 }
8575
8576 static int process_duplicates(struct btrfs_root *root,
8577                               struct cache_tree *extent_cache,
8578                               struct extent_record *rec)
8579 {
8580         struct extent_record *good, *tmp;
8581         struct cache_extent *cache;
8582         int ret;
8583
8584         /*
8585          * If we found a extent record for this extent then return, or if we
8586          * have more than one duplicate we are likely going to need to delete
8587          * something.
8588          */
8589         if (rec->found_rec || rec->num_duplicates > 1)
8590                 return 0;
8591
8592         /* Shouldn't happen but just in case */
8593         BUG_ON(!rec->num_duplicates);
8594
8595         /*
8596          * So this happens if we end up with a backref that doesn't match the
8597          * actual extent entry.  So either the backref is bad or the extent
8598          * entry is bad.  Either way we want to have the extent_record actually
8599          * reflect what we found in the extent_tree, so we need to take the
8600          * duplicate out and use that as the extent_record since the only way we
8601          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8602          */
8603         remove_cache_extent(extent_cache, &rec->cache);
8604
8605         good = to_extent_record(rec->dups.next);
8606         list_del_init(&good->list);
8607         INIT_LIST_HEAD(&good->backrefs);
8608         INIT_LIST_HEAD(&good->dups);
8609         good->cache.start = good->start;
8610         good->cache.size = good->nr;
8611         good->content_checked = 0;
8612         good->owner_ref_checked = 0;
8613         good->num_duplicates = 0;
8614         good->refs = rec->refs;
8615         list_splice_init(&rec->backrefs, &good->backrefs);
8616         while (1) {
8617                 cache = lookup_cache_extent(extent_cache, good->start,
8618                                             good->nr);
8619                 if (!cache)
8620                         break;
8621                 tmp = container_of(cache, struct extent_record, cache);
8622
8623                 /*
8624                  * If we find another overlapping extent and it's found_rec is
8625                  * set then it's a duplicate and we need to try and delete
8626                  * something.
8627                  */
8628                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8629                         if (list_empty(&good->list))
8630                                 list_add_tail(&good->list,
8631                                               &duplicate_extents);
8632                         good->num_duplicates += tmp->num_duplicates + 1;
8633                         list_splice_init(&tmp->dups, &good->dups);
8634                         list_del_init(&tmp->list);
8635                         list_add_tail(&tmp->list, &good->dups);
8636                         remove_cache_extent(extent_cache, &tmp->cache);
8637                         continue;
8638                 }
8639
8640                 /*
8641                  * Ok we have another non extent item backed extent rec, so lets
8642                  * just add it to this extent and carry on like we did above.
8643                  */
8644                 good->refs += tmp->refs;
8645                 list_splice_init(&tmp->backrefs, &good->backrefs);
8646                 remove_cache_extent(extent_cache, &tmp->cache);
8647                 free(tmp);
8648         }
8649         ret = insert_cache_extent(extent_cache, &good->cache);
8650         BUG_ON(ret);
8651         free(rec);
8652         return good->num_duplicates ? 0 : 1;
8653 }
8654
8655 static int delete_duplicate_records(struct btrfs_root *root,
8656                                     struct extent_record *rec)
8657 {
8658         struct btrfs_trans_handle *trans;
8659         LIST_HEAD(delete_list);
8660         struct btrfs_path path;
8661         struct extent_record *tmp, *good, *n;
8662         int nr_del = 0;
8663         int ret = 0, err;
8664         struct btrfs_key key;
8665
8666         btrfs_init_path(&path);
8667
8668         good = rec;
8669         /* Find the record that covers all of the duplicates. */
8670         list_for_each_entry(tmp, &rec->dups, list) {
8671                 if (good->start < tmp->start)
8672                         continue;
8673                 if (good->nr > tmp->nr)
8674                         continue;
8675
8676                 if (tmp->start + tmp->nr < good->start + good->nr) {
8677                         fprintf(stderr, "Ok we have overlapping extents that "
8678                                 "aren't completely covered by each other, this "
8679                                 "is going to require more careful thought.  "
8680                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8681                                 tmp->start, tmp->nr, good->start, good->nr);
8682                         abort();
8683                 }
8684                 good = tmp;
8685         }
8686
8687         if (good != rec)
8688                 list_add_tail(&rec->list, &delete_list);
8689
8690         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8691                 if (tmp == good)
8692                         continue;
8693                 list_move_tail(&tmp->list, &delete_list);
8694         }
8695
8696         root = root->fs_info->extent_root;
8697         trans = btrfs_start_transaction(root, 1);
8698         if (IS_ERR(trans)) {
8699                 ret = PTR_ERR(trans);
8700                 goto out;
8701         }
8702
8703         list_for_each_entry(tmp, &delete_list, list) {
8704                 if (tmp->found_rec == 0)
8705                         continue;
8706                 key.objectid = tmp->start;
8707                 key.type = BTRFS_EXTENT_ITEM_KEY;
8708                 key.offset = tmp->nr;
8709
8710                 /* Shouldn't happen but just in case */
8711                 if (tmp->metadata) {
8712                         fprintf(stderr, "Well this shouldn't happen, extent "
8713                                 "record overlaps but is metadata? "
8714                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8715                         abort();
8716                 }
8717
8718                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8719                 if (ret) {
8720                         if (ret > 0)
8721                                 ret = -EINVAL;
8722                         break;
8723                 }
8724                 ret = btrfs_del_item(trans, root, &path);
8725                 if (ret)
8726                         break;
8727                 btrfs_release_path(&path);
8728                 nr_del++;
8729         }
8730         err = btrfs_commit_transaction(trans, root);
8731         if (err && !ret)
8732                 ret = err;
8733 out:
8734         while (!list_empty(&delete_list)) {
8735                 tmp = to_extent_record(delete_list.next);
8736                 list_del_init(&tmp->list);
8737                 if (tmp == rec)
8738                         continue;
8739                 free(tmp);
8740         }
8741
8742         while (!list_empty(&rec->dups)) {
8743                 tmp = to_extent_record(rec->dups.next);
8744                 list_del_init(&tmp->list);
8745                 free(tmp);
8746         }
8747
8748         btrfs_release_path(&path);
8749
8750         if (!ret && !nr_del)
8751                 rec->num_duplicates = 0;
8752
8753         return ret ? ret : nr_del;
8754 }
8755
8756 static int find_possible_backrefs(struct btrfs_fs_info *info,
8757                                   struct btrfs_path *path,
8758                                   struct cache_tree *extent_cache,
8759                                   struct extent_record *rec)
8760 {
8761         struct btrfs_root *root;
8762         struct extent_backref *back;
8763         struct data_backref *dback;
8764         struct cache_extent *cache;
8765         struct btrfs_file_extent_item *fi;
8766         struct btrfs_key key;
8767         u64 bytenr, bytes;
8768         int ret;
8769
8770         list_for_each_entry(back, &rec->backrefs, list) {
8771                 /* Don't care about full backrefs (poor unloved backrefs) */
8772                 if (back->full_backref || !back->is_data)
8773                         continue;
8774
8775                 dback = to_data_backref(back);
8776
8777                 /* We found this one, we don't need to do a lookup */
8778                 if (dback->found_ref)
8779                         continue;
8780
8781                 key.objectid = dback->root;
8782                 key.type = BTRFS_ROOT_ITEM_KEY;
8783                 key.offset = (u64)-1;
8784
8785                 root = btrfs_read_fs_root(info, &key);
8786
8787                 /* No root, definitely a bad ref, skip */
8788                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8789                         continue;
8790                 /* Other err, exit */
8791                 if (IS_ERR(root))
8792                         return PTR_ERR(root);
8793
8794                 key.objectid = dback->owner;
8795                 key.type = BTRFS_EXTENT_DATA_KEY;
8796                 key.offset = dback->offset;
8797                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8798                 if (ret) {
8799                         btrfs_release_path(path);
8800                         if (ret < 0)
8801                                 return ret;
8802                         /* Didn't find it, we can carry on */
8803                         ret = 0;
8804                         continue;
8805                 }
8806
8807                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8808                                     struct btrfs_file_extent_item);
8809                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8810                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8811                 btrfs_release_path(path);
8812                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8813                 if (cache) {
8814                         struct extent_record *tmp;
8815                         tmp = container_of(cache, struct extent_record, cache);
8816
8817                         /*
8818                          * If we found an extent record for the bytenr for this
8819                          * particular backref then we can't add it to our
8820                          * current extent record.  We only want to add backrefs
8821                          * that don't have a corresponding extent item in the
8822                          * extent tree since they likely belong to this record
8823                          * and we need to fix it if it doesn't match bytenrs.
8824                          */
8825                         if  (tmp->found_rec)
8826                                 continue;
8827                 }
8828
8829                 dback->found_ref += 1;
8830                 dback->disk_bytenr = bytenr;
8831                 dback->bytes = bytes;
8832
8833                 /*
8834                  * Set this so the verify backref code knows not to trust the
8835                  * values in this backref.
8836                  */
8837                 back->broken = 1;
8838         }
8839
8840         return 0;
8841 }
8842
8843 /*
8844  * Record orphan data ref into corresponding root.
8845  *
8846  * Return 0 if the extent item contains data ref and recorded.
8847  * Return 1 if the extent item contains no useful data ref
8848  *   On that case, it may contains only shared_dataref or metadata backref
8849  *   or the file extent exists(this should be handled by the extent bytenr
8850  *   recovery routine)
8851  * Return <0 if something goes wrong.
8852  */
8853 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8854                                       struct extent_record *rec)
8855 {
8856         struct btrfs_key key;
8857         struct btrfs_root *dest_root;
8858         struct extent_backref *back;
8859         struct data_backref *dback;
8860         struct orphan_data_extent *orphan;
8861         struct btrfs_path path;
8862         int recorded_data_ref = 0;
8863         int ret = 0;
8864
8865         if (rec->metadata)
8866                 return 1;
8867         btrfs_init_path(&path);
8868         list_for_each_entry(back, &rec->backrefs, list) {
8869                 if (back->full_backref || !back->is_data ||
8870                     !back->found_extent_tree)
8871                         continue;
8872                 dback = to_data_backref(back);
8873                 if (dback->found_ref)
8874                         continue;
8875                 key.objectid = dback->root;
8876                 key.type = BTRFS_ROOT_ITEM_KEY;
8877                 key.offset = (u64)-1;
8878
8879                 dest_root = btrfs_read_fs_root(fs_info, &key);
8880
8881                 /* For non-exist root we just skip it */
8882                 if (IS_ERR(dest_root) || !dest_root)
8883                         continue;
8884
8885                 key.objectid = dback->owner;
8886                 key.type = BTRFS_EXTENT_DATA_KEY;
8887                 key.offset = dback->offset;
8888
8889                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8890                 btrfs_release_path(&path);
8891                 /*
8892                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8893                  * we need to record it for inode/file extent rebuild.
8894                  * For ret > 0, we record it only for file extent rebuild.
8895                  * For ret == 0, the file extent exists but only bytenr
8896                  * mismatch, let the original bytenr fix routine to handle,
8897                  * don't record it.
8898                  */
8899                 if (ret == 0)
8900                         continue;
8901                 ret = 0;
8902                 orphan = malloc(sizeof(*orphan));
8903                 if (!orphan) {
8904                         ret = -ENOMEM;
8905                         goto out;
8906                 }
8907                 INIT_LIST_HEAD(&orphan->list);
8908                 orphan->root = dback->root;
8909                 orphan->objectid = dback->owner;
8910                 orphan->offset = dback->offset;
8911                 orphan->disk_bytenr = rec->cache.start;
8912                 orphan->disk_len = rec->cache.size;
8913                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8914                 recorded_data_ref = 1;
8915         }
8916 out:
8917         btrfs_release_path(&path);
8918         if (!ret)
8919                 return !recorded_data_ref;
8920         else
8921                 return ret;
8922 }
8923
8924 /*
8925  * when an incorrect extent item is found, this will delete
8926  * all of the existing entries for it and recreate them
8927  * based on what the tree scan found.
8928  */
8929 static int fixup_extent_refs(struct btrfs_fs_info *info,
8930                              struct cache_tree *extent_cache,
8931                              struct extent_record *rec)
8932 {
8933         struct btrfs_trans_handle *trans = NULL;
8934         int ret;
8935         struct btrfs_path path;
8936         struct list_head *cur = rec->backrefs.next;
8937         struct cache_extent *cache;
8938         struct extent_backref *back;
8939         int allocated = 0;
8940         u64 flags = 0;
8941
8942         if (rec->flag_block_full_backref)
8943                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8944
8945         btrfs_init_path(&path);
8946         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8947                 /*
8948                  * Sometimes the backrefs themselves are so broken they don't
8949                  * get attached to any meaningful rec, so first go back and
8950                  * check any of our backrefs that we couldn't find and throw
8951                  * them into the list if we find the backref so that
8952                  * verify_backrefs can figure out what to do.
8953                  */
8954                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8955                 if (ret < 0)
8956                         goto out;
8957         }
8958
8959         /* step one, make sure all of the backrefs agree */
8960         ret = verify_backrefs(info, &path, rec);
8961         if (ret < 0)
8962                 goto out;
8963
8964         trans = btrfs_start_transaction(info->extent_root, 1);
8965         if (IS_ERR(trans)) {
8966                 ret = PTR_ERR(trans);
8967                 goto out;
8968         }
8969
8970         /* step two, delete all the existing records */
8971         ret = delete_extent_records(trans, info->extent_root, &path,
8972                                     rec->start, rec->max_size);
8973
8974         if (ret < 0)
8975                 goto out;
8976
8977         /* was this block corrupt?  If so, don't add references to it */
8978         cache = lookup_cache_extent(info->corrupt_blocks,
8979                                     rec->start, rec->max_size);
8980         if (cache) {
8981                 ret = 0;
8982                 goto out;
8983         }
8984
8985         /* step three, recreate all the refs we did find */
8986         while(cur != &rec->backrefs) {
8987                 back = to_extent_backref(cur);
8988                 cur = cur->next;
8989
8990                 /*
8991                  * if we didn't find any references, don't create a
8992                  * new extent record
8993                  */
8994                 if (!back->found_ref)
8995                         continue;
8996
8997                 rec->bad_full_backref = 0;
8998                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8999                 allocated = 1;
9000
9001                 if (ret)
9002                         goto out;
9003         }
9004 out:
9005         if (trans) {
9006                 int err = btrfs_commit_transaction(trans, info->extent_root);
9007                 if (!ret)
9008                         ret = err;
9009         }
9010
9011         btrfs_release_path(&path);
9012         return ret;
9013 }
9014
9015 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9016                               struct extent_record *rec)
9017 {
9018         struct btrfs_trans_handle *trans;
9019         struct btrfs_root *root = fs_info->extent_root;
9020         struct btrfs_path path;
9021         struct btrfs_extent_item *ei;
9022         struct btrfs_key key;
9023         u64 flags;
9024         int ret = 0;
9025
9026         key.objectid = rec->start;
9027         if (rec->metadata) {
9028                 key.type = BTRFS_METADATA_ITEM_KEY;
9029                 key.offset = rec->info_level;
9030         } else {
9031                 key.type = BTRFS_EXTENT_ITEM_KEY;
9032                 key.offset = rec->max_size;
9033         }
9034
9035         trans = btrfs_start_transaction(root, 0);
9036         if (IS_ERR(trans))
9037                 return PTR_ERR(trans);
9038
9039         btrfs_init_path(&path);
9040         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9041         if (ret < 0) {
9042                 btrfs_release_path(&path);
9043                 btrfs_commit_transaction(trans, root);
9044                 return ret;
9045         } else if (ret) {
9046                 fprintf(stderr, "Didn't find extent for %llu\n",
9047                         (unsigned long long)rec->start);
9048                 btrfs_release_path(&path);
9049                 btrfs_commit_transaction(trans, root);
9050                 return -ENOENT;
9051         }
9052
9053         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9054                             struct btrfs_extent_item);
9055         flags = btrfs_extent_flags(path.nodes[0], ei);
9056         if (rec->flag_block_full_backref) {
9057                 fprintf(stderr, "setting full backref on %llu\n",
9058                         (unsigned long long)key.objectid);
9059                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9060         } else {
9061                 fprintf(stderr, "clearing full backref on %llu\n",
9062                         (unsigned long long)key.objectid);
9063                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9064         }
9065         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9066         btrfs_mark_buffer_dirty(path.nodes[0]);
9067         btrfs_release_path(&path);
9068         return btrfs_commit_transaction(trans, root);
9069 }
9070
9071 /* right now we only prune from the extent allocation tree */
9072 static int prune_one_block(struct btrfs_trans_handle *trans,
9073                            struct btrfs_fs_info *info,
9074                            struct btrfs_corrupt_block *corrupt)
9075 {
9076         int ret;
9077         struct btrfs_path path;
9078         struct extent_buffer *eb;
9079         u64 found;
9080         int slot;
9081         int nritems;
9082         int level = corrupt->level + 1;
9083
9084         btrfs_init_path(&path);
9085 again:
9086         /* we want to stop at the parent to our busted block */
9087         path.lowest_level = level;
9088
9089         ret = btrfs_search_slot(trans, info->extent_root,
9090                                 &corrupt->key, &path, -1, 1);
9091
9092         if (ret < 0)
9093                 goto out;
9094
9095         eb = path.nodes[level];
9096         if (!eb) {
9097                 ret = -ENOENT;
9098                 goto out;
9099         }
9100
9101         /*
9102          * hopefully the search gave us the block we want to prune,
9103          * lets try that first
9104          */
9105         slot = path.slots[level];
9106         found =  btrfs_node_blockptr(eb, slot);
9107         if (found == corrupt->cache.start)
9108                 goto del_ptr;
9109
9110         nritems = btrfs_header_nritems(eb);
9111
9112         /* the search failed, lets scan this node and hope we find it */
9113         for (slot = 0; slot < nritems; slot++) {
9114                 found =  btrfs_node_blockptr(eb, slot);
9115                 if (found == corrupt->cache.start)
9116                         goto del_ptr;
9117         }
9118         /*
9119          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9120          * to this block
9121          */
9122         if (eb == info->extent_root->node) {
9123                 ret = -ENOENT;
9124                 goto out;
9125         } else {
9126                 level++;
9127                 btrfs_release_path(&path);
9128                 goto again;
9129         }
9130
9131 del_ptr:
9132         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9133         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9134
9135 out:
9136         btrfs_release_path(&path);
9137         return ret;
9138 }
9139
9140 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9141 {
9142         struct btrfs_trans_handle *trans = NULL;
9143         struct cache_extent *cache;
9144         struct btrfs_corrupt_block *corrupt;
9145
9146         while (1) {
9147                 cache = search_cache_extent(info->corrupt_blocks, 0);
9148                 if (!cache)
9149                         break;
9150                 if (!trans) {
9151                         trans = btrfs_start_transaction(info->extent_root, 1);
9152                         if (IS_ERR(trans))
9153                                 return PTR_ERR(trans);
9154                 }
9155                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9156                 prune_one_block(trans, info, corrupt);
9157                 remove_cache_extent(info->corrupt_blocks, cache);
9158         }
9159         if (trans)
9160                 return btrfs_commit_transaction(trans, info->extent_root);
9161         return 0;
9162 }
9163
9164 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9165 {
9166         struct btrfs_block_group_cache *cache;
9167         u64 start, end;
9168         int ret;
9169
9170         while (1) {
9171                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9172                                             &start, &end, EXTENT_DIRTY);
9173                 if (ret)
9174                         break;
9175                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9176                                    GFP_NOFS);
9177         }
9178
9179         start = 0;
9180         while (1) {
9181                 cache = btrfs_lookup_first_block_group(fs_info, start);
9182                 if (!cache)
9183                         break;
9184                 if (cache->cached)
9185                         cache->cached = 0;
9186                 start = cache->key.objectid + cache->key.offset;
9187         }
9188 }
9189
9190 static int check_extent_refs(struct btrfs_root *root,
9191                              struct cache_tree *extent_cache)
9192 {
9193         struct extent_record *rec;
9194         struct cache_extent *cache;
9195         int err = 0;
9196         int ret = 0;
9197         int fixed = 0;
9198         int had_dups = 0;
9199         int recorded = 0;
9200
9201         if (repair) {
9202                 /*
9203                  * if we're doing a repair, we have to make sure
9204                  * we don't allocate from the problem extents.
9205                  * In the worst case, this will be all the
9206                  * extents in the FS
9207                  */
9208                 cache = search_cache_extent(extent_cache, 0);
9209                 while(cache) {
9210                         rec = container_of(cache, struct extent_record, cache);
9211                         set_extent_dirty(root->fs_info->excluded_extents,
9212                                          rec->start,
9213                                          rec->start + rec->max_size - 1,
9214                                          GFP_NOFS);
9215                         cache = next_cache_extent(cache);
9216                 }
9217
9218                 /* pin down all the corrupted blocks too */
9219                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9220                 while(cache) {
9221                         set_extent_dirty(root->fs_info->excluded_extents,
9222                                          cache->start,
9223                                          cache->start + cache->size - 1,
9224                                          GFP_NOFS);
9225                         cache = next_cache_extent(cache);
9226                 }
9227                 prune_corrupt_blocks(root->fs_info);
9228                 reset_cached_block_groups(root->fs_info);
9229         }
9230
9231         reset_cached_block_groups(root->fs_info);
9232
9233         /*
9234          * We need to delete any duplicate entries we find first otherwise we
9235          * could mess up the extent tree when we have backrefs that actually
9236          * belong to a different extent item and not the weird duplicate one.
9237          */
9238         while (repair && !list_empty(&duplicate_extents)) {
9239                 rec = to_extent_record(duplicate_extents.next);
9240                 list_del_init(&rec->list);
9241
9242                 /* Sometimes we can find a backref before we find an actual
9243                  * extent, so we need to process it a little bit to see if there
9244                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9245                  * if this is a backref screwup.  If we need to delete stuff
9246                  * process_duplicates() will return 0, otherwise it will return
9247                  * 1 and we
9248                  */
9249                 if (process_duplicates(root, extent_cache, rec))
9250                         continue;
9251                 ret = delete_duplicate_records(root, rec);
9252                 if (ret < 0)
9253                         return ret;
9254                 /*
9255                  * delete_duplicate_records will return the number of entries
9256                  * deleted, so if it's greater than 0 then we know we actually
9257                  * did something and we need to remove.
9258                  */
9259                 if (ret)
9260                         had_dups = 1;
9261         }
9262
9263         if (had_dups)
9264                 return -EAGAIN;
9265
9266         while(1) {
9267                 int cur_err = 0;
9268
9269                 fixed = 0;
9270                 recorded = 0;
9271                 cache = search_cache_extent(extent_cache, 0);
9272                 if (!cache)
9273                         break;
9274                 rec = container_of(cache, struct extent_record, cache);
9275                 if (rec->num_duplicates) {
9276                         fprintf(stderr, "extent item %llu has multiple extent "
9277                                 "items\n", (unsigned long long)rec->start);
9278                         err = 1;
9279                         cur_err = 1;
9280                 }
9281
9282                 if (rec->refs != rec->extent_item_refs) {
9283                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9284                                 (unsigned long long)rec->start,
9285                                 (unsigned long long)rec->nr);
9286                         fprintf(stderr, "extent item %llu, found %llu\n",
9287                                 (unsigned long long)rec->extent_item_refs,
9288                                 (unsigned long long)rec->refs);
9289                         ret = record_orphan_data_extents(root->fs_info, rec);
9290                         if (ret < 0)
9291                                 goto repair_abort;
9292                         if (ret == 0) {
9293                                 recorded = 1;
9294                         } else {
9295                                 /*
9296                                  * we can't use the extent to repair file
9297                                  * extent, let the fallback method handle it.
9298                                  */
9299                                 if (!fixed && repair) {
9300                                         ret = fixup_extent_refs(
9301                                                         root->fs_info,
9302                                                         extent_cache, rec);
9303                                         if (ret)
9304                                                 goto repair_abort;
9305                                         fixed = 1;
9306                                 }
9307                         }
9308                         err = 1;
9309                         cur_err = 1;
9310                 }
9311                 if (all_backpointers_checked(rec, 1)) {
9312                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9313                                 (unsigned long long)rec->start,
9314                                 (unsigned long long)rec->nr);
9315
9316                         if (!fixed && !recorded && repair) {
9317                                 ret = fixup_extent_refs(root->fs_info,
9318                                                         extent_cache, rec);
9319                                 if (ret)
9320                                         goto repair_abort;
9321                                 fixed = 1;
9322                         }
9323                         cur_err = 1;
9324                         err = 1;
9325                 }
9326                 if (!rec->owner_ref_checked) {
9327                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9328                                 (unsigned long long)rec->start,
9329                                 (unsigned long long)rec->nr);
9330                         if (!fixed && !recorded && repair) {
9331                                 ret = fixup_extent_refs(root->fs_info,
9332                                                         extent_cache, rec);
9333                                 if (ret)
9334                                         goto repair_abort;
9335                                 fixed = 1;
9336                         }
9337                         err = 1;
9338                         cur_err = 1;
9339                 }
9340                 if (rec->bad_full_backref) {
9341                         fprintf(stderr, "bad full backref, on [%llu]\n",
9342                                 (unsigned long long)rec->start);
9343                         if (repair) {
9344                                 ret = fixup_extent_flags(root->fs_info, rec);
9345                                 if (ret)
9346                                         goto repair_abort;
9347                                 fixed = 1;
9348                         }
9349                         err = 1;
9350                         cur_err = 1;
9351                 }
9352                 /*
9353                  * Although it's not a extent ref's problem, we reuse this
9354                  * routine for error reporting.
9355                  * No repair function yet.
9356                  */
9357                 if (rec->crossing_stripes) {
9358                         fprintf(stderr,
9359                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9360                                 rec->start, rec->start + rec->max_size);
9361                         err = 1;
9362                         cur_err = 1;
9363                 }
9364
9365                 if (rec->wrong_chunk_type) {
9366                         fprintf(stderr,
9367                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9368                                 rec->start, rec->start + rec->max_size);
9369                         err = 1;
9370                         cur_err = 1;
9371                 }
9372
9373                 remove_cache_extent(extent_cache, cache);
9374                 free_all_extent_backrefs(rec);
9375                 if (!init_extent_tree && repair && (!cur_err || fixed))
9376                         clear_extent_dirty(root->fs_info->excluded_extents,
9377                                            rec->start,
9378                                            rec->start + rec->max_size - 1,
9379                                            GFP_NOFS);
9380                 free(rec);
9381         }
9382 repair_abort:
9383         if (repair) {
9384                 if (ret && ret != -EAGAIN) {
9385                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9386                         exit(1);
9387                 } else if (!ret) {
9388                         struct btrfs_trans_handle *trans;
9389
9390                         root = root->fs_info->extent_root;
9391                         trans = btrfs_start_transaction(root, 1);
9392                         if (IS_ERR(trans)) {
9393                                 ret = PTR_ERR(trans);
9394                                 goto repair_abort;
9395                         }
9396
9397                         btrfs_fix_block_accounting(trans, root);
9398                         ret = btrfs_commit_transaction(trans, root);
9399                         if (ret)
9400                                 goto repair_abort;
9401                 }
9402                 if (err)
9403                         fprintf(stderr, "repaired damaged extent references\n");
9404                 return ret;
9405         }
9406         return err;
9407 }
9408
9409 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9410 {
9411         u64 stripe_size;
9412
9413         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9414                 stripe_size = length;
9415                 stripe_size /= num_stripes;
9416         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9417                 stripe_size = length * 2;
9418                 stripe_size /= num_stripes;
9419         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9420                 stripe_size = length;
9421                 stripe_size /= (num_stripes - 1);
9422         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9423                 stripe_size = length;
9424                 stripe_size /= (num_stripes - 2);
9425         } else {
9426                 stripe_size = length;
9427         }
9428         return stripe_size;
9429 }
9430
9431 /*
9432  * Check the chunk with its block group/dev list ref:
9433  * Return 0 if all refs seems valid.
9434  * Return 1 if part of refs seems valid, need later check for rebuild ref
9435  * like missing block group and needs to search extent tree to rebuild them.
9436  * Return -1 if essential refs are missing and unable to rebuild.
9437  */
9438 static int check_chunk_refs(struct chunk_record *chunk_rec,
9439                             struct block_group_tree *block_group_cache,
9440                             struct device_extent_tree *dev_extent_cache,
9441                             int silent)
9442 {
9443         struct cache_extent *block_group_item;
9444         struct block_group_record *block_group_rec;
9445         struct cache_extent *dev_extent_item;
9446         struct device_extent_record *dev_extent_rec;
9447         u64 devid;
9448         u64 offset;
9449         u64 length;
9450         int metadump_v2 = 0;
9451         int i;
9452         int ret = 0;
9453
9454         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9455                                                chunk_rec->offset,
9456                                                chunk_rec->length);
9457         if (block_group_item) {
9458                 block_group_rec = container_of(block_group_item,
9459                                                struct block_group_record,
9460                                                cache);
9461                 if (chunk_rec->length != block_group_rec->offset ||
9462                     chunk_rec->offset != block_group_rec->objectid ||
9463                     (!metadump_v2 &&
9464                      chunk_rec->type_flags != block_group_rec->flags)) {
9465                         if (!silent)
9466                                 fprintf(stderr,
9467                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9468                                         chunk_rec->objectid,
9469                                         chunk_rec->type,
9470                                         chunk_rec->offset,
9471                                         chunk_rec->length,
9472                                         chunk_rec->offset,
9473                                         chunk_rec->type_flags,
9474                                         block_group_rec->objectid,
9475                                         block_group_rec->type,
9476                                         block_group_rec->offset,
9477                                         block_group_rec->offset,
9478                                         block_group_rec->objectid,
9479                                         block_group_rec->flags);
9480                         ret = -1;
9481                 } else {
9482                         list_del_init(&block_group_rec->list);
9483                         chunk_rec->bg_rec = block_group_rec;
9484                 }
9485         } else {
9486                 if (!silent)
9487                         fprintf(stderr,
9488                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9489                                 chunk_rec->objectid,
9490                                 chunk_rec->type,
9491                                 chunk_rec->offset,
9492                                 chunk_rec->length,
9493                                 chunk_rec->offset,
9494                                 chunk_rec->type_flags);
9495                 ret = 1;
9496         }
9497
9498         if (metadump_v2)
9499                 return ret;
9500
9501         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9502                                     chunk_rec->num_stripes);
9503         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9504                 devid = chunk_rec->stripes[i].devid;
9505                 offset = chunk_rec->stripes[i].offset;
9506                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9507                                                        devid, offset, length);
9508                 if (dev_extent_item) {
9509                         dev_extent_rec = container_of(dev_extent_item,
9510                                                 struct device_extent_record,
9511                                                 cache);
9512                         if (dev_extent_rec->objectid != devid ||
9513                             dev_extent_rec->offset != offset ||
9514                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9515                             dev_extent_rec->length != length) {
9516                                 if (!silent)
9517                                         fprintf(stderr,
9518                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9519                                                 chunk_rec->objectid,
9520                                                 chunk_rec->type,
9521                                                 chunk_rec->offset,
9522                                                 chunk_rec->stripes[i].devid,
9523                                                 chunk_rec->stripes[i].offset,
9524                                                 dev_extent_rec->objectid,
9525                                                 dev_extent_rec->offset,
9526                                                 dev_extent_rec->length);
9527                                 ret = -1;
9528                         } else {
9529                                 list_move(&dev_extent_rec->chunk_list,
9530                                           &chunk_rec->dextents);
9531                         }
9532                 } else {
9533                         if (!silent)
9534                                 fprintf(stderr,
9535                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9536                                         chunk_rec->objectid,
9537                                         chunk_rec->type,
9538                                         chunk_rec->offset,
9539                                         chunk_rec->stripes[i].devid,
9540                                         chunk_rec->stripes[i].offset);
9541                         ret = -1;
9542                 }
9543         }
9544         return ret;
9545 }
9546
9547 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9548 int check_chunks(struct cache_tree *chunk_cache,
9549                  struct block_group_tree *block_group_cache,
9550                  struct device_extent_tree *dev_extent_cache,
9551                  struct list_head *good, struct list_head *bad,
9552                  struct list_head *rebuild, int silent)
9553 {
9554         struct cache_extent *chunk_item;
9555         struct chunk_record *chunk_rec;
9556         struct block_group_record *bg_rec;
9557         struct device_extent_record *dext_rec;
9558         int err;
9559         int ret = 0;
9560
9561         chunk_item = first_cache_extent(chunk_cache);
9562         while (chunk_item) {
9563                 chunk_rec = container_of(chunk_item, struct chunk_record,
9564                                          cache);
9565                 err = check_chunk_refs(chunk_rec, block_group_cache,
9566                                        dev_extent_cache, silent);
9567                 if (err < 0)
9568                         ret = err;
9569                 if (err == 0 && good)
9570                         list_add_tail(&chunk_rec->list, good);
9571                 if (err > 0 && rebuild)
9572                         list_add_tail(&chunk_rec->list, rebuild);
9573                 if (err < 0 && bad)
9574                         list_add_tail(&chunk_rec->list, bad);
9575                 chunk_item = next_cache_extent(chunk_item);
9576         }
9577
9578         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9579                 if (!silent)
9580                         fprintf(stderr,
9581                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9582                                 bg_rec->objectid,
9583                                 bg_rec->offset,
9584                                 bg_rec->flags);
9585                 if (!ret)
9586                         ret = 1;
9587         }
9588
9589         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9590                             chunk_list) {
9591                 if (!silent)
9592                         fprintf(stderr,
9593                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9594                                 dext_rec->objectid,
9595                                 dext_rec->offset,
9596                                 dext_rec->length);
9597                 if (!ret)
9598                         ret = 1;
9599         }
9600         return ret;
9601 }
9602
9603
9604 static int check_device_used(struct device_record *dev_rec,
9605                              struct device_extent_tree *dext_cache)
9606 {
9607         struct cache_extent *cache;
9608         struct device_extent_record *dev_extent_rec;
9609         u64 total_byte = 0;
9610
9611         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9612         while (cache) {
9613                 dev_extent_rec = container_of(cache,
9614                                               struct device_extent_record,
9615                                               cache);
9616                 if (dev_extent_rec->objectid != dev_rec->devid)
9617                         break;
9618
9619                 list_del_init(&dev_extent_rec->device_list);
9620                 total_byte += dev_extent_rec->length;
9621                 cache = next_cache_extent(cache);
9622         }
9623
9624         if (total_byte != dev_rec->byte_used) {
9625                 fprintf(stderr,
9626                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9627                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9628                         dev_rec->type, dev_rec->offset);
9629                 return -1;
9630         } else {
9631                 return 0;
9632         }
9633 }
9634
9635 /* check btrfs_dev_item -> btrfs_dev_extent */
9636 static int check_devices(struct rb_root *dev_cache,
9637                          struct device_extent_tree *dev_extent_cache)
9638 {
9639         struct rb_node *dev_node;
9640         struct device_record *dev_rec;
9641         struct device_extent_record *dext_rec;
9642         int err;
9643         int ret = 0;
9644
9645         dev_node = rb_first(dev_cache);
9646         while (dev_node) {
9647                 dev_rec = container_of(dev_node, struct device_record, node);
9648                 err = check_device_used(dev_rec, dev_extent_cache);
9649                 if (err)
9650                         ret = err;
9651
9652                 dev_node = rb_next(dev_node);
9653         }
9654         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9655                             device_list) {
9656                 fprintf(stderr,
9657                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9658                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9659                 if (!ret)
9660                         ret = 1;
9661         }
9662         return ret;
9663 }
9664
9665 static int add_root_item_to_list(struct list_head *head,
9666                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9667                                   u8 level, u8 drop_level,
9668                                   int level_size, struct btrfs_key *drop_key)
9669 {
9670
9671         struct root_item_record *ri_rec;
9672         ri_rec = malloc(sizeof(*ri_rec));
9673         if (!ri_rec)
9674                 return -ENOMEM;
9675         ri_rec->bytenr = bytenr;
9676         ri_rec->objectid = objectid;
9677         ri_rec->level = level;
9678         ri_rec->level_size = level_size;
9679         ri_rec->drop_level = drop_level;
9680         ri_rec->last_snapshot = last_snapshot;
9681         if (drop_key)
9682                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9683         list_add_tail(&ri_rec->list, head);
9684
9685         return 0;
9686 }
9687
9688 static void free_root_item_list(struct list_head *list)
9689 {
9690         struct root_item_record *ri_rec;
9691
9692         while (!list_empty(list)) {
9693                 ri_rec = list_first_entry(list, struct root_item_record,
9694                                           list);
9695                 list_del_init(&ri_rec->list);
9696                 free(ri_rec);
9697         }
9698 }
9699
9700 static int deal_root_from_list(struct list_head *list,
9701                                struct btrfs_root *root,
9702                                struct block_info *bits,
9703                                int bits_nr,
9704                                struct cache_tree *pending,
9705                                struct cache_tree *seen,
9706                                struct cache_tree *reada,
9707                                struct cache_tree *nodes,
9708                                struct cache_tree *extent_cache,
9709                                struct cache_tree *chunk_cache,
9710                                struct rb_root *dev_cache,
9711                                struct block_group_tree *block_group_cache,
9712                                struct device_extent_tree *dev_extent_cache)
9713 {
9714         int ret = 0;
9715         u64 last;
9716
9717         while (!list_empty(list)) {
9718                 struct root_item_record *rec;
9719                 struct extent_buffer *buf;
9720                 rec = list_entry(list->next,
9721                                  struct root_item_record, list);
9722                 last = 0;
9723                 buf = read_tree_block(root->fs_info->tree_root,
9724                                       rec->bytenr, rec->level_size, 0);
9725                 if (!extent_buffer_uptodate(buf)) {
9726                         free_extent_buffer(buf);
9727                         ret = -EIO;
9728                         break;
9729                 }
9730                 ret = add_root_to_pending(buf, extent_cache, pending,
9731                                     seen, nodes, rec->objectid);
9732                 if (ret < 0)
9733                         break;
9734                 /*
9735                  * To rebuild extent tree, we need deal with snapshot
9736                  * one by one, otherwise we deal with node firstly which
9737                  * can maximize readahead.
9738                  */
9739                 while (1) {
9740                         ret = run_next_block(root, bits, bits_nr, &last,
9741                                              pending, seen, reada, nodes,
9742                                              extent_cache, chunk_cache,
9743                                              dev_cache, block_group_cache,
9744                                              dev_extent_cache, rec);
9745                         if (ret != 0)
9746                                 break;
9747                 }
9748                 free_extent_buffer(buf);
9749                 list_del(&rec->list);
9750                 free(rec);
9751                 if (ret < 0)
9752                         break;
9753         }
9754         while (ret >= 0) {
9755                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9756                                      reada, nodes, extent_cache, chunk_cache,
9757                                      dev_cache, block_group_cache,
9758                                      dev_extent_cache, NULL);
9759                 if (ret != 0) {
9760                         if (ret > 0)
9761                                 ret = 0;
9762                         break;
9763                 }
9764         }
9765         return ret;
9766 }
9767
9768 static int check_chunks_and_extents(struct btrfs_root *root)
9769 {
9770         struct rb_root dev_cache;
9771         struct cache_tree chunk_cache;
9772         struct block_group_tree block_group_cache;
9773         struct device_extent_tree dev_extent_cache;
9774         struct cache_tree extent_cache;
9775         struct cache_tree seen;
9776         struct cache_tree pending;
9777         struct cache_tree reada;
9778         struct cache_tree nodes;
9779         struct extent_io_tree excluded_extents;
9780         struct cache_tree corrupt_blocks;
9781         struct btrfs_path path;
9782         struct btrfs_key key;
9783         struct btrfs_key found_key;
9784         int ret, err = 0;
9785         struct block_info *bits;
9786         int bits_nr;
9787         struct extent_buffer *leaf;
9788         int slot;
9789         struct btrfs_root_item ri;
9790         struct list_head dropping_trees;
9791         struct list_head normal_trees;
9792         struct btrfs_root *root1;
9793         u64 objectid;
9794         u32 level_size;
9795         u8 level;
9796
9797         dev_cache = RB_ROOT;
9798         cache_tree_init(&chunk_cache);
9799         block_group_tree_init(&block_group_cache);
9800         device_extent_tree_init(&dev_extent_cache);
9801
9802         cache_tree_init(&extent_cache);
9803         cache_tree_init(&seen);
9804         cache_tree_init(&pending);
9805         cache_tree_init(&nodes);
9806         cache_tree_init(&reada);
9807         cache_tree_init(&corrupt_blocks);
9808         extent_io_tree_init(&excluded_extents);
9809         INIT_LIST_HEAD(&dropping_trees);
9810         INIT_LIST_HEAD(&normal_trees);
9811
9812         if (repair) {
9813                 root->fs_info->excluded_extents = &excluded_extents;
9814                 root->fs_info->fsck_extent_cache = &extent_cache;
9815                 root->fs_info->free_extent_hook = free_extent_hook;
9816                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9817         }
9818
9819         bits_nr = 1024;
9820         bits = malloc(bits_nr * sizeof(struct block_info));
9821         if (!bits) {
9822                 perror("malloc");
9823                 exit(1);
9824         }
9825
9826         if (ctx.progress_enabled) {
9827                 ctx.tp = TASK_EXTENTS;
9828                 task_start(ctx.info);
9829         }
9830
9831 again:
9832         root1 = root->fs_info->tree_root;
9833         level = btrfs_header_level(root1->node);
9834         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9835                                     root1->node->start, 0, level, 0,
9836                                     root1->nodesize, NULL);
9837         if (ret < 0)
9838                 goto out;
9839         root1 = root->fs_info->chunk_root;
9840         level = btrfs_header_level(root1->node);
9841         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9842                                     root1->node->start, 0, level, 0,
9843                                     root1->nodesize, NULL);
9844         if (ret < 0)
9845                 goto out;
9846         btrfs_init_path(&path);
9847         key.offset = 0;
9848         key.objectid = 0;
9849         key.type = BTRFS_ROOT_ITEM_KEY;
9850         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9851                                         &key, &path, 0, 0);
9852         if (ret < 0)
9853                 goto out;
9854         while(1) {
9855                 leaf = path.nodes[0];
9856                 slot = path.slots[0];
9857                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9858                         ret = btrfs_next_leaf(root, &path);
9859                         if (ret != 0)
9860                                 break;
9861                         leaf = path.nodes[0];
9862                         slot = path.slots[0];
9863                 }
9864                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9865                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9866                         unsigned long offset;
9867                         u64 last_snapshot;
9868
9869                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9870                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9871                         last_snapshot = btrfs_root_last_snapshot(&ri);
9872                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9873                                 level = btrfs_root_level(&ri);
9874                                 level_size = root->nodesize;
9875                                 ret = add_root_item_to_list(&normal_trees,
9876                                                 found_key.objectid,
9877                                                 btrfs_root_bytenr(&ri),
9878                                                 last_snapshot, level,
9879                                                 0, level_size, NULL);
9880                                 if (ret < 0)
9881                                         goto out;
9882                         } else {
9883                                 level = btrfs_root_level(&ri);
9884                                 level_size = root->nodesize;
9885                                 objectid = found_key.objectid;
9886                                 btrfs_disk_key_to_cpu(&found_key,
9887                                                       &ri.drop_progress);
9888                                 ret = add_root_item_to_list(&dropping_trees,
9889                                                 objectid,
9890                                                 btrfs_root_bytenr(&ri),
9891                                                 last_snapshot, level,
9892                                                 ri.drop_level,
9893                                                 level_size, &found_key);
9894                                 if (ret < 0)
9895                                         goto out;
9896                         }
9897                 }
9898                 path.slots[0]++;
9899         }
9900         btrfs_release_path(&path);
9901
9902         /*
9903          * check_block can return -EAGAIN if it fixes something, please keep
9904          * this in mind when dealing with return values from these functions, if
9905          * we get -EAGAIN we want to fall through and restart the loop.
9906          */
9907         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9908                                   &seen, &reada, &nodes, &extent_cache,
9909                                   &chunk_cache, &dev_cache, &block_group_cache,
9910                                   &dev_extent_cache);
9911         if (ret < 0) {
9912                 if (ret == -EAGAIN)
9913                         goto loop;
9914                 goto out;
9915         }
9916         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9917                                   &pending, &seen, &reada, &nodes,
9918                                   &extent_cache, &chunk_cache, &dev_cache,
9919                                   &block_group_cache, &dev_extent_cache);
9920         if (ret < 0) {
9921                 if (ret == -EAGAIN)
9922                         goto loop;
9923                 goto out;
9924         }
9925
9926         ret = check_chunks(&chunk_cache, &block_group_cache,
9927                            &dev_extent_cache, NULL, NULL, NULL, 0);
9928         if (ret) {
9929                 if (ret == -EAGAIN)
9930                         goto loop;
9931                 err = ret;
9932         }
9933
9934         ret = check_extent_refs(root, &extent_cache);
9935         if (ret < 0) {
9936                 if (ret == -EAGAIN)
9937                         goto loop;
9938                 goto out;
9939         }
9940
9941         ret = check_devices(&dev_cache, &dev_extent_cache);
9942         if (ret && err)
9943                 ret = err;
9944
9945 out:
9946         task_stop(ctx.info);
9947         if (repair) {
9948                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9949                 extent_io_tree_cleanup(&excluded_extents);
9950                 root->fs_info->fsck_extent_cache = NULL;
9951                 root->fs_info->free_extent_hook = NULL;
9952                 root->fs_info->corrupt_blocks = NULL;
9953                 root->fs_info->excluded_extents = NULL;
9954         }
9955         free(bits);
9956         free_chunk_cache_tree(&chunk_cache);
9957         free_device_cache_tree(&dev_cache);
9958         free_block_group_tree(&block_group_cache);
9959         free_device_extent_tree(&dev_extent_cache);
9960         free_extent_cache_tree(&seen);
9961         free_extent_cache_tree(&pending);
9962         free_extent_cache_tree(&reada);
9963         free_extent_cache_tree(&nodes);
9964         return ret;
9965 loop:
9966         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9967         free_extent_cache_tree(&seen);
9968         free_extent_cache_tree(&pending);
9969         free_extent_cache_tree(&reada);
9970         free_extent_cache_tree(&nodes);
9971         free_chunk_cache_tree(&chunk_cache);
9972         free_block_group_tree(&block_group_cache);
9973         free_device_cache_tree(&dev_cache);
9974         free_device_extent_tree(&dev_extent_cache);
9975         free_extent_record_cache(root->fs_info, &extent_cache);
9976         free_root_item_list(&normal_trees);
9977         free_root_item_list(&dropping_trees);
9978         extent_io_tree_cleanup(&excluded_extents);
9979         goto again;
9980 }
9981
9982 /*
9983  * Check backrefs of a tree block given by @bytenr or @eb.
9984  *
9985  * @root:       the root containing the @bytenr or @eb
9986  * @eb:         tree block extent buffer, can be NULL
9987  * @bytenr:     bytenr of the tree block to search
9988  * @level:      tree level of the tree block
9989  * @owner:      owner of the tree block
9990  *
9991  * Return >0 for any error found and output error message
9992  * Return 0 for no error found
9993  */
9994 static int check_tree_block_ref(struct btrfs_root *root,
9995                                 struct extent_buffer *eb, u64 bytenr,
9996                                 int level, u64 owner)
9997 {
9998         struct btrfs_key key;
9999         struct btrfs_root *extent_root = root->fs_info->extent_root;
10000         struct btrfs_path path;
10001         struct btrfs_extent_item *ei;
10002         struct btrfs_extent_inline_ref *iref;
10003         struct extent_buffer *leaf;
10004         unsigned long end;
10005         unsigned long ptr;
10006         int slot;
10007         int skinny_level;
10008         int type;
10009         u32 nodesize = root->nodesize;
10010         u32 item_size;
10011         u64 offset;
10012         int found_ref = 0;
10013         int err = 0;
10014         int ret;
10015
10016         btrfs_init_path(&path);
10017         key.objectid = bytenr;
10018         if (btrfs_fs_incompat(root->fs_info,
10019                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
10020                 key.type = BTRFS_METADATA_ITEM_KEY;
10021         else
10022                 key.type = BTRFS_EXTENT_ITEM_KEY;
10023         key.offset = (u64)-1;
10024
10025         /* Search for the backref in extent tree */
10026         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10027         if (ret < 0) {
10028                 err |= BACKREF_MISSING;
10029                 goto out;
10030         }
10031         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10032         if (ret) {
10033                 err |= BACKREF_MISSING;
10034                 goto out;
10035         }
10036
10037         leaf = path.nodes[0];
10038         slot = path.slots[0];
10039         btrfs_item_key_to_cpu(leaf, &key, slot);
10040
10041         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10042
10043         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10044                 skinny_level = (int)key.offset;
10045                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10046         } else {
10047                 struct btrfs_tree_block_info *info;
10048
10049                 info = (struct btrfs_tree_block_info *)(ei + 1);
10050                 skinny_level = btrfs_tree_block_level(leaf, info);
10051                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10052         }
10053
10054         if (eb) {
10055                 u64 header_gen;
10056                 u64 extent_gen;
10057
10058                 if (!(btrfs_extent_flags(leaf, ei) &
10059                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10060                         error(
10061                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10062                                 key.objectid, nodesize,
10063                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10064                         err = BACKREF_MISMATCH;
10065                 }
10066                 header_gen = btrfs_header_generation(eb);
10067                 extent_gen = btrfs_extent_generation(leaf, ei);
10068                 if (header_gen != extent_gen) {
10069                         error(
10070         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10071                                 key.objectid, nodesize, header_gen,
10072                                 extent_gen);
10073                         err = BACKREF_MISMATCH;
10074                 }
10075                 if (level != skinny_level) {
10076                         error(
10077                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10078                                 key.objectid, nodesize, level, skinny_level);
10079                         err = BACKREF_MISMATCH;
10080                 }
10081                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10082                         error(
10083                         "extent[%llu %u] is referred by other roots than %llu",
10084                                 key.objectid, nodesize, root->objectid);
10085                         err = BACKREF_MISMATCH;
10086                 }
10087         }
10088
10089         /*
10090          * Iterate the extent/metadata item to find the exact backref
10091          */
10092         item_size = btrfs_item_size_nr(leaf, slot);
10093         ptr = (unsigned long)iref;
10094         end = (unsigned long)ei + item_size;
10095         while (ptr < end) {
10096                 iref = (struct btrfs_extent_inline_ref *)ptr;
10097                 type = btrfs_extent_inline_ref_type(leaf, iref);
10098                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10099
10100                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10101                         (offset == root->objectid || offset == owner)) {
10102                         found_ref = 1;
10103                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10104                         /* Check if the backref points to valid referencer */
10105                         found_ref = !check_tree_block_ref(root, NULL, offset,
10106                                                           level + 1, owner);
10107                 }
10108
10109                 if (found_ref)
10110                         break;
10111                 ptr += btrfs_extent_inline_ref_size(type);
10112         }
10113
10114         /*
10115          * Inlined extent item doesn't have what we need, check
10116          * TREE_BLOCK_REF_KEY
10117          */
10118         if (!found_ref) {
10119                 btrfs_release_path(&path);
10120                 key.objectid = bytenr;
10121                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10122                 key.offset = root->objectid;
10123
10124                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10125                 if (!ret)
10126                         found_ref = 1;
10127         }
10128         if (!found_ref)
10129                 err |= BACKREF_MISSING;
10130 out:
10131         btrfs_release_path(&path);
10132         if (eb && (err & BACKREF_MISSING))
10133                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10134                         bytenr, nodesize, owner, level);
10135         return err;
10136 }
10137
10138 /*
10139  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10140  *
10141  * Return >0 any error found and output error message
10142  * Return 0 for no error found
10143  */
10144 static int check_extent_data_item(struct btrfs_root *root,
10145                                   struct extent_buffer *eb, int slot)
10146 {
10147         struct btrfs_file_extent_item *fi;
10148         struct btrfs_path path;
10149         struct btrfs_root *extent_root = root->fs_info->extent_root;
10150         struct btrfs_key fi_key;
10151         struct btrfs_key dbref_key;
10152         struct extent_buffer *leaf;
10153         struct btrfs_extent_item *ei;
10154         struct btrfs_extent_inline_ref *iref;
10155         struct btrfs_extent_data_ref *dref;
10156         u64 owner;
10157         u64 file_extent_gen;
10158         u64 disk_bytenr;
10159         u64 disk_num_bytes;
10160         u64 extent_num_bytes;
10161         u64 extent_flags;
10162         u64 extent_gen;
10163         u32 item_size;
10164         unsigned long end;
10165         unsigned long ptr;
10166         int type;
10167         u64 ref_root;
10168         int found_dbackref = 0;
10169         int err = 0;
10170         int ret;
10171
10172         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10173         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10174         file_extent_gen = btrfs_file_extent_generation(eb, fi);
10175
10176         /* Nothing to check for hole and inline data extents */
10177         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10178             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10179                 return 0;
10180
10181         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10182         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10183         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10184
10185         /* Check unaligned disk_num_bytes and num_bytes */
10186         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10187                 error(
10188 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10189                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10190                         root->sectorsize);
10191                 err |= BYTES_UNALIGNED;
10192         } else {
10193                 data_bytes_allocated += disk_num_bytes;
10194         }
10195         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10196                 error(
10197 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10198                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10199                         root->sectorsize);
10200                 err |= BYTES_UNALIGNED;
10201         } else {
10202                 data_bytes_referenced += extent_num_bytes;
10203         }
10204         owner = btrfs_header_owner(eb);
10205
10206         /* Check the extent item of the file extent in extent tree */
10207         btrfs_init_path(&path);
10208         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10209         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10210         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10211
10212         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10213         if (ret) {
10214                 err |= BACKREF_MISSING;
10215                 goto error;
10216         }
10217
10218         leaf = path.nodes[0];
10219         slot = path.slots[0];
10220         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10221
10222         extent_flags = btrfs_extent_flags(leaf, ei);
10223         extent_gen = btrfs_extent_generation(leaf, ei);
10224
10225         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10226                 error(
10227                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10228                     disk_bytenr, disk_num_bytes,
10229                     BTRFS_EXTENT_FLAG_DATA);
10230                 err |= BACKREF_MISMATCH;
10231         }
10232
10233         if (file_extent_gen < extent_gen) {
10234                 error(
10235 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
10236                         disk_bytenr, disk_num_bytes, file_extent_gen,
10237                         extent_gen);
10238                 err |= BACKREF_MISMATCH;
10239         }
10240
10241         /* Check data backref inside that extent item */
10242         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10243         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10244         ptr = (unsigned long)iref;
10245         end = (unsigned long)ei + item_size;
10246         while (ptr < end) {
10247                 iref = (struct btrfs_extent_inline_ref *)ptr;
10248                 type = btrfs_extent_inline_ref_type(leaf, iref);
10249                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10250
10251                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10252                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10253                         if (ref_root == owner || ref_root == root->objectid)
10254                                 found_dbackref = 1;
10255                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10256                         found_dbackref = !check_tree_block_ref(root, NULL,
10257                                 btrfs_extent_inline_ref_offset(leaf, iref),
10258                                 0, owner);
10259                 }
10260
10261                 if (found_dbackref)
10262                         break;
10263                 ptr += btrfs_extent_inline_ref_size(type);
10264         }
10265
10266         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10267         if (!found_dbackref) {
10268                 btrfs_release_path(&path);
10269
10270                 btrfs_init_path(&path);
10271                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10272                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10273                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10274                                 fi_key.objectid, fi_key.offset);
10275
10276                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10277                                         &dbref_key, &path, 0, 0);
10278                 if (!ret)
10279                         found_dbackref = 1;
10280         }
10281
10282         if (!found_dbackref)
10283                 err |= BACKREF_MISSING;
10284 error:
10285         btrfs_release_path(&path);
10286         if (err & BACKREF_MISSING) {
10287                 error("data extent[%llu %llu] backref lost",
10288                       disk_bytenr, disk_num_bytes);
10289         }
10290         return err;
10291 }
10292
10293 /*
10294  * Get real tree block level for the case like shared block
10295  * Return >= 0 as tree level
10296  * Return <0 for error
10297  */
10298 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10299 {
10300         struct extent_buffer *eb;
10301         struct btrfs_path path;
10302         struct btrfs_key key;
10303         struct btrfs_extent_item *ei;
10304         u64 flags;
10305         u64 transid;
10306         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10307         u8 backref_level;
10308         u8 header_level;
10309         int ret;
10310
10311         /* Search extent tree for extent generation and level */
10312         key.objectid = bytenr;
10313         key.type = BTRFS_METADATA_ITEM_KEY;
10314         key.offset = (u64)-1;
10315
10316         btrfs_init_path(&path);
10317         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10318         if (ret < 0)
10319                 goto release_out;
10320         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10321         if (ret < 0)
10322                 goto release_out;
10323         if (ret > 0) {
10324                 ret = -ENOENT;
10325                 goto release_out;
10326         }
10327
10328         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10329         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10330                             struct btrfs_extent_item);
10331         flags = btrfs_extent_flags(path.nodes[0], ei);
10332         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10333                 ret = -ENOENT;
10334                 goto release_out;
10335         }
10336
10337         /* Get transid for later read_tree_block() check */
10338         transid = btrfs_extent_generation(path.nodes[0], ei);
10339
10340         /* Get backref level as one source */
10341         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10342                 backref_level = key.offset;
10343         } else {
10344                 struct btrfs_tree_block_info *info;
10345
10346                 info = (struct btrfs_tree_block_info *)(ei + 1);
10347                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10348         }
10349         btrfs_release_path(&path);
10350
10351         /* Get level from tree block as an alternative source */
10352         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10353         if (!extent_buffer_uptodate(eb)) {
10354                 free_extent_buffer(eb);
10355                 return -EIO;
10356         }
10357         header_level = btrfs_header_level(eb);
10358         free_extent_buffer(eb);
10359
10360         if (header_level != backref_level)
10361                 return -EIO;
10362         return header_level;
10363
10364 release_out:
10365         btrfs_release_path(&path);
10366         return ret;
10367 }
10368
10369 /*
10370  * Check if a tree block backref is valid (points to a valid tree block)
10371  * if level == -1, level will be resolved
10372  * Return >0 for any error found and print error message
10373  */
10374 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10375                                     u64 bytenr, int level)
10376 {
10377         struct btrfs_root *root;
10378         struct btrfs_key key;
10379         struct btrfs_path path;
10380         struct extent_buffer *eb;
10381         struct extent_buffer *node;
10382         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10383         int err = 0;
10384         int ret;
10385
10386         /* Query level for level == -1 special case */
10387         if (level == -1)
10388                 level = query_tree_block_level(fs_info, bytenr);
10389         if (level < 0) {
10390                 err |= REFERENCER_MISSING;
10391                 goto out;
10392         }
10393
10394         key.objectid = root_id;
10395         key.type = BTRFS_ROOT_ITEM_KEY;
10396         key.offset = (u64)-1;
10397
10398         root = btrfs_read_fs_root(fs_info, &key);
10399         if (IS_ERR(root)) {
10400                 err |= REFERENCER_MISSING;
10401                 goto out;
10402         }
10403
10404         /* Read out the tree block to get item/node key */
10405         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10406         if (!extent_buffer_uptodate(eb)) {
10407                 err |= REFERENCER_MISSING;
10408                 free_extent_buffer(eb);
10409                 goto out;
10410         }
10411
10412         /* Empty tree, no need to check key */
10413         if (!btrfs_header_nritems(eb) && !level) {
10414                 free_extent_buffer(eb);
10415                 goto out;
10416         }
10417
10418         if (level)
10419                 btrfs_node_key_to_cpu(eb, &key, 0);
10420         else
10421                 btrfs_item_key_to_cpu(eb, &key, 0);
10422
10423         free_extent_buffer(eb);
10424
10425         btrfs_init_path(&path);
10426         path.lowest_level = level;
10427         /* Search with the first key, to ensure we can reach it */
10428         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10429         if (ret < 0) {
10430                 err |= REFERENCER_MISSING;
10431                 goto release_out;
10432         }
10433
10434         node = path.nodes[level];
10435         if (btrfs_header_bytenr(node) != bytenr) {
10436                 error(
10437         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10438                         bytenr, nodesize, bytenr,
10439                         btrfs_header_bytenr(node));
10440                 err |= REFERENCER_MISMATCH;
10441         }
10442         if (btrfs_header_level(node) != level) {
10443                 error(
10444         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10445                         bytenr, nodesize, level,
10446                         btrfs_header_level(node));
10447                 err |= REFERENCER_MISMATCH;
10448         }
10449
10450 release_out:
10451         btrfs_release_path(&path);
10452 out:
10453         if (err & REFERENCER_MISSING) {
10454                 if (level < 0)
10455                         error("extent [%llu %d] lost referencer (owner: %llu)",
10456                                 bytenr, nodesize, root_id);
10457                 else
10458                         error(
10459                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10460                                 bytenr, nodesize, root_id, level);
10461         }
10462
10463         return err;
10464 }
10465
10466 /*
10467  * Check referencer for shared block backref
10468  * If level == -1, this function will resolve the level.
10469  */
10470 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10471                                      u64 parent, u64 bytenr, int level)
10472 {
10473         struct extent_buffer *eb;
10474         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10475         u32 nr;
10476         int found_parent = 0;
10477         int i;
10478
10479         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10480         if (!extent_buffer_uptodate(eb))
10481                 goto out;
10482
10483         if (level == -1)
10484                 level = query_tree_block_level(fs_info, bytenr);
10485         if (level < 0)
10486                 goto out;
10487
10488         if (level + 1 != btrfs_header_level(eb))
10489                 goto out;
10490
10491         nr = btrfs_header_nritems(eb);
10492         for (i = 0; i < nr; i++) {
10493                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10494                         found_parent = 1;
10495                         break;
10496                 }
10497         }
10498 out:
10499         free_extent_buffer(eb);
10500         if (!found_parent) {
10501                 error(
10502         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10503                         bytenr, nodesize, parent, level);
10504                 return REFERENCER_MISSING;
10505         }
10506         return 0;
10507 }
10508
10509 /*
10510  * Check referencer for normal (inlined) data ref
10511  * If len == 0, it will be resolved by searching in extent tree
10512  */
10513 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10514                                      u64 root_id, u64 objectid, u64 offset,
10515                                      u64 bytenr, u64 len, u32 count)
10516 {
10517         struct btrfs_root *root;
10518         struct btrfs_root *extent_root = fs_info->extent_root;
10519         struct btrfs_key key;
10520         struct btrfs_path path;
10521         struct extent_buffer *leaf;
10522         struct btrfs_file_extent_item *fi;
10523         u32 found_count = 0;
10524         int slot;
10525         int ret = 0;
10526
10527         if (!len) {
10528                 key.objectid = bytenr;
10529                 key.type = BTRFS_EXTENT_ITEM_KEY;
10530                 key.offset = (u64)-1;
10531
10532                 btrfs_init_path(&path);
10533                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10534                 if (ret < 0)
10535                         goto out;
10536                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10537                 if (ret)
10538                         goto out;
10539                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10540                 if (key.objectid != bytenr ||
10541                     key.type != BTRFS_EXTENT_ITEM_KEY)
10542                         goto out;
10543                 len = key.offset;
10544                 btrfs_release_path(&path);
10545         }
10546         key.objectid = root_id;
10547         key.type = BTRFS_ROOT_ITEM_KEY;
10548         key.offset = (u64)-1;
10549         btrfs_init_path(&path);
10550
10551         root = btrfs_read_fs_root(fs_info, &key);
10552         if (IS_ERR(root))
10553                 goto out;
10554
10555         key.objectid = objectid;
10556         key.type = BTRFS_EXTENT_DATA_KEY;
10557         /*
10558          * It can be nasty as data backref offset is
10559          * file offset - file extent offset, which is smaller or
10560          * equal to original backref offset.  The only special case is
10561          * overflow.  So we need to special check and do further search.
10562          */
10563         key.offset = offset & (1ULL << 63) ? 0 : offset;
10564
10565         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10566         if (ret < 0)
10567                 goto out;
10568
10569         /*
10570          * Search afterwards to get correct one
10571          * NOTE: As we must do a comprehensive check on the data backref to
10572          * make sure the dref count also matches, we must iterate all file
10573          * extents for that inode.
10574          */
10575         while (1) {
10576                 leaf = path.nodes[0];
10577                 slot = path.slots[0];
10578
10579                 btrfs_item_key_to_cpu(leaf, &key, slot);
10580                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10581                         break;
10582                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10583                 /*
10584                  * Except normal disk bytenr and disk num bytes, we still
10585                  * need to do extra check on dbackref offset as
10586                  * dbackref offset = file_offset - file_extent_offset
10587                  */
10588                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10589                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10590                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10591                     offset)
10592                         found_count++;
10593
10594                 ret = btrfs_next_item(root, &path);
10595                 if (ret)
10596                         break;
10597         }
10598 out:
10599         btrfs_release_path(&path);
10600         if (found_count != count) {
10601                 error(
10602 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10603                         bytenr, len, root_id, objectid, offset, count, found_count);
10604                 return REFERENCER_MISSING;
10605         }
10606         return 0;
10607 }
10608
10609 /*
10610  * Check if the referencer of a shared data backref exists
10611  */
10612 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10613                                      u64 parent, u64 bytenr)
10614 {
10615         struct extent_buffer *eb;
10616         struct btrfs_key key;
10617         struct btrfs_file_extent_item *fi;
10618         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10619         u32 nr;
10620         int found_parent = 0;
10621         int i;
10622
10623         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10624         if (!extent_buffer_uptodate(eb))
10625                 goto out;
10626
10627         nr = btrfs_header_nritems(eb);
10628         for (i = 0; i < nr; i++) {
10629                 btrfs_item_key_to_cpu(eb, &key, i);
10630                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10631                         continue;
10632
10633                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10634                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10635                         continue;
10636
10637                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10638                         found_parent = 1;
10639                         break;
10640                 }
10641         }
10642
10643 out:
10644         free_extent_buffer(eb);
10645         if (!found_parent) {
10646                 error("shared extent %llu referencer lost (parent: %llu)",
10647                         bytenr, parent);
10648                 return REFERENCER_MISSING;
10649         }
10650         return 0;
10651 }
10652
10653 /*
10654  * This function will check a given extent item, including its backref and
10655  * itself (like crossing stripe boundary and type)
10656  *
10657  * Since we don't use extent_record anymore, introduce new error bit
10658  */
10659 static int check_extent_item(struct btrfs_fs_info *fs_info,
10660                              struct extent_buffer *eb, int slot)
10661 {
10662         struct btrfs_extent_item *ei;
10663         struct btrfs_extent_inline_ref *iref;
10664         struct btrfs_extent_data_ref *dref;
10665         unsigned long end;
10666         unsigned long ptr;
10667         int type;
10668         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10669         u32 item_size = btrfs_item_size_nr(eb, slot);
10670         u64 flags;
10671         u64 offset;
10672         int metadata = 0;
10673         int level;
10674         struct btrfs_key key;
10675         int ret;
10676         int err = 0;
10677
10678         btrfs_item_key_to_cpu(eb, &key, slot);
10679         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10680                 bytes_used += key.offset;
10681         else
10682                 bytes_used += nodesize;
10683
10684         if (item_size < sizeof(*ei)) {
10685                 /*
10686                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10687                  * old thing when on disk format is still un-determined.
10688                  * No need to care about it anymore
10689                  */
10690                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10691                 return -ENOTTY;
10692         }
10693
10694         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10695         flags = btrfs_extent_flags(eb, ei);
10696
10697         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10698                 metadata = 1;
10699         if (metadata && check_crossing_stripes(global_info, key.objectid,
10700                                                eb->len)) {
10701                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10702                       key.objectid, key.objectid + nodesize);
10703                 err |= CROSSING_STRIPE_BOUNDARY;
10704         }
10705
10706         ptr = (unsigned long)(ei + 1);
10707
10708         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10709                 /* Old EXTENT_ITEM metadata */
10710                 struct btrfs_tree_block_info *info;
10711
10712                 info = (struct btrfs_tree_block_info *)ptr;
10713                 level = btrfs_tree_block_level(eb, info);
10714                 ptr += sizeof(struct btrfs_tree_block_info);
10715         } else {
10716                 /* New METADATA_ITEM */
10717                 level = key.offset;
10718         }
10719         end = (unsigned long)ei + item_size;
10720
10721         if (ptr >= end) {
10722                 err |= ITEM_SIZE_MISMATCH;
10723                 goto out;
10724         }
10725
10726         /* Now check every backref in this extent item */
10727 next:
10728         iref = (struct btrfs_extent_inline_ref *)ptr;
10729         type = btrfs_extent_inline_ref_type(eb, iref);
10730         offset = btrfs_extent_inline_ref_offset(eb, iref);
10731         switch (type) {
10732         case BTRFS_TREE_BLOCK_REF_KEY:
10733                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10734                                                level);
10735                 err |= ret;
10736                 break;
10737         case BTRFS_SHARED_BLOCK_REF_KEY:
10738                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10739                                                  level);
10740                 err |= ret;
10741                 break;
10742         case BTRFS_EXTENT_DATA_REF_KEY:
10743                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10744                 ret = check_extent_data_backref(fs_info,
10745                                 btrfs_extent_data_ref_root(eb, dref),
10746                                 btrfs_extent_data_ref_objectid(eb, dref),
10747                                 btrfs_extent_data_ref_offset(eb, dref),
10748                                 key.objectid, key.offset,
10749                                 btrfs_extent_data_ref_count(eb, dref));
10750                 err |= ret;
10751                 break;
10752         case BTRFS_SHARED_DATA_REF_KEY:
10753                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10754                 err |= ret;
10755                 break;
10756         default:
10757                 error("extent[%llu %d %llu] has unknown ref type: %d",
10758                         key.objectid, key.type, key.offset, type);
10759                 err |= UNKNOWN_TYPE;
10760                 goto out;
10761         }
10762
10763         ptr += btrfs_extent_inline_ref_size(type);
10764         if (ptr < end)
10765                 goto next;
10766
10767 out:
10768         return err;
10769 }
10770
10771 /*
10772  * Check if a dev extent item is referred correctly by its chunk
10773  */
10774 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10775                                  struct extent_buffer *eb, int slot)
10776 {
10777         struct btrfs_root *chunk_root = fs_info->chunk_root;
10778         struct btrfs_dev_extent *ptr;
10779         struct btrfs_path path;
10780         struct btrfs_key chunk_key;
10781         struct btrfs_key devext_key;
10782         struct btrfs_chunk *chunk;
10783         struct extent_buffer *l;
10784         int num_stripes;
10785         u64 length;
10786         int i;
10787         int found_chunk = 0;
10788         int ret;
10789
10790         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10791         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10792         length = btrfs_dev_extent_length(eb, ptr);
10793
10794         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10795         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10796         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10797
10798         btrfs_init_path(&path);
10799         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10800         if (ret)
10801                 goto out;
10802
10803         l = path.nodes[0];
10804         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10805         if (btrfs_chunk_length(l, chunk) != length)
10806                 goto out;
10807
10808         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10809         for (i = 0; i < num_stripes; i++) {
10810                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10811                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10812
10813                 if (devid == devext_key.objectid &&
10814                     offset == devext_key.offset) {
10815                         found_chunk = 1;
10816                         break;
10817                 }
10818         }
10819 out:
10820         btrfs_release_path(&path);
10821         if (!found_chunk) {
10822                 error(
10823                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10824                         devext_key.objectid, devext_key.offset, length);
10825                 return REFERENCER_MISSING;
10826         }
10827         return 0;
10828 }
10829
10830 /*
10831  * Check if the used space is correct with the dev item
10832  */
10833 static int check_dev_item(struct btrfs_fs_info *fs_info,
10834                           struct extent_buffer *eb, int slot)
10835 {
10836         struct btrfs_root *dev_root = fs_info->dev_root;
10837         struct btrfs_dev_item *dev_item;
10838         struct btrfs_path path;
10839         struct btrfs_key key;
10840         struct btrfs_dev_extent *ptr;
10841         u64 dev_id;
10842         u64 used;
10843         u64 total = 0;
10844         int ret;
10845
10846         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10847         dev_id = btrfs_device_id(eb, dev_item);
10848         used = btrfs_device_bytes_used(eb, dev_item);
10849
10850         key.objectid = dev_id;
10851         key.type = BTRFS_DEV_EXTENT_KEY;
10852         key.offset = 0;
10853
10854         btrfs_init_path(&path);
10855         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10856         if (ret < 0) {
10857                 btrfs_item_key_to_cpu(eb, &key, slot);
10858                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10859                         key.objectid, key.type, key.offset);
10860                 btrfs_release_path(&path);
10861                 return REFERENCER_MISSING;
10862         }
10863
10864         /* Iterate dev_extents to calculate the used space of a device */
10865         while (1) {
10866                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10867
10868                 if (key.objectid > dev_id)
10869                         break;
10870                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10871                         goto next;
10872
10873                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10874                                      struct btrfs_dev_extent);
10875                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10876 next:
10877                 ret = btrfs_next_item(dev_root, &path);
10878                 if (ret)
10879                         break;
10880         }
10881         btrfs_release_path(&path);
10882
10883         if (used != total) {
10884                 btrfs_item_key_to_cpu(eb, &key, slot);
10885                 error(
10886 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10887                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10888                         BTRFS_DEV_EXTENT_KEY, dev_id);
10889                 return ACCOUNTING_MISMATCH;
10890         }
10891         return 0;
10892 }
10893
10894 /*
10895  * Check a block group item with its referener (chunk) and its used space
10896  * with extent/metadata item
10897  */
10898 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10899                                   struct extent_buffer *eb, int slot)
10900 {
10901         struct btrfs_root *extent_root = fs_info->extent_root;
10902         struct btrfs_root *chunk_root = fs_info->chunk_root;
10903         struct btrfs_block_group_item *bi;
10904         struct btrfs_block_group_item bg_item;
10905         struct btrfs_path path;
10906         struct btrfs_key bg_key;
10907         struct btrfs_key chunk_key;
10908         struct btrfs_key extent_key;
10909         struct btrfs_chunk *chunk;
10910         struct extent_buffer *leaf;
10911         struct btrfs_extent_item *ei;
10912         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10913         u64 flags;
10914         u64 bg_flags;
10915         u64 used;
10916         u64 total = 0;
10917         int ret;
10918         int err = 0;
10919
10920         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10921         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10922         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10923         used = btrfs_block_group_used(&bg_item);
10924         bg_flags = btrfs_block_group_flags(&bg_item);
10925
10926         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10927         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10928         chunk_key.offset = bg_key.objectid;
10929
10930         btrfs_init_path(&path);
10931         /* Search for the referencer chunk */
10932         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10933         if (ret) {
10934                 error(
10935                 "block group[%llu %llu] did not find the related chunk item",
10936                         bg_key.objectid, bg_key.offset);
10937                 err |= REFERENCER_MISSING;
10938         } else {
10939                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10940                                         struct btrfs_chunk);
10941                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10942                                                 bg_key.offset) {
10943                         error(
10944         "block group[%llu %llu] related chunk item length does not match",
10945                                 bg_key.objectid, bg_key.offset);
10946                         err |= REFERENCER_MISMATCH;
10947                 }
10948         }
10949         btrfs_release_path(&path);
10950
10951         /* Search from the block group bytenr */
10952         extent_key.objectid = bg_key.objectid;
10953         extent_key.type = 0;
10954         extent_key.offset = 0;
10955
10956         btrfs_init_path(&path);
10957         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10958         if (ret < 0)
10959                 goto out;
10960
10961         /* Iterate extent tree to account used space */
10962         while (1) {
10963                 leaf = path.nodes[0];
10964                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10965                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10966                         break;
10967
10968                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10969                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10970                         goto next;
10971                 if (extent_key.objectid < bg_key.objectid)
10972                         goto next;
10973
10974                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10975                         total += nodesize;
10976                 else
10977                         total += extent_key.offset;
10978
10979                 ei = btrfs_item_ptr(leaf, path.slots[0],
10980                                     struct btrfs_extent_item);
10981                 flags = btrfs_extent_flags(leaf, ei);
10982                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10983                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10984                                 error(
10985                         "bad extent[%llu, %llu) type mismatch with chunk",
10986                                         extent_key.objectid,
10987                                         extent_key.objectid + extent_key.offset);
10988                                 err |= CHUNK_TYPE_MISMATCH;
10989                         }
10990                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10991                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10992                                     BTRFS_BLOCK_GROUP_METADATA))) {
10993                                 error(
10994                         "bad extent[%llu, %llu) type mismatch with chunk",
10995                                         extent_key.objectid,
10996                                         extent_key.objectid + nodesize);
10997                                 err |= CHUNK_TYPE_MISMATCH;
10998                         }
10999                 }
11000 next:
11001                 ret = btrfs_next_item(extent_root, &path);
11002                 if (ret)
11003                         break;
11004         }
11005
11006 out:
11007         btrfs_release_path(&path);
11008
11009         if (total != used) {
11010                 error(
11011                 "block group[%llu %llu] used %llu but extent items used %llu",
11012                         bg_key.objectid, bg_key.offset, used, total);
11013                 err |= ACCOUNTING_MISMATCH;
11014         }
11015         return err;
11016 }
11017
11018 /*
11019  * Check a chunk item.
11020  * Including checking all referred dev_extents and block group
11021  */
11022 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11023                             struct extent_buffer *eb, int slot)
11024 {
11025         struct btrfs_root *extent_root = fs_info->extent_root;
11026         struct btrfs_root *dev_root = fs_info->dev_root;
11027         struct btrfs_path path;
11028         struct btrfs_key chunk_key;
11029         struct btrfs_key bg_key;
11030         struct btrfs_key devext_key;
11031         struct btrfs_chunk *chunk;
11032         struct extent_buffer *leaf;
11033         struct btrfs_block_group_item *bi;
11034         struct btrfs_block_group_item bg_item;
11035         struct btrfs_dev_extent *ptr;
11036         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11037         u64 length;
11038         u64 chunk_end;
11039         u64 type;
11040         u64 profile;
11041         int num_stripes;
11042         u64 offset;
11043         u64 objectid;
11044         int i;
11045         int ret;
11046         int err = 0;
11047
11048         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11049         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11050         length = btrfs_chunk_length(eb, chunk);
11051         chunk_end = chunk_key.offset + length;
11052         if (!IS_ALIGNED(length, sectorsize)) {
11053                 error("chunk[%llu %llu) not aligned to %u",
11054                         chunk_key.offset, chunk_end, sectorsize);
11055                 err |= BYTES_UNALIGNED;
11056                 goto out;
11057         }
11058
11059         type = btrfs_chunk_type(eb, chunk);
11060         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11061         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11062                 error("chunk[%llu %llu) has no chunk type",
11063                         chunk_key.offset, chunk_end);
11064                 err |= UNKNOWN_TYPE;
11065         }
11066         if (profile && (profile & (profile - 1))) {
11067                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11068                         chunk_key.offset, chunk_end, profile);
11069                 err |= UNKNOWN_TYPE;
11070         }
11071
11072         bg_key.objectid = chunk_key.offset;
11073         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11074         bg_key.offset = length;
11075
11076         btrfs_init_path(&path);
11077         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11078         if (ret) {
11079                 error(
11080                 "chunk[%llu %llu) did not find the related block group item",
11081                         chunk_key.offset, chunk_end);
11082                 err |= REFERENCER_MISSING;
11083         } else{
11084                 leaf = path.nodes[0];
11085                 bi = btrfs_item_ptr(leaf, path.slots[0],
11086                                     struct btrfs_block_group_item);
11087                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11088                                    sizeof(bg_item));
11089                 if (btrfs_block_group_flags(&bg_item) != type) {
11090                         error(
11091 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11092                                 chunk_key.offset, chunk_end, type,
11093                                 btrfs_block_group_flags(&bg_item));
11094                         err |= REFERENCER_MISSING;
11095                 }
11096         }
11097
11098         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11099         for (i = 0; i < num_stripes; i++) {
11100                 btrfs_release_path(&path);
11101                 btrfs_init_path(&path);
11102                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11103                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11104                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11105
11106                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11107                                         0, 0);
11108                 if (ret)
11109                         goto not_match_dev;
11110
11111                 leaf = path.nodes[0];
11112                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11113                                      struct btrfs_dev_extent);
11114                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11115                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11116                 if (objectid != chunk_key.objectid ||
11117                     offset != chunk_key.offset ||
11118                     btrfs_dev_extent_length(leaf, ptr) != length)
11119                         goto not_match_dev;
11120                 continue;
11121 not_match_dev:
11122                 err |= BACKREF_MISSING;
11123                 error(
11124                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11125                         chunk_key.objectid, chunk_end, i);
11126                 continue;
11127         }
11128         btrfs_release_path(&path);
11129 out:
11130         return err;
11131 }
11132
11133 /*
11134  * Main entry function to check known items and update related accounting info
11135  */
11136 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11137 {
11138         struct btrfs_fs_info *fs_info = root->fs_info;
11139         struct btrfs_key key;
11140         int slot = 0;
11141         int type;
11142         struct btrfs_extent_data_ref *dref;
11143         int ret;
11144         int err = 0;
11145
11146 next:
11147         btrfs_item_key_to_cpu(eb, &key, slot);
11148         type = key.type;
11149
11150         switch (type) {
11151         case BTRFS_EXTENT_DATA_KEY:
11152                 ret = check_extent_data_item(root, eb, slot);
11153                 err |= ret;
11154                 break;
11155         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11156                 ret = check_block_group_item(fs_info, eb, slot);
11157                 err |= ret;
11158                 break;
11159         case BTRFS_DEV_ITEM_KEY:
11160                 ret = check_dev_item(fs_info, eb, slot);
11161                 err |= ret;
11162                 break;
11163         case BTRFS_CHUNK_ITEM_KEY:
11164                 ret = check_chunk_item(fs_info, eb, slot);
11165                 err |= ret;
11166                 break;
11167         case BTRFS_DEV_EXTENT_KEY:
11168                 ret = check_dev_extent_item(fs_info, eb, slot);
11169                 err |= ret;
11170                 break;
11171         case BTRFS_EXTENT_ITEM_KEY:
11172         case BTRFS_METADATA_ITEM_KEY:
11173                 ret = check_extent_item(fs_info, eb, slot);
11174                 err |= ret;
11175                 break;
11176         case BTRFS_EXTENT_CSUM_KEY:
11177                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11178                 break;
11179         case BTRFS_TREE_BLOCK_REF_KEY:
11180                 ret = check_tree_block_backref(fs_info, key.offset,
11181                                                key.objectid, -1);
11182                 err |= ret;
11183                 break;
11184         case BTRFS_EXTENT_DATA_REF_KEY:
11185                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11186                 ret = check_extent_data_backref(fs_info,
11187                                 btrfs_extent_data_ref_root(eb, dref),
11188                                 btrfs_extent_data_ref_objectid(eb, dref),
11189                                 btrfs_extent_data_ref_offset(eb, dref),
11190                                 key.objectid, 0,
11191                                 btrfs_extent_data_ref_count(eb, dref));
11192                 err |= ret;
11193                 break;
11194         case BTRFS_SHARED_BLOCK_REF_KEY:
11195                 ret = check_shared_block_backref(fs_info, key.offset,
11196                                                  key.objectid, -1);
11197                 err |= ret;
11198                 break;
11199         case BTRFS_SHARED_DATA_REF_KEY:
11200                 ret = check_shared_data_backref(fs_info, key.offset,
11201                                                 key.objectid);
11202                 err |= ret;
11203                 break;
11204         default:
11205                 break;
11206         }
11207
11208         if (++slot < btrfs_header_nritems(eb))
11209                 goto next;
11210
11211         return err;
11212 }
11213
11214 /*
11215  * Helper function for later fs/subvol tree check.  To determine if a tree
11216  * block should be checked.
11217  * This function will ensure only the direct referencer with lowest rootid to
11218  * check a fs/subvolume tree block.
11219  *
11220  * Backref check at extent tree would detect errors like missing subvolume
11221  * tree, so we can do aggressive check to reduce duplicated checks.
11222  */
11223 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11224 {
11225         struct btrfs_root *extent_root = root->fs_info->extent_root;
11226         struct btrfs_key key;
11227         struct btrfs_path path;
11228         struct extent_buffer *leaf;
11229         int slot;
11230         struct btrfs_extent_item *ei;
11231         unsigned long ptr;
11232         unsigned long end;
11233         int type;
11234         u32 item_size;
11235         u64 offset;
11236         struct btrfs_extent_inline_ref *iref;
11237         int ret;
11238
11239         btrfs_init_path(&path);
11240         key.objectid = btrfs_header_bytenr(eb);
11241         key.type = BTRFS_METADATA_ITEM_KEY;
11242         key.offset = (u64)-1;
11243
11244         /*
11245          * Any failure in backref resolving means we can't determine
11246          * whom the tree block belongs to.
11247          * So in that case, we need to check that tree block
11248          */
11249         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11250         if (ret < 0)
11251                 goto need_check;
11252
11253         ret = btrfs_previous_extent_item(extent_root, &path,
11254                                          btrfs_header_bytenr(eb));
11255         if (ret)
11256                 goto need_check;
11257
11258         leaf = path.nodes[0];
11259         slot = path.slots[0];
11260         btrfs_item_key_to_cpu(leaf, &key, slot);
11261         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11262
11263         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11264                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11265         } else {
11266                 struct btrfs_tree_block_info *info;
11267
11268                 info = (struct btrfs_tree_block_info *)(ei + 1);
11269                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11270         }
11271
11272         item_size = btrfs_item_size_nr(leaf, slot);
11273         ptr = (unsigned long)iref;
11274         end = (unsigned long)ei + item_size;
11275         while (ptr < end) {
11276                 iref = (struct btrfs_extent_inline_ref *)ptr;
11277                 type = btrfs_extent_inline_ref_type(leaf, iref);
11278                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11279
11280                 /*
11281                  * We only check the tree block if current root is
11282                  * the lowest referencer of it.
11283                  */
11284                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11285                     offset < root->objectid) {
11286                         btrfs_release_path(&path);
11287                         return 0;
11288                 }
11289
11290                 ptr += btrfs_extent_inline_ref_size(type);
11291         }
11292         /*
11293          * Normally we should also check keyed tree block ref, but that may be
11294          * very time consuming.  Inlined ref should already make us skip a lot
11295          * of refs now.  So skip search keyed tree block ref.
11296          */
11297
11298 need_check:
11299         btrfs_release_path(&path);
11300         return 1;
11301 }
11302
11303 /*
11304  * Traversal function for tree block. We will do:
11305  * 1) Skip shared fs/subvolume tree blocks
11306  * 2) Update related bytes accounting
11307  * 3) Pre-order traversal
11308  */
11309 static int traverse_tree_block(struct btrfs_root *root,
11310                                 struct extent_buffer *node)
11311 {
11312         struct extent_buffer *eb;
11313         struct btrfs_key key;
11314         struct btrfs_key drop_key;
11315         int level;
11316         u64 nr;
11317         int i;
11318         int err = 0;
11319         int ret;
11320
11321         /*
11322          * Skip shared fs/subvolume tree block, in that case they will
11323          * be checked by referencer with lowest rootid
11324          */
11325         if (is_fstree(root->objectid) && !should_check(root, node))
11326                 return 0;
11327
11328         /* Update bytes accounting */
11329         total_btree_bytes += node->len;
11330         if (fs_root_objectid(btrfs_header_owner(node)))
11331                 total_fs_tree_bytes += node->len;
11332         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11333                 total_extent_tree_bytes += node->len;
11334         if (!found_old_backref &&
11335             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11336             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11337             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11338                 found_old_backref = 1;
11339
11340         /* pre-order tranversal, check itself first */
11341         level = btrfs_header_level(node);
11342         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11343                                    btrfs_header_level(node),
11344                                    btrfs_header_owner(node));
11345         err |= ret;
11346         if (err)
11347                 error(
11348         "check %s failed root %llu bytenr %llu level %d, force continue check",
11349                         level ? "node":"leaf", root->objectid,
11350                         btrfs_header_bytenr(node), btrfs_header_level(node));
11351
11352         if (!level) {
11353                 btree_space_waste += btrfs_leaf_free_space(root, node);
11354                 ret = check_leaf_items(root, node);
11355                 err |= ret;
11356                 return err;
11357         }
11358
11359         nr = btrfs_header_nritems(node);
11360         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11361         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11362                 sizeof(struct btrfs_key_ptr);
11363
11364         /* Then check all its children */
11365         for (i = 0; i < nr; i++) {
11366                 u64 blocknr = btrfs_node_blockptr(node, i);
11367
11368                 btrfs_node_key_to_cpu(node, &key, i);
11369                 if (level == root->root_item.drop_level &&
11370                     is_dropped_key(&key, &drop_key))
11371                         continue;
11372
11373                 /*
11374                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11375                  * to call the function itself.
11376                  */
11377                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11378                 if (extent_buffer_uptodate(eb)) {
11379                         ret = traverse_tree_block(root, eb);
11380                         err |= ret;
11381                 }
11382                 free_extent_buffer(eb);
11383         }
11384
11385         return err;
11386 }
11387
11388 /*
11389  * Low memory usage version check_chunks_and_extents.
11390  */
11391 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11392 {
11393         struct btrfs_path path;
11394         struct btrfs_key key;
11395         struct btrfs_root *root1;
11396         struct btrfs_root *cur_root;
11397         int err = 0;
11398         int ret;
11399
11400         root1 = root->fs_info->chunk_root;
11401         ret = traverse_tree_block(root1, root1->node);
11402         err |= ret;
11403
11404         root1 = root->fs_info->tree_root;
11405         ret = traverse_tree_block(root1, root1->node);
11406         err |= ret;
11407
11408         btrfs_init_path(&path);
11409         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11410         key.offset = 0;
11411         key.type = BTRFS_ROOT_ITEM_KEY;
11412
11413         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11414         if (ret) {
11415                 error("cannot find extent treet in tree_root");
11416                 goto out;
11417         }
11418
11419         while (1) {
11420                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11421                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11422                         goto next;
11423                 key.offset = (u64)-1;
11424
11425                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11426                 if (IS_ERR(cur_root) || !cur_root) {
11427                         error("failed to read tree: %lld", key.objectid);
11428                         goto next;
11429                 }
11430
11431                 ret = traverse_tree_block(cur_root, cur_root->node);
11432                 err |= ret;
11433
11434 next:
11435                 ret = btrfs_next_item(root1, &path);
11436                 if (ret)
11437                         goto out;
11438         }
11439
11440 out:
11441         btrfs_release_path(&path);
11442         return err;
11443 }
11444
11445 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11446                            struct btrfs_root *root, int overwrite)
11447 {
11448         struct extent_buffer *c;
11449         struct extent_buffer *old = root->node;
11450         int level;
11451         int ret;
11452         struct btrfs_disk_key disk_key = {0,0,0};
11453
11454         level = 0;
11455
11456         if (overwrite) {
11457                 c = old;
11458                 extent_buffer_get(c);
11459                 goto init;
11460         }
11461         c = btrfs_alloc_free_block(trans, root,
11462                                    root->nodesize,
11463                                    root->root_key.objectid,
11464                                    &disk_key, level, 0, 0);
11465         if (IS_ERR(c)) {
11466                 c = old;
11467                 extent_buffer_get(c);
11468                 overwrite = 1;
11469         }
11470 init:
11471         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11472         btrfs_set_header_level(c, level);
11473         btrfs_set_header_bytenr(c, c->start);
11474         btrfs_set_header_generation(c, trans->transid);
11475         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11476         btrfs_set_header_owner(c, root->root_key.objectid);
11477
11478         write_extent_buffer(c, root->fs_info->fsid,
11479                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11480
11481         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11482                             btrfs_header_chunk_tree_uuid(c),
11483                             BTRFS_UUID_SIZE);
11484
11485         btrfs_mark_buffer_dirty(c);
11486         /*
11487          * this case can happen in the following case:
11488          *
11489          * 1.overwrite previous root.
11490          *
11491          * 2.reinit reloc data root, this is because we skip pin
11492          * down reloc data tree before which means we can allocate
11493          * same block bytenr here.
11494          */
11495         if (old->start == c->start) {
11496                 btrfs_set_root_generation(&root->root_item,
11497                                           trans->transid);
11498                 root->root_item.level = btrfs_header_level(root->node);
11499                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11500                                         &root->root_key, &root->root_item);
11501                 if (ret) {
11502                         free_extent_buffer(c);
11503                         return ret;
11504                 }
11505         }
11506         free_extent_buffer(old);
11507         root->node = c;
11508         add_root_to_dirty_list(root);
11509         return 0;
11510 }
11511
11512 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11513                                 struct extent_buffer *eb, int tree_root)
11514 {
11515         struct extent_buffer *tmp;
11516         struct btrfs_root_item *ri;
11517         struct btrfs_key key;
11518         u64 bytenr;
11519         u32 nodesize;
11520         int level = btrfs_header_level(eb);
11521         int nritems;
11522         int ret;
11523         int i;
11524
11525         /*
11526          * If we have pinned this block before, don't pin it again.
11527          * This can not only avoid forever loop with broken filesystem
11528          * but also give us some speedups.
11529          */
11530         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11531                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11532                 return 0;
11533
11534         btrfs_pin_extent(fs_info, eb->start, eb->len);
11535
11536         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11537         nritems = btrfs_header_nritems(eb);
11538         for (i = 0; i < nritems; i++) {
11539                 if (level == 0) {
11540                         btrfs_item_key_to_cpu(eb, &key, i);
11541                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11542                                 continue;
11543                         /* Skip the extent root and reloc roots */
11544                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11545                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11546                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11547                                 continue;
11548                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11549                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11550
11551                         /*
11552                          * If at any point we start needing the real root we
11553                          * will have to build a stump root for the root we are
11554                          * in, but for now this doesn't actually use the root so
11555                          * just pass in extent_root.
11556                          */
11557                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11558                                               nodesize, 0);
11559                         if (!extent_buffer_uptodate(tmp)) {
11560                                 fprintf(stderr, "Error reading root block\n");
11561                                 return -EIO;
11562                         }
11563                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11564                         free_extent_buffer(tmp);
11565                         if (ret)
11566                                 return ret;
11567                 } else {
11568                         bytenr = btrfs_node_blockptr(eb, i);
11569
11570                         /* If we aren't the tree root don't read the block */
11571                         if (level == 1 && !tree_root) {
11572                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11573                                 continue;
11574                         }
11575
11576                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11577                                               nodesize, 0);
11578                         if (!extent_buffer_uptodate(tmp)) {
11579                                 fprintf(stderr, "Error reading tree block\n");
11580                                 return -EIO;
11581                         }
11582                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11583                         free_extent_buffer(tmp);
11584                         if (ret)
11585                                 return ret;
11586                 }
11587         }
11588
11589         return 0;
11590 }
11591
11592 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11593 {
11594         int ret;
11595
11596         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11597         if (ret)
11598                 return ret;
11599
11600         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11601 }
11602
11603 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11604 {
11605         struct btrfs_block_group_cache *cache;
11606         struct btrfs_path path;
11607         struct extent_buffer *leaf;
11608         struct btrfs_chunk *chunk;
11609         struct btrfs_key key;
11610         int ret;
11611         u64 start;
11612
11613         btrfs_init_path(&path);
11614         key.objectid = 0;
11615         key.type = BTRFS_CHUNK_ITEM_KEY;
11616         key.offset = 0;
11617         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11618         if (ret < 0) {
11619                 btrfs_release_path(&path);
11620                 return ret;
11621         }
11622
11623         /*
11624          * We do this in case the block groups were screwed up and had alloc
11625          * bits that aren't actually set on the chunks.  This happens with
11626          * restored images every time and could happen in real life I guess.
11627          */
11628         fs_info->avail_data_alloc_bits = 0;
11629         fs_info->avail_metadata_alloc_bits = 0;
11630         fs_info->avail_system_alloc_bits = 0;
11631
11632         /* First we need to create the in-memory block groups */
11633         while (1) {
11634                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11635                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11636                         if (ret < 0) {
11637                                 btrfs_release_path(&path);
11638                                 return ret;
11639                         }
11640                         if (ret) {
11641                                 ret = 0;
11642                                 break;
11643                         }
11644                 }
11645                 leaf = path.nodes[0];
11646                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11647                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11648                         path.slots[0]++;
11649                         continue;
11650                 }
11651
11652                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11653                 btrfs_add_block_group(fs_info, 0,
11654                                       btrfs_chunk_type(leaf, chunk),
11655                                       key.objectid, key.offset,
11656                                       btrfs_chunk_length(leaf, chunk));
11657                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11658                                  key.offset + btrfs_chunk_length(leaf, chunk),
11659                                  GFP_NOFS);
11660                 path.slots[0]++;
11661         }
11662         start = 0;
11663         while (1) {
11664                 cache = btrfs_lookup_first_block_group(fs_info, start);
11665                 if (!cache)
11666                         break;
11667                 cache->cached = 1;
11668                 start = cache->key.objectid + cache->key.offset;
11669         }
11670
11671         btrfs_release_path(&path);
11672         return 0;
11673 }
11674
11675 static int reset_balance(struct btrfs_trans_handle *trans,
11676                          struct btrfs_fs_info *fs_info)
11677 {
11678         struct btrfs_root *root = fs_info->tree_root;
11679         struct btrfs_path path;
11680         struct extent_buffer *leaf;
11681         struct btrfs_key key;
11682         int del_slot, del_nr = 0;
11683         int ret;
11684         int found = 0;
11685
11686         btrfs_init_path(&path);
11687         key.objectid = BTRFS_BALANCE_OBJECTID;
11688         key.type = BTRFS_BALANCE_ITEM_KEY;
11689         key.offset = 0;
11690         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11691         if (ret) {
11692                 if (ret > 0)
11693                         ret = 0;
11694                 if (!ret)
11695                         goto reinit_data_reloc;
11696                 else
11697                         goto out;
11698         }
11699
11700         ret = btrfs_del_item(trans, root, &path);
11701         if (ret)
11702                 goto out;
11703         btrfs_release_path(&path);
11704
11705         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11706         key.type = BTRFS_ROOT_ITEM_KEY;
11707         key.offset = 0;
11708         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11709         if (ret < 0)
11710                 goto out;
11711         while (1) {
11712                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11713                         if (!found)
11714                                 break;
11715
11716                         if (del_nr) {
11717                                 ret = btrfs_del_items(trans, root, &path,
11718                                                       del_slot, del_nr);
11719                                 del_nr = 0;
11720                                 if (ret)
11721                                         goto out;
11722                         }
11723                         key.offset++;
11724                         btrfs_release_path(&path);
11725
11726                         found = 0;
11727                         ret = btrfs_search_slot(trans, root, &key, &path,
11728                                                 -1, 1);
11729                         if (ret < 0)
11730                                 goto out;
11731                         continue;
11732                 }
11733                 found = 1;
11734                 leaf = path.nodes[0];
11735                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11736                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11737                         break;
11738                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11739                         path.slots[0]++;
11740                         continue;
11741                 }
11742                 if (!del_nr) {
11743                         del_slot = path.slots[0];
11744                         del_nr = 1;
11745                 } else {
11746                         del_nr++;
11747                 }
11748                 path.slots[0]++;
11749         }
11750
11751         if (del_nr) {
11752                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11753                 if (ret)
11754                         goto out;
11755         }
11756         btrfs_release_path(&path);
11757
11758 reinit_data_reloc:
11759         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11760         key.type = BTRFS_ROOT_ITEM_KEY;
11761         key.offset = (u64)-1;
11762         root = btrfs_read_fs_root(fs_info, &key);
11763         if (IS_ERR(root)) {
11764                 fprintf(stderr, "Error reading data reloc tree\n");
11765                 ret = PTR_ERR(root);
11766                 goto out;
11767         }
11768         record_root_in_trans(trans, root);
11769         ret = btrfs_fsck_reinit_root(trans, root, 0);
11770         if (ret)
11771                 goto out;
11772         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11773 out:
11774         btrfs_release_path(&path);
11775         return ret;
11776 }
11777
11778 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11779                               struct btrfs_fs_info *fs_info)
11780 {
11781         u64 start = 0;
11782         int ret;
11783
11784         /*
11785          * The only reason we don't do this is because right now we're just
11786          * walking the trees we find and pinning down their bytes, we don't look
11787          * at any of the leaves.  In order to do mixed groups we'd have to check
11788          * the leaves of any fs roots and pin down the bytes for any file
11789          * extents we find.  Not hard but why do it if we don't have to?
11790          */
11791         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11792                 fprintf(stderr, "We don't support re-initing the extent tree "
11793                         "for mixed block groups yet, please notify a btrfs "
11794                         "developer you want to do this so they can add this "
11795                         "functionality.\n");
11796                 return -EINVAL;
11797         }
11798
11799         /*
11800          * first we need to walk all of the trees except the extent tree and pin
11801          * down the bytes that are in use so we don't overwrite any existing
11802          * metadata.
11803          */
11804         ret = pin_metadata_blocks(fs_info);
11805         if (ret) {
11806                 fprintf(stderr, "error pinning down used bytes\n");
11807                 return ret;
11808         }
11809
11810         /*
11811          * Need to drop all the block groups since we're going to recreate all
11812          * of them again.
11813          */
11814         btrfs_free_block_groups(fs_info);
11815         ret = reset_block_groups(fs_info);
11816         if (ret) {
11817                 fprintf(stderr, "error resetting the block groups\n");
11818                 return ret;
11819         }
11820
11821         /* Ok we can allocate now, reinit the extent root */
11822         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11823         if (ret) {
11824                 fprintf(stderr, "extent root initialization failed\n");
11825                 /*
11826                  * When the transaction code is updated we should end the
11827                  * transaction, but for now progs only knows about commit so
11828                  * just return an error.
11829                  */
11830                 return ret;
11831         }
11832
11833         /*
11834          * Now we have all the in-memory block groups setup so we can make
11835          * allocations properly, and the metadata we care about is safe since we
11836          * pinned all of it above.
11837          */
11838         while (1) {
11839                 struct btrfs_block_group_cache *cache;
11840
11841                 cache = btrfs_lookup_first_block_group(fs_info, start);
11842                 if (!cache)
11843                         break;
11844                 start = cache->key.objectid + cache->key.offset;
11845                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11846                                         &cache->key, &cache->item,
11847                                         sizeof(cache->item));
11848                 if (ret) {
11849                         fprintf(stderr, "Error adding block group\n");
11850                         return ret;
11851                 }
11852                 btrfs_extent_post_op(trans, fs_info->extent_root);
11853         }
11854
11855         ret = reset_balance(trans, fs_info);
11856         if (ret)
11857                 fprintf(stderr, "error resetting the pending balance\n");
11858
11859         return ret;
11860 }
11861
11862 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11863 {
11864         struct btrfs_path path;
11865         struct btrfs_trans_handle *trans;
11866         struct btrfs_key key;
11867         int ret;
11868
11869         printf("Recowing metadata block %llu\n", eb->start);
11870         key.objectid = btrfs_header_owner(eb);
11871         key.type = BTRFS_ROOT_ITEM_KEY;
11872         key.offset = (u64)-1;
11873
11874         root = btrfs_read_fs_root(root->fs_info, &key);
11875         if (IS_ERR(root)) {
11876                 fprintf(stderr, "Couldn't find owner root %llu\n",
11877                         key.objectid);
11878                 return PTR_ERR(root);
11879         }
11880
11881         trans = btrfs_start_transaction(root, 1);
11882         if (IS_ERR(trans))
11883                 return PTR_ERR(trans);
11884
11885         btrfs_init_path(&path);
11886         path.lowest_level = btrfs_header_level(eb);
11887         if (path.lowest_level)
11888                 btrfs_node_key_to_cpu(eb, &key, 0);
11889         else
11890                 btrfs_item_key_to_cpu(eb, &key, 0);
11891
11892         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11893         btrfs_commit_transaction(trans, root);
11894         btrfs_release_path(&path);
11895         return ret;
11896 }
11897
11898 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11899 {
11900         struct btrfs_path path;
11901         struct btrfs_trans_handle *trans;
11902         struct btrfs_key key;
11903         int ret;
11904
11905         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11906                bad->key.type, bad->key.offset);
11907         key.objectid = bad->root_id;
11908         key.type = BTRFS_ROOT_ITEM_KEY;
11909         key.offset = (u64)-1;
11910
11911         root = btrfs_read_fs_root(root->fs_info, &key);
11912         if (IS_ERR(root)) {
11913                 fprintf(stderr, "Couldn't find owner root %llu\n",
11914                         key.objectid);
11915                 return PTR_ERR(root);
11916         }
11917
11918         trans = btrfs_start_transaction(root, 1);
11919         if (IS_ERR(trans))
11920                 return PTR_ERR(trans);
11921
11922         btrfs_init_path(&path);
11923         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11924         if (ret) {
11925                 if (ret > 0)
11926                         ret = 0;
11927                 goto out;
11928         }
11929         ret = btrfs_del_item(trans, root, &path);
11930 out:
11931         btrfs_commit_transaction(trans, root);
11932         btrfs_release_path(&path);
11933         return ret;
11934 }
11935
11936 static int zero_log_tree(struct btrfs_root *root)
11937 {
11938         struct btrfs_trans_handle *trans;
11939         int ret;
11940
11941         trans = btrfs_start_transaction(root, 1);
11942         if (IS_ERR(trans)) {
11943                 ret = PTR_ERR(trans);
11944                 return ret;
11945         }
11946         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11947         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11948         ret = btrfs_commit_transaction(trans, root);
11949         return ret;
11950 }
11951
11952 static int populate_csum(struct btrfs_trans_handle *trans,
11953                          struct btrfs_root *csum_root, char *buf, u64 start,
11954                          u64 len)
11955 {
11956         u64 offset = 0;
11957         u64 sectorsize;
11958         int ret = 0;
11959
11960         while (offset < len) {
11961                 sectorsize = csum_root->sectorsize;
11962                 ret = read_extent_data(csum_root, buf, start + offset,
11963                                        &sectorsize, 0);
11964                 if (ret)
11965                         break;
11966                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11967                                             start + offset, buf, sectorsize);
11968                 if (ret)
11969                         break;
11970                 offset += sectorsize;
11971         }
11972         return ret;
11973 }
11974
11975 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11976                                       struct btrfs_root *csum_root,
11977                                       struct btrfs_root *cur_root)
11978 {
11979         struct btrfs_path path;
11980         struct btrfs_key key;
11981         struct extent_buffer *node;
11982         struct btrfs_file_extent_item *fi;
11983         char *buf = NULL;
11984         u64 start = 0;
11985         u64 len = 0;
11986         int slot = 0;
11987         int ret = 0;
11988
11989         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11990         if (!buf)
11991                 return -ENOMEM;
11992
11993         btrfs_init_path(&path);
11994         key.objectid = 0;
11995         key.offset = 0;
11996         key.type = 0;
11997         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11998         if (ret < 0)
11999                 goto out;
12000         /* Iterate all regular file extents and fill its csum */
12001         while (1) {
12002                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12003
12004                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12005                         goto next;
12006                 node = path.nodes[0];
12007                 slot = path.slots[0];
12008                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12009                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12010                         goto next;
12011                 start = btrfs_file_extent_disk_bytenr(node, fi);
12012                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12013
12014                 ret = populate_csum(trans, csum_root, buf, start, len);
12015                 if (ret == -EEXIST)
12016                         ret = 0;
12017                 if (ret < 0)
12018                         goto out;
12019 next:
12020                 /*
12021                  * TODO: if next leaf is corrupted, jump to nearest next valid
12022                  * leaf.
12023                  */
12024                 ret = btrfs_next_item(cur_root, &path);
12025                 if (ret < 0)
12026                         goto out;
12027                 if (ret > 0) {
12028                         ret = 0;
12029                         goto out;
12030                 }
12031         }
12032
12033 out:
12034         btrfs_release_path(&path);
12035         free(buf);
12036         return ret;
12037 }
12038
12039 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12040                                   struct btrfs_root *csum_root)
12041 {
12042         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12043         struct btrfs_path path;
12044         struct btrfs_root *tree_root = fs_info->tree_root;
12045         struct btrfs_root *cur_root;
12046         struct extent_buffer *node;
12047         struct btrfs_key key;
12048         int slot = 0;
12049         int ret = 0;
12050
12051         btrfs_init_path(&path);
12052         key.objectid = BTRFS_FS_TREE_OBJECTID;
12053         key.offset = 0;
12054         key.type = BTRFS_ROOT_ITEM_KEY;
12055         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12056         if (ret < 0)
12057                 goto out;
12058         if (ret > 0) {
12059                 ret = -ENOENT;
12060                 goto out;
12061         }
12062
12063         while (1) {
12064                 node = path.nodes[0];
12065                 slot = path.slots[0];
12066                 btrfs_item_key_to_cpu(node, &key, slot);
12067                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12068                         goto out;
12069                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12070                         goto next;
12071                 if (!is_fstree(key.objectid))
12072                         goto next;
12073                 key.offset = (u64)-1;
12074
12075                 cur_root = btrfs_read_fs_root(fs_info, &key);
12076                 if (IS_ERR(cur_root) || !cur_root) {
12077                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12078                                 key.objectid);
12079                         goto out;
12080                 }
12081                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12082                                 cur_root);
12083                 if (ret < 0)
12084                         goto out;
12085 next:
12086                 ret = btrfs_next_item(tree_root, &path);
12087                 if (ret > 0) {
12088                         ret = 0;
12089                         goto out;
12090                 }
12091                 if (ret < 0)
12092                         goto out;
12093         }
12094
12095 out:
12096         btrfs_release_path(&path);
12097         return ret;
12098 }
12099
12100 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12101                                       struct btrfs_root *csum_root)
12102 {
12103         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12104         struct btrfs_path path;
12105         struct btrfs_extent_item *ei;
12106         struct extent_buffer *leaf;
12107         char *buf;
12108         struct btrfs_key key;
12109         int ret;
12110
12111         btrfs_init_path(&path);
12112         key.objectid = 0;
12113         key.type = BTRFS_EXTENT_ITEM_KEY;
12114         key.offset = 0;
12115         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12116         if (ret < 0) {
12117                 btrfs_release_path(&path);
12118                 return ret;
12119         }
12120
12121         buf = malloc(csum_root->sectorsize);
12122         if (!buf) {
12123                 btrfs_release_path(&path);
12124                 return -ENOMEM;
12125         }
12126
12127         while (1) {
12128                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12129                         ret = btrfs_next_leaf(extent_root, &path);
12130                         if (ret < 0)
12131                                 break;
12132                         if (ret) {
12133                                 ret = 0;
12134                                 break;
12135                         }
12136                 }
12137                 leaf = path.nodes[0];
12138
12139                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12140                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12141                         path.slots[0]++;
12142                         continue;
12143                 }
12144
12145                 ei = btrfs_item_ptr(leaf, path.slots[0],
12146                                     struct btrfs_extent_item);
12147                 if (!(btrfs_extent_flags(leaf, ei) &
12148                       BTRFS_EXTENT_FLAG_DATA)) {
12149                         path.slots[0]++;
12150                         continue;
12151                 }
12152
12153                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12154                                     key.offset);
12155                 if (ret)
12156                         break;
12157                 path.slots[0]++;
12158         }
12159
12160         btrfs_release_path(&path);
12161         free(buf);
12162         return ret;
12163 }
12164
/*
 * Recalculate the data checksums and insert them into the csum tree.
 *
 * Two sources can drive the rebuild.  Normally the extent tree is walked, but
 * an extent tree init wipes out all extent info, so in that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are walked instead.
 *
 * Returns whatever the selected walker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12181
12182 static void free_roots_info_cache(void)
12183 {
12184         if (!roots_info_cache)
12185                 return;
12186
12187         while (!cache_tree_empty(roots_info_cache)) {
12188                 struct cache_extent *entry;
12189                 struct root_item_info *rii;
12190
12191                 entry = first_cache_extent(roots_info_cache);
12192                 if (!entry)
12193                         break;
12194                 remove_cache_extent(roots_info_cache, entry);
12195                 rii = container_of(entry, struct root_item_info, cache_extent);
12196                 free(rii);
12197         }
12198
12199         free(roots_info_cache);
12200         roots_info_cache = NULL;
12201 }
12202
12203 static int build_roots_info_cache(struct btrfs_fs_info *info)
12204 {
12205         int ret = 0;
12206         struct btrfs_key key;
12207         struct extent_buffer *leaf;
12208         struct btrfs_path path;
12209
12210         if (!roots_info_cache) {
12211                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12212                 if (!roots_info_cache)
12213                         return -ENOMEM;
12214                 cache_tree_init(roots_info_cache);
12215         }
12216
12217         btrfs_init_path(&path);
12218         key.objectid = 0;
12219         key.type = BTRFS_EXTENT_ITEM_KEY;
12220         key.offset = 0;
12221         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12222         if (ret < 0)
12223                 goto out;
12224         leaf = path.nodes[0];
12225
12226         while (1) {
12227                 struct btrfs_key found_key;
12228                 struct btrfs_extent_item *ei;
12229                 struct btrfs_extent_inline_ref *iref;
12230                 int slot = path.slots[0];
12231                 int type;
12232                 u64 flags;
12233                 u64 root_id;
12234                 u8 level;
12235                 struct cache_extent *entry;
12236                 struct root_item_info *rii;
12237
12238                 if (slot >= btrfs_header_nritems(leaf)) {
12239                         ret = btrfs_next_leaf(info->extent_root, &path);
12240                         if (ret < 0) {
12241                                 break;
12242                         } else if (ret) {
12243                                 ret = 0;
12244                                 break;
12245                         }
12246                         leaf = path.nodes[0];
12247                         slot = path.slots[0];
12248                 }
12249
12250                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12251
12252                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12253                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12254                         goto next;
12255
12256                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12257                 flags = btrfs_extent_flags(leaf, ei);
12258
12259                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12260                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12261                         goto next;
12262
12263                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12264                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12265                         level = found_key.offset;
12266                 } else {
12267                         struct btrfs_tree_block_info *binfo;
12268
12269                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12270                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12271                         level = btrfs_tree_block_level(leaf, binfo);
12272                 }
12273
12274                 /*
12275                  * For a root extent, it must be of the following type and the
12276                  * first (and only one) iref in the item.
12277                  */
12278                 type = btrfs_extent_inline_ref_type(leaf, iref);
12279                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12280                         goto next;
12281
12282                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12283                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12284                 if (!entry) {
12285                         rii = malloc(sizeof(struct root_item_info));
12286                         if (!rii) {
12287                                 ret = -ENOMEM;
12288                                 goto out;
12289                         }
12290                         rii->cache_extent.start = root_id;
12291                         rii->cache_extent.size = 1;
12292                         rii->level = (u8)-1;
12293                         entry = &rii->cache_extent;
12294                         ret = insert_cache_extent(roots_info_cache, entry);
12295                         ASSERT(ret == 0);
12296                 } else {
12297                         rii = container_of(entry, struct root_item_info,
12298                                            cache_extent);
12299                 }
12300
12301                 ASSERT(rii->cache_extent.start == root_id);
12302                 ASSERT(rii->cache_extent.size == 1);
12303
12304                 if (level > rii->level || rii->level == (u8)-1) {
12305                         rii->level = level;
12306                         rii->bytenr = found_key.objectid;
12307                         rii->gen = btrfs_extent_generation(leaf, ei);
12308                         rii->node_count = 1;
12309                 } else if (level == rii->level) {
12310                         rii->node_count++;
12311                 }
12312 next:
12313                 path.slots[0]++;
12314         }
12315
12316 out:
12317         btrfs_release_path(&path);
12318
12319         return ret;
12320 }
12321
12322 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12323                                   struct btrfs_path *path,
12324                                   const struct btrfs_key *root_key,
12325                                   const int read_only_mode)
12326 {
12327         const u64 root_id = root_key->objectid;
12328         struct cache_extent *entry;
12329         struct root_item_info *rii;
12330         struct btrfs_root_item ri;
12331         unsigned long offset;
12332
12333         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12334         if (!entry) {
12335                 fprintf(stderr,
12336                         "Error: could not find extent items for root %llu\n",
12337                         root_key->objectid);
12338                 return -ENOENT;
12339         }
12340
12341         rii = container_of(entry, struct root_item_info, cache_extent);
12342         ASSERT(rii->cache_extent.start == root_id);
12343         ASSERT(rii->cache_extent.size == 1);
12344
12345         if (rii->node_count != 1) {
12346                 fprintf(stderr,
12347                         "Error: could not find btree root extent for root %llu\n",
12348                         root_id);
12349                 return -ENOENT;
12350         }
12351
12352         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12353         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12354
12355         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12356             btrfs_root_level(&ri) != rii->level ||
12357             btrfs_root_generation(&ri) != rii->gen) {
12358
12359                 /*
12360                  * If we're in repair mode but our caller told us to not update
12361                  * the root item, i.e. just check if it needs to be updated, don't
12362                  * print this message, since the caller will call us again shortly
12363                  * for the same root item without read only mode (the caller will
12364                  * open a transaction first).
12365                  */
12366                 if (!(read_only_mode && repair))
12367                         fprintf(stderr,
12368                                 "%sroot item for root %llu,"
12369                                 " current bytenr %llu, current gen %llu, current level %u,"
12370                                 " new bytenr %llu, new gen %llu, new level %u\n",
12371                                 (read_only_mode ? "" : "fixing "),
12372                                 root_id,
12373                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12374                                 btrfs_root_level(&ri),
12375                                 rii->bytenr, rii->gen, rii->level);
12376
12377                 if (btrfs_root_generation(&ri) > rii->gen) {
12378                         fprintf(stderr,
12379                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12380                                 root_id, btrfs_root_generation(&ri), rii->gen);
12381                         return -EINVAL;
12382                 }
12383
12384                 if (!read_only_mode) {
12385                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12386                         btrfs_set_root_level(&ri, rii->level);
12387                         btrfs_set_root_generation(&ri, rii->gen);
12388                         write_extent_buffer(path->nodes[0], &ri,
12389                                             offset, sizeof(ri));
12390                 }
12391
12392                 return 1;
12393         }
12394
12395         return 0;
12396 }
12397
12398 /*
12399  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12400  * caused read-only snapshots to be corrupted if they were created at a moment
12401  * when the source subvolume/snapshot had orphan items. The issue was that the
12402  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12403  * node instead of the post orphan cleanup root node.
12404  * So this function, and its callees, just detects and fixes those cases. Even
12405  * though the regression was for read-only snapshots, this function applies to
12406  * any snapshot/subvolume root.
12407  * This must be run before any other repair code - not doing it so, makes other
12408  * repair code delete or modify backrefs in the extent tree for example, which
12409  * will result in an inconsistent fs after repairing the root items.
12410  */
12411 static int repair_root_items(struct btrfs_fs_info *info)
12412 {
12413         struct btrfs_path path;
12414         struct btrfs_key key;
12415         struct extent_buffer *leaf;
12416         struct btrfs_trans_handle *trans = NULL;
12417         int ret = 0;
12418         int bad_roots = 0;
12419         int need_trans = 0;
12420
12421         btrfs_init_path(&path);
12422
12423         ret = build_roots_info_cache(info);
12424         if (ret)
12425                 goto out;
12426
12427         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12428         key.type = BTRFS_ROOT_ITEM_KEY;
12429         key.offset = 0;
12430
12431 again:
12432         /*
12433          * Avoid opening and committing transactions if a leaf doesn't have
12434          * any root items that need to be fixed, so that we avoid rotating
12435          * backup roots unnecessarily.
12436          */
12437         if (need_trans) {
12438                 trans = btrfs_start_transaction(info->tree_root, 1);
12439                 if (IS_ERR(trans)) {
12440                         ret = PTR_ERR(trans);
12441                         goto out;
12442                 }
12443         }
12444
12445         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12446                                 0, trans ? 1 : 0);
12447         if (ret < 0)
12448                 goto out;
12449         leaf = path.nodes[0];
12450
12451         while (1) {
12452                 struct btrfs_key found_key;
12453
12454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12455                         int no_more_keys = find_next_key(&path, &key);
12456
12457                         btrfs_release_path(&path);
12458                         if (trans) {
12459                                 ret = btrfs_commit_transaction(trans,
12460                                                                info->tree_root);
12461                                 trans = NULL;
12462                                 if (ret < 0)
12463                                         goto out;
12464                         }
12465                         need_trans = 0;
12466                         if (no_more_keys)
12467                                 break;
12468                         goto again;
12469                 }
12470
12471                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12472
12473                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12474                         goto next;
12475                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12476                         goto next;
12477
12478                 ret = maybe_repair_root_item(info, &path, &found_key,
12479                                              trans ? 0 : 1);
12480                 if (ret < 0)
12481                         goto out;
12482                 if (ret) {
12483                         if (!trans && repair) {
12484                                 need_trans = 1;
12485                                 key = found_key;
12486                                 btrfs_release_path(&path);
12487                                 goto again;
12488                         }
12489                         bad_roots++;
12490                 }
12491 next:
12492                 path.slots[0]++;
12493         }
12494         ret = 0;
12495 out:
12496         free_roots_info_cache();
12497         btrfs_release_path(&path);
12498         if (trans)
12499                 btrfs_commit_transaction(trans, info->tree_root);
12500         if (ret < 0)
12501                 return ret;
12502
12503         return bad_roots;
12504 }
12505
12506 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12507 {
12508         struct btrfs_trans_handle *trans;
12509         struct btrfs_block_group_cache *bg_cache;
12510         u64 current = 0;
12511         int ret = 0;
12512
12513         /* Clear all free space cache inodes and its extent data */
12514         while (1) {
12515                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12516                 if (!bg_cache)
12517                         break;
12518                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12519                 if (ret < 0)
12520                         return ret;
12521                 current = bg_cache->key.objectid + bg_cache->key.offset;
12522         }
12523
12524         /* Don't forget to set cache_generation to -1 */
12525         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12526         if (IS_ERR(trans)) {
12527                 error("failed to update super block cache generation");
12528                 return PTR_ERR(trans);
12529         }
12530         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12531         btrfs_commit_transaction(trans, fs_info->tree_root);
12532
12533         return ret;
12534 }
12535
/*
 * Help text for "btrfs check".  cmd_check() hands this NULL-terminated table
 * to usage(); every entry is one line of text.
 */
const char *const cmd_check_usage[] = {
        /* Synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Superblock selection and repair related options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",

        /* Check mode */
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",

        /* Extra checks, reports and remaining options */
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12566
12567 int cmd_check(int argc, char **argv)
12568 {
12569         struct cache_tree root_cache;
12570         struct btrfs_root *root;
12571         struct btrfs_fs_info *info;
12572         u64 bytenr = 0;
12573         u64 subvolid = 0;
12574         u64 tree_root_bytenr = 0;
12575         u64 chunk_root_bytenr = 0;
12576         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12577         int ret;
12578         int err = 0;
12579         u64 num;
12580         int init_csum_tree = 0;
12581         int readonly = 0;
12582         int clear_space_cache = 0;
12583         int qgroup_report = 0;
12584         int qgroups_repaired = 0;
12585         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12586
12587         while(1) {
12588                 int c;
12589                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12590                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12591                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12592                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12593                 static const struct option long_options[] = {
12594                         { "super", required_argument, NULL, 's' },
12595                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12596                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12597                         { "init-csum-tree", no_argument, NULL,
12598                                 GETOPT_VAL_INIT_CSUM },
12599                         { "init-extent-tree", no_argument, NULL,
12600                                 GETOPT_VAL_INIT_EXTENT },
12601                         { "check-data-csum", no_argument, NULL,
12602                                 GETOPT_VAL_CHECK_CSUM },
12603                         { "backup", no_argument, NULL, 'b' },
12604                         { "subvol-extents", required_argument, NULL, 'E' },
12605                         { "qgroup-report", no_argument, NULL, 'Q' },
12606                         { "tree-root", required_argument, NULL, 'r' },
12607                         { "chunk-root", required_argument, NULL,
12608                                 GETOPT_VAL_CHUNK_TREE },
12609                         { "progress", no_argument, NULL, 'p' },
12610                         { "mode", required_argument, NULL,
12611                                 GETOPT_VAL_MODE },
12612                         { "clear-space-cache", required_argument, NULL,
12613                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12614                         { NULL, 0, NULL, 0}
12615                 };
12616
12617                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12618                 if (c < 0)
12619                         break;
12620                 switch(c) {
12621                         case 'a': /* ignored */ break;
12622                         case 'b':
12623                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12624                                 break;
12625                         case 's':
12626                                 num = arg_strtou64(optarg);
12627                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12628                                         error(
12629                                         "super mirror should be less than %d",
12630                                                 BTRFS_SUPER_MIRROR_MAX);
12631                                         exit(1);
12632                                 }
12633                                 bytenr = btrfs_sb_offset(((int)num));
12634                                 printf("using SB copy %llu, bytenr %llu\n", num,
12635                                        (unsigned long long)bytenr);
12636                                 break;
12637                         case 'Q':
12638                                 qgroup_report = 1;
12639                                 break;
12640                         case 'E':
12641                                 subvolid = arg_strtou64(optarg);
12642                                 break;
12643                         case 'r':
12644                                 tree_root_bytenr = arg_strtou64(optarg);
12645                                 break;
12646                         case GETOPT_VAL_CHUNK_TREE:
12647                                 chunk_root_bytenr = arg_strtou64(optarg);
12648                                 break;
12649                         case 'p':
12650                                 ctx.progress_enabled = true;
12651                                 break;
12652                         case '?':
12653                         case 'h':
12654                                 usage(cmd_check_usage);
12655                         case GETOPT_VAL_REPAIR:
12656                                 printf("enabling repair mode\n");
12657                                 repair = 1;
12658                                 ctree_flags |= OPEN_CTREE_WRITES;
12659                                 break;
12660                         case GETOPT_VAL_READONLY:
12661                                 readonly = 1;
12662                                 break;
12663                         case GETOPT_VAL_INIT_CSUM:
12664                                 printf("Creating a new CRC tree\n");
12665                                 init_csum_tree = 1;
12666                                 repair = 1;
12667                                 ctree_flags |= OPEN_CTREE_WRITES;
12668                                 break;
12669                         case GETOPT_VAL_INIT_EXTENT:
12670                                 init_extent_tree = 1;
12671                                 ctree_flags |= (OPEN_CTREE_WRITES |
12672                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12673                                 repair = 1;
12674                                 break;
12675                         case GETOPT_VAL_CHECK_CSUM:
12676                                 check_data_csum = 1;
12677                                 break;
12678                         case GETOPT_VAL_MODE:
12679                                 check_mode = parse_check_mode(optarg);
12680                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12681                                         error("unknown mode: %s", optarg);
12682                                         exit(1);
12683                                 }
12684                                 break;
12685                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12686                                 if (strcmp(optarg, "v1") == 0) {
12687                                         clear_space_cache = 1;
12688                                 } else if (strcmp(optarg, "v2") == 0) {
12689                                         clear_space_cache = 2;
12690                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12691                                 } else {
12692                                         error(
12693                 "invalid argument to --clear-space-cache, must be v1 or v2");
12694                                         exit(1);
12695                                 }
12696                                 ctree_flags |= OPEN_CTREE_WRITES;
12697                                 break;
12698                 }
12699         }
12700
12701         if (check_argc_exact(argc - optind, 1))
12702                 usage(cmd_check_usage);
12703
12704         if (ctx.progress_enabled) {
12705                 ctx.tp = TASK_NOTHING;
12706                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12707         }
12708
12709         /* This check is the only reason for --readonly to exist */
12710         if (readonly && repair) {
12711                 error("repair options are not compatible with --readonly");
12712                 exit(1);
12713         }
12714
12715         /*
12716          * Not supported yet
12717          */
12718         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12719                 error("low memory mode doesn't support repair yet");
12720                 exit(1);
12721         }
12722
12723         radix_tree_init();
12724         cache_tree_init(&root_cache);
12725
12726         if((ret = check_mounted(argv[optind])) < 0) {
12727                 error("could not check mount status: %s", strerror(-ret));
12728                 err |= !!ret;
12729                 goto err_out;
12730         } else if(ret) {
12731                 error("%s is currently mounted, aborting", argv[optind]);
12732                 ret = -EBUSY;
12733                 err |= !!ret;
12734                 goto err_out;
12735         }
12736
12737         /* only allow partial opening under repair mode */
12738         if (repair)
12739                 ctree_flags |= OPEN_CTREE_PARTIAL;
12740
12741         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12742                                   chunk_root_bytenr, ctree_flags);
12743         if (!info) {
12744                 error("cannot open file system");
12745                 ret = -EIO;
12746                 err |= !!ret;
12747                 goto err_out;
12748         }
12749
12750         global_info = info;
12751         root = info->fs_root;
12752         if (clear_space_cache == 1) {
12753                 if (btrfs_fs_compat_ro(info,
12754                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12755                         error(
12756                 "free space cache v2 detected, use --clear-space-cache v2");
12757                         ret = 1;
12758                         goto close_out;
12759                 }
12760                 printf("Clearing free space cache\n");
12761                 ret = clear_free_space_cache(info);
12762                 if (ret) {
12763                         error("failed to clear free space cache");
12764                         ret = 1;
12765                 } else {
12766                         printf("Free space cache cleared\n");
12767                 }
12768                 goto close_out;
12769         } else if (clear_space_cache == 2) {
12770                 if (!btrfs_fs_compat_ro(info,
12771                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12772                         printf("no free space cache v2 to clear\n");
12773                         ret = 0;
12774                         goto close_out;
12775                 }
12776                 printf("Clear free space cache v2\n");
12777                 ret = btrfs_clear_free_space_tree(info);
12778                 if (ret) {
12779                         error("failed to clear free space cache v2: %d", ret);
12780                         ret = 1;
12781                 } else {
12782                         printf("free space cache v2 cleared\n");
12783                 }
12784                 goto close_out;
12785         }
12786
12787         /*
12788          * repair mode will force us to commit transaction which
12789          * will make us fail to load log tree when mounting.
12790          */
12791         if (repair && btrfs_super_log_root(info->super_copy)) {
12792                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12793                 if (!ret) {
12794                         ret = 1;
12795                         err |= !!ret;
12796                         goto close_out;
12797                 }
12798                 ret = zero_log_tree(root);
12799                 err |= !!ret;
12800                 if (ret) {
12801                         error("failed to zero log tree: %d", ret);
12802                         goto close_out;
12803                 }
12804         }
12805
12806         uuid_unparse(info->super_copy->fsid, uuidbuf);
12807         if (qgroup_report) {
12808                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12809                        uuidbuf);
12810                 ret = qgroup_verify_all(info);
12811                 err |= !!ret;
12812                 if (ret == 0)
12813                         report_qgroups(1);
12814                 goto close_out;
12815         }
12816         if (subvolid) {
12817                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12818                        subvolid, argv[optind], uuidbuf);
12819                 ret = print_extent_state(info, subvolid);
12820                 err |= !!ret;
12821                 goto close_out;
12822         }
12823         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12824
12825         if (!extent_buffer_uptodate(info->tree_root->node) ||
12826             !extent_buffer_uptodate(info->dev_root->node) ||
12827             !extent_buffer_uptodate(info->chunk_root->node)) {
12828                 error("critical roots corrupted, unable to check the filesystem");
12829                 err |= !!ret;
12830                 ret = -EIO;
12831                 goto close_out;
12832         }
12833
12834         if (init_extent_tree || init_csum_tree) {
12835                 struct btrfs_trans_handle *trans;
12836
12837                 trans = btrfs_start_transaction(info->extent_root, 0);
12838                 if (IS_ERR(trans)) {
12839                         error("error starting transaction");
12840                         ret = PTR_ERR(trans);
12841                         err |= !!ret;
12842                         goto close_out;
12843                 }
12844
12845                 if (init_extent_tree) {
12846                         printf("Creating a new extent tree\n");
12847                         ret = reinit_extent_tree(trans, info);
12848                         err |= !!ret;
12849                         if (ret)
12850                                 goto close_out;
12851                 }
12852
12853                 if (init_csum_tree) {
12854                         printf("Reinitialize checksum tree\n");
12855                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12856                         if (ret) {
12857                                 error("checksum tree initialization failed: %d",
12858                                                 ret);
12859                                 ret = -EIO;
12860                                 err |= !!ret;
12861                                 goto close_out;
12862                         }
12863
12864                         ret = fill_csum_tree(trans, info->csum_root,
12865                                              init_extent_tree);
12866                         err |= !!ret;
12867                         if (ret) {
12868                                 error("checksum tree refilling failed: %d", ret);
12869                                 return -EIO;
12870                         }
12871                 }
12872                 /*
12873                  * Ok now we commit and run the normal fsck, which will add
12874                  * extent entries for all of the items it finds.
12875                  */
12876                 ret = btrfs_commit_transaction(trans, info->extent_root);
12877                 err |= !!ret;
12878                 if (ret)
12879                         goto close_out;
12880         }
12881         if (!extent_buffer_uptodate(info->extent_root->node)) {
12882                 error("critical: extent_root, unable to check the filesystem");
12883                 ret = -EIO;
12884                 err |= !!ret;
12885                 goto close_out;
12886         }
12887         if (!extent_buffer_uptodate(info->csum_root->node)) {
12888                 error("critical: csum_root, unable to check the filesystem");
12889                 ret = -EIO;
12890                 err |= !!ret;
12891                 goto close_out;
12892         }
12893
12894         if (!ctx.progress_enabled)
12895                 fprintf(stderr, "checking extents\n");
12896         if (check_mode == CHECK_MODE_LOWMEM)
12897                 ret = check_chunks_and_extents_v2(root);
12898         else
12899                 ret = check_chunks_and_extents(root);
12900         err |= !!ret;
12901         if (ret)
12902                 error(
12903                 "errors found in extent allocation tree or chunk allocation");
12904
12905         ret = repair_root_items(info);
12906         err |= !!ret;
12907         if (ret < 0)
12908                 goto close_out;
12909         if (repair) {
12910                 fprintf(stderr, "Fixed %d roots.\n", ret);
12911                 ret = 0;
12912         } else if (ret > 0) {
12913                 fprintf(stderr,
12914                        "Found %d roots with an outdated root item.\n",
12915                        ret);
12916                 fprintf(stderr,
12917                         "Please run a filesystem check with the option --repair to fix them.\n");
12918                 ret = 1;
12919                 err |= !!ret;
12920                 goto close_out;
12921         }
12922
12923         if (!ctx.progress_enabled) {
12924                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12925                         fprintf(stderr, "checking free space tree\n");
12926                 else
12927                         fprintf(stderr, "checking free space cache\n");
12928         }
12929         ret = check_space_cache(root);
12930         err |= !!ret;
12931         if (ret)
12932                 goto out;
12933
12934         /*
12935          * We used to have to have these hole extents in between our real
12936          * extents so if we don't have this flag set we need to make sure there
12937          * are no gaps in the file extents for inodes, otherwise we can just
12938          * ignore it when this happens.
12939          */
12940         no_holes = btrfs_fs_incompat(root->fs_info,
12941                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12942         if (!ctx.progress_enabled)
12943                 fprintf(stderr, "checking fs roots\n");
12944         if (check_mode == CHECK_MODE_LOWMEM)
12945                 ret = check_fs_roots_v2(root->fs_info);
12946         else
12947                 ret = check_fs_roots(root, &root_cache);
12948         err |= !!ret;
12949         if (ret)
12950                 goto out;
12951
12952         fprintf(stderr, "checking csums\n");
12953         ret = check_csums(root);
12954         err |= !!ret;
12955         if (ret)
12956                 goto out;
12957
12958         fprintf(stderr, "checking root refs\n");
12959         /* For low memory mode, check_fs_roots_v2 handles root refs */
12960         if (check_mode != CHECK_MODE_LOWMEM) {
12961                 ret = check_root_refs(root, &root_cache);
12962                 err |= !!ret;
12963                 if (ret)
12964                         goto out;
12965         }
12966
12967         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12968                 struct extent_buffer *eb;
12969
12970                 eb = list_first_entry(&root->fs_info->recow_ebs,
12971                                       struct extent_buffer, recow);
12972                 list_del_init(&eb->recow);
12973                 ret = recow_extent_buffer(root, eb);
12974                 err |= !!ret;
12975                 if (ret)
12976                         break;
12977         }
12978
12979         while (!list_empty(&delete_items)) {
12980                 struct bad_item *bad;
12981
12982                 bad = list_first_entry(&delete_items, struct bad_item, list);
12983                 list_del_init(&bad->list);
12984                 if (repair) {
12985                         ret = delete_bad_item(root, bad);
12986                         err |= !!ret;
12987                 }
12988                 free(bad);
12989         }
12990
12991         if (info->quota_enabled) {
12992                 fprintf(stderr, "checking quota groups\n");
12993                 ret = qgroup_verify_all(info);
12994                 err |= !!ret;
12995                 if (ret)
12996                         goto out;
12997                 report_qgroups(0);
12998                 ret = repair_qgroups(info, &qgroups_repaired);
12999                 err |= !!ret;
13000                 if (err)
13001                         goto out;
13002                 ret = 0;
13003         }
13004
13005         if (!list_empty(&root->fs_info->recow_ebs)) {
13006                 error("transid errors in file system");
13007                 ret = 1;
13008                 err |= !!ret;
13009         }
13010 out:
13011         if (found_old_backref) { /*
13012                  * there was a disk format change when mixed
13013                  * backref was in testing tree. The old format
13014                  * existed about one week.
13015                  */
13016                 printf("\n * Found old mixed backref format. "
13017                        "The old format is not supported! *"
13018                        "\n * Please mount the FS in readonly mode, "
13019                        "backup data and re-format the FS. *\n\n");
13020                 err |= 1;
13021         }
13022         printf("found %llu bytes used err is %d\n",
13023                (unsigned long long)bytes_used, ret);
13024         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13025         printf("total tree bytes: %llu\n",
13026                (unsigned long long)total_btree_bytes);
13027         printf("total fs tree bytes: %llu\n",
13028                (unsigned long long)total_fs_tree_bytes);
13029         printf("total extent tree bytes: %llu\n",
13030                (unsigned long long)total_extent_tree_bytes);
13031         printf("btree space waste bytes: %llu\n",
13032                (unsigned long long)btree_space_waste);
13033         printf("file data blocks allocated: %llu\n referenced %llu\n",
13034                 (unsigned long long)data_bytes_allocated,
13035                 (unsigned long long)data_bytes_referenced);
13036
13037         free_qgroup_counts();
13038         free_root_recs_tree(&root_cache);
13039 close_out:
13040         close_ctree(root);
13041 err_out:
13042         if (ctx.progress_enabled)
13043                 task_deinit(ctx.info);
13044
13045         return err;
13046 }