btrfs-progs: check: use on-stack path buffer in check_fs_first_inode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Work phases shown by the progress indicator.  The value is used as an
 * index into task_position_string[] in print_status_check(); when it is
 * TASK_NOTHING that function returns without printing anything.
 */
46 enum task_position {
47         TASK_EXTENTS,
48         TASK_FREE_SPACE,
49         TASK_FS_ROOTS,
50         TASK_NOTHING, /* have to be the last element */
51 };
52
/* State that print_status_check() receives through its opaque pointer. */
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp; /* phase whose label is printed */
56
57         struct task_info *info; /* handed to task_period_start()/task_period_wait() */
58 };
59
/*
 * File-scope counters, lists and option flags.
 * NOTE(review): the code that updates and reads most of these is outside
 * the part of the file reviewed here, so their exact meaning should be
 * confirmed against the users.
 */
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selector.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig"/"original" to CHECK_MODE_ORIGINAL and any
 * other string to CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT is an alias
 * for CHECK_MODE_ORIGINAL and is the initial value of check_mode.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  Both data_backref and tree_backref
 * embed it as their 'node' member, and to_extent_backref() recovers it
 * from its 'list' linkage.
 * NOTE(review): the meaning of the individual flag bits is inferred from
 * their names only; confirm against the extent-tree checking code.
 */
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Back reference record with the common extent_backref header in 'node'
 * (see to_data_backref()).  'parent' and 'root' share storage through the
 * anonymous union, so only one of them is meaningful at a time.
 * NOTE(review): presumably node.full_backref selects which union member
 * is valid -- confirm against the users of this struct.
 */
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Error flag bits, one distinct bit each from bit 1 up to bit 17, so
 * several can be OR-ed into one integer.
 * NOTE(review): the code that sets and tests these bits is outside the
 * part of the file reviewed here.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 /*
140  * Much like data_backref, just removed the undetermined members
141  * and change it to use list_head.
142  * During extent scan, it is stored in root->orphan_data_extent.
143  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
144  */
145 struct orphan_data_extent {
146         struct list_head list; /* linked into inode_record::orphan_extents by clone_inode_rec() */
147         u64 root;
148         u64 objectid;          /* printed as "inode" by print_orphan_data_extents() */
149         u64 offset;
150         u64 disk_bytenr;
151         u64 disk_len;
152 };
153
/*
 * Back reference record with the common extent_backref header in 'node'
 * (see to_tree_backref()).  'parent' and 'root' overlay each other in the
 * anonymous union.
 * NOTE(review): presumably node.full_backref tells which one is valid --
 * confirm against the users of this struct.
 */
154 struct tree_backref {
155         struct extent_backref node;
156         union {
157                 u64 parent;
158                 u64 root;
159         };
160 };
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
167 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 fits the two-bit extent_record::flag_block_full_backref field. */
168 enum { FLAG_UNSET = 2 };
169
/*
 * Bookkeeping record for one extent.  to_extent_record() recovers it from
 * its 'list' linkage.
 * NOTE(review): most field meanings are inferred from names; the code
 * that fills the record is outside the part of the file reviewed here.
 */
170 struct extent_record {
171         struct list_head backrefs;
172         struct list_head dups;
173         struct list_head list;
174         struct cache_extent cache;
175         struct btrfs_disk_key parent_key;
176         u64 start;
177         u64 max_size;
178         u64 nr;
179         u64 refs;
180         u64 extent_item_refs;
181         u64 generation;
182         u64 parent_generation;
183         u64 info_objectid;
184         u32 num_duplicates;
185         u8 info_level;
186         unsigned int flag_block_full_backref:2; /* two bits wide so it can also hold FLAG_UNSET (2) */
187         unsigned int found_rec:1;
188         unsigned int content_checked:1;
189         unsigned int owner_ref_checked:1;
190         unsigned int is_root:1;
191         unsigned int metadata:1;
192         unsigned int bad_full_backref:1;
193         unsigned int crossing_stripes:1;
194         unsigned int wrong_chunk_type:1;
195 };
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
/*
 * Summary of one root item, kept on a list through 'list'.
 * NOTE(review): the code that fills and consumes this record is outside
 * the part of the file reviewed here; field meanings follow their names.
 */
221 struct root_item_record {
222         struct list_head list;
223         u64 objectid;
224         u64 bytenr;
225         u64 last_snapshot;
226         u8 level;
227         u8 drop_level;
228         int level_size;
229         struct btrfs_key drop_key;
230 };
231
/*
 * Error bits for a single back reference.  print_ref_error() prints one
 * message for every bit that is set.
 */
232 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
233 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
234 #define REF_ERR_NO_INODE_REF            (1 << 2)
235 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
236 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
237 #define REF_ERR_DUP_INODE_REF           (1 << 5)
238 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
239 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
240 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
241 #define REF_ERR_NO_ROOT_REF             (1 << 9)
242 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
243 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
244 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
245
/*
 * One hole covering [start, start + len), stored in an rb-tree such as
 * inode_record::holes.  compare_hole() orders the nodes on insertion and
 * compare_hole_range() finds the hole containing a given offset.
 */
246 struct file_extent_hole {
247         struct rb_node node;
248         u64 start;
249         u64 len;
250 };
251
/*
 * Everything collected about one inode.  Records are reference counted:
 * get_inode_rec() clones a record before modification when refs > 1 and
 * free_inode_rec() releases it once refs drops to zero.
 */
252 struct inode_record {
253         struct list_head backrefs; /* list of struct inode_backref */
254         unsigned int checked:1;
255         unsigned int merging:1;
256         unsigned int found_inode_item:1;
257         unsigned int found_dir_item:1;
258         unsigned int found_file_extent:1;
259         unsigned int found_csum_item:1;
260         unsigned int some_csum_missing:1;
261         unsigned int nodatasum:1;
262         int errors; /* I_ERR_* bits, decoded by print_inode_error() */
263
264         u64 ino;
265         u32 nlink;
266         u32 imode;
267         u64 isize;
268         u64 nbytes;
269
270         u32 found_link;
271         u64 found_size;
272         u64 extent_start; /* set to (u64)-1 when get_inode_rec() creates the record */
273         u64 extent_end;
274         struct rb_root holes; /* rb-tree of struct file_extent_hole */
275         struct list_head orphan_extents; /* list of struct orphan_data_extent */
276
277         u32 refs; /* reference count, starts at 1 */
278 };
279
/*
 * Error bits stored in inode_record::errors.  print_inode_error() prints
 * one message for every bit that is set.
 */
280 #define I_ERR_NO_INODE_ITEM             (1 << 0)
281 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
282 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
283 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
284 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
285 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
286 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
287 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
288 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
289 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
290 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
291 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
292 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
293 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
294 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
295
296 struct root_backref {
297         struct list_head list;
298         unsigned int found_dir_item:1;
299         unsigned int found_dir_index:1;
300         unsigned int found_back_ref:1;
301         unsigned int found_forward_ref:1;
302         unsigned int reachable:1;
303         int errors;
304         u64 ref_root;
305         u64 dir;
306         u64 index;
307         u16 namelen;
308         char name[0];
309 };
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
/*
 * Per-root record that can be stored in a cache tree through 'cache'.
 * NOTE(review): 'backrefs' presumably holds struct root_backref entries;
 * the users are outside the part of the file reviewed here.
 */
316 struct root_record {
317         struct list_head backrefs;
318         struct cache_extent cache;
319         unsigned int found_root_item:1;
320         u64 objectid;
321         u32 found_ref;
322 };
323
/*
 * Cache-tree entry that carries an arbitrary pointer.  get_inode_rec()
 * keys it by inode number (cache.start = ino, cache.size = 1) and stores
 * the struct inode_record in 'data'.
 */
324 struct ptr_node {
325         struct cache_extent cache;
326         void *data;
327 };
328
/*
 * Cache-tree entry holding its own root and inode caches.
 * NOTE(review): presumably one per shared tree block during the fs-tree
 * walk (see walk_control::nodes) -- confirm against the walking code.
 */
329 struct shared_node {
330         struct cache_extent cache;
331         struct cache_tree root_cache;
332         struct cache_tree inode_cache;
333         struct inode_record *current;
334         u32 refs;
335 };
336
/* A start/size pair.  NOTE(review): users are outside the reviewed part of the file. */
337 struct block_info {
338         u64 start;
339         u32 size;
340 };
341
/*
 * State of a tree walk, with one shared_node slot per tree level
 * (BTRFS_MAX_LEVEL entries).
 * NOTE(review): presumably 'active_node' indexes nodes[] -- confirm
 * against the walking code.
 */
342 struct walk_control {
343         struct cache_tree shared;
344         struct shared_node *nodes[BTRFS_MAX_LEVEL];
345         int active_node;
346         int root_level;
347 };
348
/*
 * A key together with a root id, kept on a list through 'list'.
 * NOTE(review): users are outside the reviewed part of the file.
 */
349 struct bad_item {
350         struct btrfs_key key;
351         u64 root_id;
352         struct list_head list;
353 };
354
/*
 * A bytenr/bytes pair with counters, kept on a list through 'list'.
 * NOTE(review): users are outside the reviewed part of the file.
 */
355 struct extent_entry {
356         u64 bytenr;
357         u64 bytes;
358         int count;
359         int broken;
360         struct list_head list;
361 };
362
/*
 * Per-root information that can be stored in a cache tree through
 * 'cache_extent'.
 * NOTE(review): presumably the entry type of roots_info_cache -- confirm.
 */
363 struct root_item_info {
364         /* level of the root */
365         u8 level;
366         /* number of nodes at this level, must be 1 for a root */
367         int node_count;
368         u64 bytenr;
369         u64 gen;
370         struct cache_extent cache_extent;
371 };
372
373 /*
374  * Error bit for low memory mode check.
375  *
376  * Currently no caller cares about it yet.  Just internal use for error
377  * classification.
378  */
/*
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart once
 * OR-ed into one error value.  Confirm whether this is intended before
 * relying on either bit.
 */
379 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
380 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
381 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
382 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
383 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
384 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
385 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
386 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
387 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
388 #define CHUNK_TYPE_MISMATCH     (1 << 8)
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize, root->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct btrfs_root *root,
1481                             struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(root, eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of extent reference information for the tree block most
 * recently looked up at each level.  Every array is indexed by tree level.
 */
1860 struct node_refs {
1861         u64 bytenr[BTRFS_MAX_LEVEL];     /* start of the cached block */
1862         u64 refs[BTRFS_MAX_LEVEL];       /* reference count of that block */
1863         int need_check[BTRFS_MAX_LEVEL]; /* set by update_nodes_refs(): non-zero if the block is to be checked in this root */
1864 };
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct btrfs_root *root,
1987                             struct extent_buffer *parent, int slot,
1988                             struct extent_buffer *child)
1989 {
1990         struct btrfs_key parent_key;
1991         struct btrfs_key child_key;
1992         int ret = 0;
1993
1994         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1995         if (btrfs_header_level(child) == 0)
1996                 btrfs_item_key_to_cpu(child, &child_key, 0);
1997         else
1998                 btrfs_node_key_to_cpu(child, &child_key, 0);
1999
2000         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2001                 ret = -EINVAL;
2002                 fprintf(stderr,
2003                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2004                         parent_key.objectid, parent_key.type, parent_key.offset,
2005                         child_key.objectid, child_key.type, child_key.offset);
2006         }
2007         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008                 ret = -EINVAL;
2009                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2010                         btrfs_node_blockptr(parent, slot),
2011                         btrfs_header_bytenr(child));
2012         }
2013         if (btrfs_node_ptr_generation(parent, slot) !=
2014             btrfs_header_generation(child)) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2017                         btrfs_header_generation(child),
2018                         btrfs_node_ptr_generation(parent, slot));
2019         }
2020         return ret;
2021 }
2022
2023 /*
2024  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2025  * in every fs or file tree check. Here we find its all root ids, and only check
2026  * it in the fs or file tree which has the smallest root id.
2027  */
2028 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 {
2030         struct rb_node *node;
2031         struct ulist_node *u;
2032
2033         if (roots->nnodes == 1)
2034                 return 1;
2035
2036         node = rb_first(&roots->root);
2037         u = rb_entry(node, struct ulist_node, rb_node);
2038         /*
2039          * current root id is not smallest, we skip it and let it be checked
2040          * in the fs or file tree who hash the smallest root id.
2041          */
2042         if (root->objectid != u->val)
2043                 return 0;
2044
2045         return 1;
2046 }
2047
2048 /*
2049  * for a tree node or leaf, we record its reference count, so later if we still
2050  * process this node or leaf, don't need to compute its reference count again.
2051  */
2052 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2053                              struct node_refs *nrefs, u64 level)
2054 {
2055         int check, ret;
2056         u64 refs;
2057         struct ulist *roots;
2058
2059         if (nrefs->bytenr[level] != bytenr) {
2060                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2061                                        level, 1, &refs, NULL);
2062                 if (ret < 0)
2063                         return ret;
2064
2065                 nrefs->bytenr[level] = bytenr;
2066                 nrefs->refs[level] = refs;
2067                 if (refs > 1) {
2068                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2069                                                    0, &roots);
2070                         if (ret)
2071                                 return -EIO;
2072
2073                         check = need_check(root, roots);
2074                         ulist_free(roots);
2075                         nrefs->need_check[level] = check;
2076                 } else {
2077                         nrefs->need_check[level] = 1;
2078                 }
2079         }
2080
2081         return 0;
2082 }
2083
2084 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2085                           struct walk_control *wc, int *level,
2086                           struct node_refs *nrefs)
2087 {
2088         enum btrfs_tree_block_status status;
2089         u64 bytenr;
2090         u64 ptr_gen;
2091         struct extent_buffer *next;
2092         struct extent_buffer *cur;
2093         u32 blocksize;
2094         int ret, err = 0;
2095         u64 refs;
2096
2097         WARN_ON(*level < 0);
2098         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099
2100         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2101                 refs = nrefs->refs[*level];
2102                 ret = 0;
2103         } else {
2104                 ret = btrfs_lookup_extent_info(NULL, root,
2105                                        path->nodes[*level]->start,
2106                                        *level, 1, &refs, NULL);
2107                 if (ret < 0) {
2108                         err = ret;
2109                         goto out;
2110                 }
2111                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2112                 nrefs->refs[*level] = refs;
2113         }
2114
2115         if (refs > 1) {
2116                 ret = enter_shared_node(root, path->nodes[*level]->start,
2117                                         refs, wc, *level);
2118                 if (ret > 0) {
2119                         err = ret;
2120                         goto out;
2121                 }
2122         }
2123
2124         while (*level >= 0) {
2125                 WARN_ON(*level < 0);
2126                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2127                 cur = path->nodes[*level];
2128
2129                 if (btrfs_header_level(cur) != *level)
2130                         WARN_ON(1);
2131
2132                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2133                         break;
2134                 if (*level == 0) {
2135                         ret = process_one_leaf(root, cur, wc);
2136                         if (ret < 0)
2137                                 err = ret;
2138                         break;
2139                 }
2140                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2141                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2142                 blocksize = root->nodesize;
2143
2144                 if (bytenr == nrefs->bytenr[*level - 1]) {
2145                         refs = nrefs->refs[*level - 1];
2146                 } else {
2147                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2148                                         *level - 1, 1, &refs, NULL);
2149                         if (ret < 0) {
2150                                 refs = 0;
2151                         } else {
2152                                 nrefs->bytenr[*level - 1] = bytenr;
2153                                 nrefs->refs[*level - 1] = refs;
2154                         }
2155                 }
2156
2157                 if (refs > 1) {
2158                         ret = enter_shared_node(root, bytenr, refs,
2159                                                 wc, *level - 1);
2160                         if (ret > 0) {
2161                                 path->slots[*level]++;
2162                                 continue;
2163                         }
2164                 }
2165
2166                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2167                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2168                         free_extent_buffer(next);
2169                         reada_walk_down(root, cur, path->slots[*level]);
2170                         next = read_tree_block(root, bytenr, blocksize,
2171                                                ptr_gen);
2172                         if (!extent_buffer_uptodate(next)) {
2173                                 struct btrfs_key node_key;
2174
2175                                 btrfs_node_key_to_cpu(path->nodes[*level],
2176                                                       &node_key,
2177                                                       path->slots[*level]);
2178                                 btrfs_add_corrupt_extent_record(root->fs_info,
2179                                                 &node_key,
2180                                                 path->nodes[*level]->start,
2181                                                 root->nodesize, *level);
2182                                 err = -EIO;
2183                                 goto out;
2184                         }
2185                 }
2186
2187                 ret = check_child_node(root, cur, path->slots[*level], next);
2188                 if (ret) {
2189                         err = ret;
2190                         goto out;
2191                 }
2192
2193                 if (btrfs_is_leaf(next))
2194                         status = btrfs_check_leaf(root, NULL, next);
2195                 else
2196                         status = btrfs_check_node(root, NULL, next);
2197                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198                         free_extent_buffer(next);
2199                         err = -EIO;
2200                         goto out;
2201                 }
2202
2203                 *level = *level - 1;
2204                 free_extent_buffer(path->nodes[*level]);
2205                 path->nodes[*level] = next;
2206                 path->slots[*level] = 0;
2207         }
2208 out:
2209         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2210         return err;
2211 }
2212
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214                             unsigned int ext_ref);
2215
2216 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2217                              int *level, struct node_refs *nrefs, int ext_ref)
2218 {
2219         enum btrfs_tree_block_status status;
2220         u64 bytenr;
2221         u64 ptr_gen;
2222         struct extent_buffer *next;
2223         struct extent_buffer *cur;
2224         u32 blocksize;
2225         int ret;
2226
2227         WARN_ON(*level < 0);
2228         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2229
2230         ret = update_nodes_refs(root, path->nodes[*level]->start,
2231                                 nrefs, *level);
2232         if (ret < 0)
2233                 return ret;
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 /* Don't forgot to check leaf/node validation */
2246                 if (*level == 0) {
2247                         ret = btrfs_check_leaf(root, NULL, cur);
2248                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2249                                 ret = -EIO;
2250                                 break;
2251                         }
2252                         ret = process_one_leaf_v2(root, path, nrefs,
2253                                                   level, ext_ref);
2254                         break;
2255                 } else {
2256                         ret = btrfs_check_node(root, NULL, cur);
2257                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2258                                 ret = -EIO;
2259                                 break;
2260                         }
2261                 }
2262                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2263                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2264                 blocksize = root->nodesize;
2265
2266                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2267                 if (ret)
2268                         break;
2269                 if (!nrefs->need_check[*level - 1]) {
2270                         path->slots[*level]++;
2271                         continue;
2272                 }
2273
2274                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2275                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2276                         free_extent_buffer(next);
2277                         reada_walk_down(root, cur, path->slots[*level]);
2278                         next = read_tree_block(root, bytenr, blocksize,
2279                                                ptr_gen);
2280                         if (!extent_buffer_uptodate(next)) {
2281                                 struct btrfs_key node_key;
2282
2283                                 btrfs_node_key_to_cpu(path->nodes[*level],
2284                                                       &node_key,
2285                                                       path->slots[*level]);
2286                                 btrfs_add_corrupt_extent_record(root->fs_info,
2287                                                 &node_key,
2288                                                 path->nodes[*level]->start,
2289                                                 root->nodesize, *level);
2290                                 ret = -EIO;
2291                                 break;
2292                         }
2293                 }
2294
2295                 ret = check_child_node(root, cur, path->slots[*level], next);
2296                 if (ret < 0) 
2297                         break;
2298
2299                 if (btrfs_is_leaf(next))
2300                         status = btrfs_check_leaf(root, NULL, next);
2301                 else
2302                         status = btrfs_check_node(root, NULL, next);
2303                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2304                         free_extent_buffer(next);
2305                         ret = -EIO;
2306                         break;
2307                 }
2308
2309                 *level = *level - 1;
2310                 free_extent_buffer(path->nodes[*level]);
2311                 path->nodes[*level] = next;
2312                 path->slots[*level] = 0;
2313         }
2314         return ret;
2315 }
2316
2317 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2318                         struct walk_control *wc, int *level)
2319 {
2320         int i;
2321         struct extent_buffer *leaf;
2322
2323         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2324                 leaf = path->nodes[i];
2325                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2326                         path->slots[i]++;
2327                         *level = i;
2328                         return 0;
2329                 } else {
2330                         free_extent_buffer(path->nodes[*level]);
2331                         path->nodes[*level] = NULL;
2332                         BUG_ON(*level > wc->active_node);
2333                         if (*level == wc->active_node)
2334                                 leave_shared_node(root, wc, *level);
2335                         *level = i + 1;
2336                 }
2337         }
2338         return 1;
2339 }
2340
2341 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2342                            int *level)
2343 {
2344         int i;
2345         struct extent_buffer *leaf;
2346
2347         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348                 leaf = path->nodes[i];
2349                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350                         path->slots[i]++;
2351                         *level = i;
2352                         return 0;
2353                 } else {
2354                         free_extent_buffer(path->nodes[*level]);
2355                         path->nodes[*level] = NULL;
2356                         *level = i + 1;
2357                 }
2358         }
2359         return 1;
2360 }
2361
2362 static int check_root_dir(struct inode_record *rec)
2363 {
2364         struct inode_backref *backref;
2365         int ret = -1;
2366
2367         if (!rec->found_inode_item || rec->errors)
2368                 goto out;
2369         if (rec->nlink != 1 || rec->found_link != 0)
2370                 goto out;
2371         if (list_empty(&rec->backrefs))
2372                 goto out;
2373         backref = to_inode_backref(rec->backrefs.next);
2374         if (!backref->found_inode_ref)
2375                 goto out;
2376         if (backref->index != 0 || backref->namelen != 2 ||
2377             memcmp(backref->name, "..", 2))
2378                 goto out;
2379         if (backref->found_dir_index || backref->found_dir_item)
2380                 goto out;
2381         ret = 0;
2382 out:
2383         return ret;
2384 }
2385
2386 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2387                               struct btrfs_root *root, struct btrfs_path *path,
2388                               struct inode_record *rec)
2389 {
2390         struct btrfs_inode_item *ei;
2391         struct btrfs_key key;
2392         int ret;
2393
2394         key.objectid = rec->ino;
2395         key.type = BTRFS_INODE_ITEM_KEY;
2396         key.offset = (u64)-1;
2397
2398         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2399         if (ret < 0)
2400                 goto out;
2401         if (ret) {
2402                 if (!path->slots[0]) {
2403                         ret = -ENOENT;
2404                         goto out;
2405                 }
2406                 path->slots[0]--;
2407                 ret = 0;
2408         }
2409         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2410         if (key.objectid != rec->ino) {
2411                 ret = -ENOENT;
2412                 goto out;
2413         }
2414
2415         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2416                             struct btrfs_inode_item);
2417         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2418         btrfs_mark_buffer_dirty(path->nodes[0]);
2419         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2420         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2421                root->root_key.objectid);
2422 out:
2423         btrfs_release_path(path);
2424         return ret;
2425 }
2426
2427 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2428                                     struct btrfs_root *root,
2429                                     struct btrfs_path *path,
2430                                     struct inode_record *rec)
2431 {
2432         int ret;
2433
2434         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2435         btrfs_release_path(path);
2436         if (!ret)
2437                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2438         return ret;
2439 }
2440
2441 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2442                                struct btrfs_root *root,
2443                                struct btrfs_path *path,
2444                                struct inode_record *rec)
2445 {
2446         struct btrfs_inode_item *ei;
2447         struct btrfs_key key;
2448         int ret = 0;
2449
2450         key.objectid = rec->ino;
2451         key.type = BTRFS_INODE_ITEM_KEY;
2452         key.offset = 0;
2453
2454         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2455         if (ret) {
2456                 if (ret > 0)
2457                         ret = -ENOENT;
2458                 goto out;
2459         }
2460
2461         /* Since ret == 0, no need to check anything */
2462         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2463                             struct btrfs_inode_item);
2464         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2465         btrfs_mark_buffer_dirty(path->nodes[0]);
2466         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2467         printf("reset nbytes for ino %llu root %llu\n",
2468                rec->ino, root->root_key.objectid);
2469 out:
2470         btrfs_release_path(path);
2471         return ret;
2472 }
2473
2474 static int add_missing_dir_index(struct btrfs_root *root,
2475                                  struct cache_tree *inode_cache,
2476                                  struct inode_record *rec,
2477                                  struct inode_backref *backref)
2478 {
2479         struct btrfs_path path;
2480         struct btrfs_trans_handle *trans;
2481         struct btrfs_dir_item *dir_item;
2482         struct extent_buffer *leaf;
2483         struct btrfs_key key;
2484         struct btrfs_disk_key disk_key;
2485         struct inode_record *dir_rec;
2486         unsigned long name_ptr;
2487         u32 data_size = sizeof(*dir_item) + backref->namelen;
2488         int ret;
2489
2490         trans = btrfs_start_transaction(root, 1);
2491         if (IS_ERR(trans))
2492                 return PTR_ERR(trans);
2493
2494         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2495                 (unsigned long long)rec->ino);
2496
2497         btrfs_init_path(&path);
2498         key.objectid = backref->dir;
2499         key.type = BTRFS_DIR_INDEX_KEY;
2500         key.offset = backref->index;
2501         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2502         BUG_ON(ret);
2503
2504         leaf = path.nodes[0];
2505         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2506
2507         disk_key.objectid = cpu_to_le64(rec->ino);
2508         disk_key.type = BTRFS_INODE_ITEM_KEY;
2509         disk_key.offset = 0;
2510
2511         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2512         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2513         btrfs_set_dir_data_len(leaf, dir_item, 0);
2514         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2515         name_ptr = (unsigned long)(dir_item + 1);
2516         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2517         btrfs_mark_buffer_dirty(leaf);
2518         btrfs_release_path(&path);
2519         btrfs_commit_transaction(trans, root);
2520
2521         backref->found_dir_index = 1;
2522         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2523         BUG_ON(IS_ERR(dir_rec));
2524         if (!dir_rec)
2525                 return 0;
2526         dir_rec->found_size += backref->namelen;
2527         if (dir_rec->found_size == dir_rec->isize &&
2528             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2529                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2530         if (dir_rec->found_size != dir_rec->isize)
2531                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2532
2533         return 0;
2534 }
2535
2536 static int delete_dir_index(struct btrfs_root *root,
2537                             struct cache_tree *inode_cache,
2538                             struct inode_record *rec,
2539                             struct inode_backref *backref)
2540 {
2541         struct btrfs_trans_handle *trans;
2542         struct btrfs_dir_item *di;
2543         struct btrfs_path path;
2544         int ret = 0;
2545
2546         trans = btrfs_start_transaction(root, 1);
2547         if (IS_ERR(trans))
2548                 return PTR_ERR(trans);
2549
2550         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2551                 (unsigned long long)backref->dir,
2552                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2553                 (unsigned long long)root->objectid);
2554
2555         btrfs_init_path(&path);
2556         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2557                                     backref->name, backref->namelen,
2558                                     backref->index, -1);
2559         if (IS_ERR(di)) {
2560                 ret = PTR_ERR(di);
2561                 btrfs_release_path(&path);
2562                 btrfs_commit_transaction(trans, root);
2563                 if (ret == -ENOENT)
2564                         return 0;
2565                 return ret;
2566         }
2567
2568         if (!di)
2569                 ret = btrfs_del_item(trans, root, &path);
2570         else
2571                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2572         BUG_ON(ret);
2573         btrfs_release_path(&path);
2574         btrfs_commit_transaction(trans, root);
2575         return ret;
2576 }
2577
2578 static int create_inode_item(struct btrfs_root *root,
2579                              struct inode_record *rec,
2580                              struct inode_backref *backref, int root_dir)
2581 {
2582         struct btrfs_trans_handle *trans;
2583         struct btrfs_inode_item inode_item;
2584         time_t now = time(NULL);
2585         int ret;
2586
2587         trans = btrfs_start_transaction(root, 1);
2588         if (IS_ERR(trans)) {
2589                 ret = PTR_ERR(trans);
2590                 return ret;
2591         }
2592
2593         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2594                 "be incomplete, please check permissions and content after "
2595                 "the fsck completes.\n", (unsigned long long)root->objectid,
2596                 (unsigned long long)rec->ino);
2597
2598         memset(&inode_item, 0, sizeof(inode_item));
2599         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2600         if (root_dir)
2601                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2602         else
2603                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2604         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2605         if (rec->found_dir_item) {
2606                 if (rec->found_file_extent)
2607                         fprintf(stderr, "root %llu inode %llu has both a dir "
2608                                 "item and extents, unsure if it is a dir or a "
2609                                 "regular file so setting it as a directory\n",
2610                                 (unsigned long long)root->objectid,
2611                                 (unsigned long long)rec->ino);
2612                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2613                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2614         } else if (!rec->found_dir_item) {
2615                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2616                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2617         }
2618         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2619         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2620         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2621         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2622         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2623         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2624         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2625         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2626
2627         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2628         BUG_ON(ret);
2629         btrfs_commit_transaction(trans, root);
2630         return 0;
2631 }
2632
2633 static int repair_inode_backrefs(struct btrfs_root *root,
2634                                  struct inode_record *rec,
2635                                  struct cache_tree *inode_cache,
2636                                  int delete)
2637 {
2638         struct inode_backref *tmp, *backref;
2639         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2640         int ret = 0;
2641         int repaired = 0;
2642
2643         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2644                 if (!delete && rec->ino == root_dirid) {
2645                         if (!rec->found_inode_item) {
2646                                 ret = create_inode_item(root, rec, backref, 1);
2647                                 if (ret)
2648                                         break;
2649                                 repaired++;
2650                         }
2651                 }
2652
2653                 /* Index 0 for root dir's are special, don't mess with it */
2654                 if (rec->ino == root_dirid && backref->index == 0)
2655                         continue;
2656
2657                 if (delete &&
2658                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2659                      (backref->found_dir_index && backref->found_inode_ref &&
2660                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2661                         ret = delete_dir_index(root, inode_cache, rec, backref);
2662                         if (ret)
2663                                 break;
2664                         repaired++;
2665                         list_del(&backref->list);
2666                         free(backref);
2667                 }
2668
2669                 if (!delete && !backref->found_dir_index &&
2670                     backref->found_dir_item && backref->found_inode_ref) {
2671                         ret = add_missing_dir_index(root, inode_cache, rec,
2672                                                     backref);
2673                         if (ret)
2674                                 break;
2675                         repaired++;
2676                         if (backref->found_dir_item &&
2677                             backref->found_dir_index &&
2678                             backref->found_dir_index) {
2679                                 if (!backref->errors &&
2680                                     backref->found_inode_ref) {
2681                                         list_del(&backref->list);
2682                                         free(backref);
2683                                 }
2684                         }
2685                 }
2686
2687                 if (!delete && (!backref->found_dir_index &&
2688                                 !backref->found_dir_item &&
2689                                 backref->found_inode_ref)) {
2690                         struct btrfs_trans_handle *trans;
2691                         struct btrfs_key location;
2692
2693                         ret = check_dir_conflict(root, backref->name,
2694                                                  backref->namelen,
2695                                                  backref->dir,
2696                                                  backref->index);
2697                         if (ret) {
2698                                 /*
2699                                  * let nlink fixing routine to handle it,
2700                                  * which can do it better.
2701                                  */
2702                                 ret = 0;
2703                                 break;
2704                         }
2705                         location.objectid = rec->ino;
2706                         location.type = BTRFS_INODE_ITEM_KEY;
2707                         location.offset = 0;
2708
2709                         trans = btrfs_start_transaction(root, 1);
2710                         if (IS_ERR(trans)) {
2711                                 ret = PTR_ERR(trans);
2712                                 break;
2713                         }
2714                         fprintf(stderr, "adding missing dir index/item pair "
2715                                 "for inode %llu\n",
2716                                 (unsigned long long)rec->ino);
2717                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2718                                                     backref->namelen,
2719                                                     backref->dir, &location,
2720                                                     imode_to_type(rec->imode),
2721                                                     backref->index);
2722                         BUG_ON(ret);
2723                         btrfs_commit_transaction(trans, root);
2724                         repaired++;
2725                 }
2726
2727                 if (!delete && (backref->found_inode_ref &&
2728                                 backref->found_dir_index &&
2729                                 backref->found_dir_item &&
2730                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2731                                 !rec->found_inode_item)) {
2732                         ret = create_inode_item(root, rec, backref, 0);
2733                         if (ret)
2734                                 break;
2735                         repaired++;
2736                 }
2737
2738         }
2739         return ret ? ret : repaired;
2740 }
2741
2742 /*
2743  * To determine the file type for nlink/inode_item repair
2744  *
2745  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2746  * Return -ENOENT if file type is not found.
2747  */
2748 static int find_file_type(struct inode_record *rec, u8 *type)
2749 {
2750         struct inode_backref *backref;
2751
2752         /* For inode item recovered case */
2753         if (rec->found_inode_item) {
2754                 *type = imode_to_type(rec->imode);
2755                 return 0;
2756         }
2757
2758         list_for_each_entry(backref, &rec->backrefs, list) {
2759                 if (backref->found_dir_index || backref->found_dir_item) {
2760                         *type = backref->filetype;
2761                         return 0;
2762                 }
2763         }
2764         return -ENOENT;
2765 }
2766
2767 /*
2768  * To determine the file name for nlink repair
2769  *
2770  * Return 0 if file name is found, set name and namelen.
2771  * Return -ENOENT if file name is not found.
2772  */
2773 static int find_file_name(struct inode_record *rec,
2774                           char *name, int *namelen)
2775 {
2776         struct inode_backref *backref;
2777
2778         list_for_each_entry(backref, &rec->backrefs, list) {
2779                 if (backref->found_dir_index || backref->found_dir_item ||
2780                     backref->found_inode_ref) {
2781                         memcpy(name, backref->name, backref->namelen);
2782                         *namelen = backref->namelen;
2783                         return 0;
2784                 }
2785         }
2786         return -ENOENT;
2787 }
2788
2789 /* Reset the nlink of the inode to the correct one */
2790 static int reset_nlink(struct btrfs_trans_handle *trans,
2791                        struct btrfs_root *root,
2792                        struct btrfs_path *path,
2793                        struct inode_record *rec)
2794 {
2795         struct inode_backref *backref;
2796         struct inode_backref *tmp;
2797         struct btrfs_key key;
2798         struct btrfs_inode_item *inode_item;
2799         int ret = 0;
2800
2801         /* We don't believe this either, reset it and iterate backref */
2802         rec->found_link = 0;
2803
2804         /* Remove all backref including the valid ones */
2805         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2806                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2807                                    backref->index, backref->name,
2808                                    backref->namelen, 0);
2809                 if (ret < 0)
2810                         goto out;
2811
2812                 /* remove invalid backref, so it won't be added back */
2813                 if (!(backref->found_dir_index &&
2814                       backref->found_dir_item &&
2815                       backref->found_inode_ref)) {
2816                         list_del(&backref->list);
2817                         free(backref);
2818                 } else {
2819                         rec->found_link++;
2820                 }
2821         }
2822
2823         /* Set nlink to 0 */
2824         key.objectid = rec->ino;
2825         key.type = BTRFS_INODE_ITEM_KEY;
2826         key.offset = 0;
2827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828         if (ret < 0)
2829                 goto out;
2830         if (ret > 0) {
2831                 ret = -ENOENT;
2832                 goto out;
2833         }
2834         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2835                                     struct btrfs_inode_item);
2836         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2837         btrfs_mark_buffer_dirty(path->nodes[0]);
2838         btrfs_release_path(path);
2839
2840         /*
2841          * Add back valid inode_ref/dir_item/dir_index,
2842          * add_link() will handle the nlink inc, so new nlink must be correct
2843          */
2844         list_for_each_entry(backref, &rec->backrefs, list) {
2845                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2846                                      backref->name, backref->namelen,
2847                                      backref->filetype, &backref->index, 1);
2848                 if (ret < 0)
2849                         goto out;
2850         }
2851 out:
2852         btrfs_release_path(path);
2853         return ret;
2854 }
2855
2856 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2857                                struct btrfs_root *root,
2858                                struct btrfs_path *path,
2859                                struct inode_record *rec)
2860 {
2861         char *dir_name = "lost+found";
2862         char namebuf[BTRFS_NAME_LEN] = {0};
2863         u64 lost_found_ino;
2864         u32 mode = 0700;
2865         u8 type = 0;
2866         int namelen = 0;
2867         int name_recovered = 0;
2868         int type_recovered = 0;
2869         int ret = 0;
2870
2871         /*
2872          * Get file name and type first before these invalid inode ref
2873          * are deleted by remove_all_invalid_backref()
2874          */
2875         name_recovered = !find_file_name(rec, namebuf, &namelen);
2876         type_recovered = !find_file_type(rec, &type);
2877
2878         if (!name_recovered) {
2879                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2880                        rec->ino, rec->ino);
2881                 namelen = count_digits(rec->ino);
2882                 sprintf(namebuf, "%llu", rec->ino);
2883                 name_recovered = 1;
2884         }
2885         if (!type_recovered) {
2886                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2887                        rec->ino);
2888                 type = BTRFS_FT_REG_FILE;
2889                 type_recovered = 1;
2890         }
2891
2892         ret = reset_nlink(trans, root, path, rec);
2893         if (ret < 0) {
2894                 fprintf(stderr,
2895                         "Failed to reset nlink for inode %llu: %s\n",
2896                         rec->ino, strerror(-ret));
2897                 goto out;
2898         }
2899
2900         if (rec->found_link == 0) {
2901                 lost_found_ino = root->highest_inode;
2902                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2903                         ret = -EOVERFLOW;
2904                         goto out;
2905                 }
2906                 lost_found_ino++;
2907                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2908                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2909                                   mode);
2910                 if (ret < 0) {
2911                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2912                                 dir_name, strerror(-ret));
2913                         goto out;
2914                 }
2915                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2916                                      namebuf, namelen, type, NULL, 1);
2917                 /*
2918                  * Add ".INO" suffix several times to handle case where
2919                  * "FILENAME.INO" is already taken by another file.
2920                  */
2921                 while (ret == -EEXIST) {
2922                         /*
2923                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2924                          */
2925                         if (namelen + count_digits(rec->ino) + 1 >
2926                             BTRFS_NAME_LEN) {
2927                                 ret = -EFBIG;
2928                                 goto out;
2929                         }
2930                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2931                                  ".%llu", rec->ino);
2932                         namelen += count_digits(rec->ino) + 1;
2933                         ret = btrfs_add_link(trans, root, rec->ino,
2934                                              lost_found_ino, namebuf,
2935                                              namelen, type, NULL, 1);
2936                 }
2937                 if (ret < 0) {
2938                         fprintf(stderr,
2939                                 "Failed to link the inode %llu to %s dir: %s\n",
2940                                 rec->ino, dir_name, strerror(-ret));
2941                         goto out;
2942                 }
2943                 /*
2944                  * Just increase the found_link, don't actually add the
2945                  * backref. This will make things easier and this inode
2946                  * record will be freed after the repair is done.
2947                  * So fsck will not report problem about this inode.
2948                  */
2949                 rec->found_link++;
2950                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2951                        namelen, namebuf, dir_name);
2952         }
2953         printf("Fixed the nlink of inode %llu\n", rec->ino);
2954 out:
2955         /*
2956          * Clear the flag anyway, or we will loop forever for the same inode
2957          * as it will not be removed from the bad inode list and the dead loop
2958          * happens.
2959          */
2960         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2961         btrfs_release_path(path);
2962         return ret;
2963 }
2964
2965 /*
2966  * Check if there is any normal(reg or prealloc) file extent for given
2967  * ino.
2968  * This is used to determine the file type when neither its dir_index/item or
2969  * inode_item exists.
2970  *
2971  * This will *NOT* report error, if any error happens, just consider it does
2972  * not have any normal file extent.
2973  */
2974 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2975 {
2976         struct btrfs_path path;
2977         struct btrfs_key key;
2978         struct btrfs_key found_key;
2979         struct btrfs_file_extent_item *fi;
2980         u8 type;
2981         int ret = 0;
2982
2983         btrfs_init_path(&path);
2984         key.objectid = ino;
2985         key.type = BTRFS_EXTENT_DATA_KEY;
2986         key.offset = 0;
2987
2988         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2989         if (ret < 0) {
2990                 ret = 0;
2991                 goto out;
2992         }
2993         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2994                 ret = btrfs_next_leaf(root, &path);
2995                 if (ret) {
2996                         ret = 0;
2997                         goto out;
2998                 }
2999         }
3000         while (1) {
3001                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3002                                       path.slots[0]);
3003                 if (found_key.objectid != ino ||
3004                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3005                         break;
3006                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3007                                     struct btrfs_file_extent_item);
3008                 type = btrfs_file_extent_type(path.nodes[0], fi);
3009                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3010                         ret = 1;
3011                         goto out;
3012                 }
3013         }
3014 out:
3015         btrfs_release_path(&path);
3016         return ret;
3017 }
3018
3019 static u32 btrfs_type_to_imode(u8 type)
3020 {
3021         static u32 imode_by_btrfs_type[] = {
3022                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3023                 [BTRFS_FT_DIR]          = S_IFDIR,
3024                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3025                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3026                 [BTRFS_FT_FIFO]         = S_IFIFO,
3027                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3028                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3029         };
3030
3031         return imode_by_btrfs_type[(type)];
3032 }
3033
3034 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3035                                 struct btrfs_root *root,
3036                                 struct btrfs_path *path,
3037                                 struct inode_record *rec)
3038 {
3039         u8 filetype;
3040         u32 mode = 0700;
3041         int type_recovered = 0;
3042         int ret = 0;
3043
3044         printf("Trying to rebuild inode:%llu\n", rec->ino);
3045
3046         type_recovered = !find_file_type(rec, &filetype);
3047
3048         /*
3049          * Try to determine inode type if type not found.
3050          *
3051          * For found regular file extent, it must be FILE.
3052          * For found dir_item/index, it must be DIR.
3053          *
3054          * For undetermined one, use FILE as fallback.
3055          *
3056          * TODO:
3057          * 1. If found backref(inode_index/item is already handled) to it,
3058          *    it must be DIR.
3059          *    Need new inode-inode ref structure to allow search for that.
3060          */
3061         if (!type_recovered) {
3062                 if (rec->found_file_extent &&
3063                     find_normal_file_extent(root, rec->ino)) {
3064                         type_recovered = 1;
3065                         filetype = BTRFS_FT_REG_FILE;
3066                 } else if (rec->found_dir_item) {
3067                         type_recovered = 1;
3068                         filetype = BTRFS_FT_DIR;
3069                 } else if (!list_empty(&rec->orphan_extents)) {
3070                         type_recovered = 1;
3071                         filetype = BTRFS_FT_REG_FILE;
3072                 } else{
3073                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3074                                rec->ino);
3075                         type_recovered = 1;
3076                         filetype = BTRFS_FT_REG_FILE;
3077                 }
3078         }
3079
3080         ret = btrfs_new_inode(trans, root, rec->ino,
3081                               mode | btrfs_type_to_imode(filetype));
3082         if (ret < 0)
3083                 goto out;
3084
3085         /*
3086          * Here inode rebuild is done, we only rebuild the inode item,
3087          * don't repair the nlink(like move to lost+found).
3088          * That is the job of nlink repair.
3089          *
3090          * We just fill the record and return
3091          */
3092         rec->found_dir_item = 1;
3093         rec->imode = mode | btrfs_type_to_imode(filetype);
3094         rec->nlink = 0;
3095         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3096         /* Ensure the inode_nlinks repair function will be called */
3097         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3098 out:
3099         return ret;
3100 }
3101
3102 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3103                                       struct btrfs_root *root,
3104                                       struct btrfs_path *path,
3105                                       struct inode_record *rec)
3106 {
3107         struct orphan_data_extent *orphan;
3108         struct orphan_data_extent *tmp;
3109         int ret = 0;
3110
3111         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3112                 /*
3113                  * Check for conflicting file extents
3114                  *
3115                  * Here we don't know whether the extents is compressed or not,
3116                  * so we can only assume it not compressed nor data offset,
3117                  * and use its disk_len as extent length.
3118                  */
3119                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3120                                        orphan->offset, orphan->disk_len, 0);
3121                 btrfs_release_path(path);
3122                 if (ret < 0)
3123                         goto out;
3124                 if (!ret) {
3125                         fprintf(stderr,
3126                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3127                                 orphan->disk_bytenr, orphan->disk_len);
3128                         ret = btrfs_free_extent(trans,
3129                                         root->fs_info->extent_root,
3130                                         orphan->disk_bytenr, orphan->disk_len,
3131                                         0, root->objectid, orphan->objectid,
3132                                         orphan->offset);
3133                         if (ret < 0)
3134                                 goto out;
3135                 }
3136                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3137                                 orphan->offset, orphan->disk_bytenr,
3138                                 orphan->disk_len, orphan->disk_len);
3139                 if (ret < 0)
3140                         goto out;
3141
3142                 /* Update file size info */
3143                 rec->found_size += orphan->disk_len;
3144                 if (rec->found_size == rec->nbytes)
3145                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3146
3147                 /* Update the file extent hole info too */
3148                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3149                                            orphan->disk_len);
3150                 if (ret < 0)
3151                         goto out;
3152                 if (RB_EMPTY_ROOT(&rec->holes))
3153                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3154
3155                 list_del(&orphan->list);
3156                 free(orphan);
3157         }
3158         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3159 out:
3160         return ret;
3161 }
3162
3163 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3164                                         struct btrfs_root *root,
3165                                         struct btrfs_path *path,
3166                                         struct inode_record *rec)
3167 {
3168         struct rb_node *node;
3169         struct file_extent_hole *hole;
3170         int found = 0;
3171         int ret = 0;
3172
3173         node = rb_first(&rec->holes);
3174
3175         while (node) {
3176                 found = 1;
3177                 hole = rb_entry(node, struct file_extent_hole, node);
3178                 ret = btrfs_punch_hole(trans, root, rec->ino,
3179                                        hole->start, hole->len);
3180                 if (ret < 0)
3181                         goto out;
3182                 ret = del_file_extent_hole(&rec->holes, hole->start,
3183                                            hole->len);
3184                 if (ret < 0)
3185                         goto out;
3186                 if (RB_EMPTY_ROOT(&rec->holes))
3187                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3188                 node = rb_first(&rec->holes);
3189         }
3190         /* special case for a file losing all its file extent */
3191         if (!found) {
3192                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3193                                        round_up(rec->isize, root->sectorsize));
3194                 if (ret < 0)
3195                         goto out;
3196         }
3197         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3198                rec->ino, root->objectid);
3199 out:
3200         return ret;
3201 }
3202
3203 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3204 {
3205         struct btrfs_trans_handle *trans;
3206         struct btrfs_path path;
3207         int ret = 0;
3208
3209         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3210                              I_ERR_NO_ORPHAN_ITEM |
3211                              I_ERR_LINK_COUNT_WRONG |
3212                              I_ERR_NO_INODE_ITEM |
3213                              I_ERR_FILE_EXTENT_ORPHAN |
3214                              I_ERR_FILE_EXTENT_DISCOUNT|
3215                              I_ERR_FILE_NBYTES_WRONG)))
3216                 return rec->errors;
3217
3218         /*
3219          * For nlink repair, it may create a dir and add link, so
3220          * 2 for parent(256)'s dir_index and dir_item
3221          * 2 for lost+found dir's inode_item and inode_ref
3222          * 1 for the new inode_ref of the file
3223          * 2 for lost+found dir's dir_index and dir_item for the file
3224          */
3225         trans = btrfs_start_transaction(root, 7);
3226         if (IS_ERR(trans))
3227                 return PTR_ERR(trans);
3228
3229         btrfs_init_path(&path);
3230         if (rec->errors & I_ERR_NO_INODE_ITEM)
3231                 ret = repair_inode_no_item(trans, root, &path, rec);
3232         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3233                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3234         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3235                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3236         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3237                 ret = repair_inode_isize(trans, root, &path, rec);
3238         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3239                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3240         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3241                 ret = repair_inode_nlinks(trans, root, &path, rec);
3242         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3243                 ret = repair_inode_nbytes(trans, root, &path, rec);
3244         btrfs_commit_transaction(trans, root);
3245         btrfs_release_path(&path);
3246         return ret;
3247 }
3248
3249 static int check_inode_recs(struct btrfs_root *root,
3250                             struct cache_tree *inode_cache)
3251 {
3252         struct cache_extent *cache;
3253         struct ptr_node *node;
3254         struct inode_record *rec;
3255         struct inode_backref *backref;
3256         int stage = 0;
3257         int ret = 0;
3258         int err = 0;
3259         u64 error = 0;
3260         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3261
3262         if (btrfs_root_refs(&root->root_item) == 0) {
3263                 if (!cache_tree_empty(inode_cache))
3264                         fprintf(stderr, "warning line %d\n", __LINE__);
3265                 return 0;
3266         }
3267
3268         /*
3269          * We need to record the highest inode number for later 'lost+found'
3270          * dir creation.
3271          * We must select an ino not used/referred by any existing inode, or
3272          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3273          * this may cause 'lost+found' dir has wrong nlinks.
3274          */
3275         cache = last_cache_extent(inode_cache);
3276         if (cache) {
3277                 node = container_of(cache, struct ptr_node, cache);
3278                 rec = node->data;
3279                 if (rec->ino > root->highest_inode)
3280                         root->highest_inode = rec->ino;
3281         }
3282
3283         /*
3284          * We need to repair backrefs first because we could change some of the
3285          * errors in the inode recs.
3286          *
3287          * We also need to go through and delete invalid backrefs first and then
3288          * add the correct ones second.  We do this because we may get EEXIST
3289          * when adding back the correct index because we hadn't yet deleted the
3290          * invalid index.
3291          *
3292          * For example, if we were missing a dir index then the directories
3293          * isize would be wrong, so if we fixed the isize to what we thought it
3294          * would be and then fixed the backref we'd still have a invalid fs, so
3295          * we need to add back the dir index and then check to see if the isize
3296          * is still wrong.
3297          */
3298         while (stage < 3) {
3299                 stage++;
3300                 if (stage == 3 && !err)
3301                         break;
3302
3303                 cache = search_cache_extent(inode_cache, 0);
3304                 while (repair && cache) {
3305                         node = container_of(cache, struct ptr_node, cache);
3306                         rec = node->data;
3307                         cache = next_cache_extent(cache);
3308
3309                         /* Need to free everything up and rescan */
3310                         if (stage == 3) {
3311                                 remove_cache_extent(inode_cache, &node->cache);
3312                                 free(node);
3313                                 free_inode_rec(rec);
3314                                 continue;
3315                         }
3316
3317                         if (list_empty(&rec->backrefs))
3318                                 continue;
3319
3320                         ret = repair_inode_backrefs(root, rec, inode_cache,
3321                                                     stage == 1);
3322                         if (ret < 0) {
3323                                 err = ret;
3324                                 stage = 2;
3325                                 break;
3326                         } if (ret > 0) {
3327                                 err = -EAGAIN;
3328                         }
3329                 }
3330         }
3331         if (err)
3332                 return err;
3333
3334         rec = get_inode_rec(inode_cache, root_dirid, 0);
3335         BUG_ON(IS_ERR(rec));
3336         if (rec) {
3337                 ret = check_root_dir(rec);
3338                 if (ret) {
3339                         fprintf(stderr, "root %llu root dir %llu error\n",
3340                                 (unsigned long long)root->root_key.objectid,
3341                                 (unsigned long long)root_dirid);
3342                         print_inode_error(root, rec);
3343                         error++;
3344                 }
3345         } else {
3346                 if (repair) {
3347                         struct btrfs_trans_handle *trans;
3348
3349                         trans = btrfs_start_transaction(root, 1);
3350                         if (IS_ERR(trans)) {
3351                                 err = PTR_ERR(trans);
3352                                 return err;
3353                         }
3354
3355                         fprintf(stderr,
3356                                 "root %llu missing its root dir, recreating\n",
3357                                 (unsigned long long)root->objectid);
3358
3359                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3360                         BUG_ON(ret);
3361
3362                         btrfs_commit_transaction(trans, root);
3363                         return -EAGAIN;
3364                 }
3365
3366                 fprintf(stderr, "root %llu root dir %llu not found\n",
3367                         (unsigned long long)root->root_key.objectid,
3368                         (unsigned long long)root_dirid);
3369         }
3370
3371         while (1) {
3372                 cache = search_cache_extent(inode_cache, 0);
3373                 if (!cache)
3374                         break;
3375                 node = container_of(cache, struct ptr_node, cache);
3376                 rec = node->data;
3377                 remove_cache_extent(inode_cache, &node->cache);
3378                 free(node);
3379                 if (rec->ino == root_dirid ||
3380                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3381                         free_inode_rec(rec);
3382                         continue;
3383                 }
3384
3385                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3386                         ret = check_orphan_item(root, rec->ino);
3387                         if (ret == 0)
3388                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3389                         if (can_free_inode_rec(rec)) {
3390                                 free_inode_rec(rec);
3391                                 continue;
3392                         }
3393                 }
3394
3395                 if (!rec->found_inode_item)
3396                         rec->errors |= I_ERR_NO_INODE_ITEM;
3397                 if (rec->found_link != rec->nlink)
3398                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3399                 if (repair) {
3400                         ret = try_repair_inode(root, rec);
3401                         if (ret == 0 && can_free_inode_rec(rec)) {
3402                                 free_inode_rec(rec);
3403                                 continue;
3404                         }
3405                         ret = 0;
3406                 }
3407
3408                 if (!(repair && ret == 0))
3409                         error++;
3410                 print_inode_error(root, rec);
3411                 list_for_each_entry(backref, &rec->backrefs, list) {
3412                         if (!backref->found_dir_item)
3413                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3414                         if (!backref->found_dir_index)
3415                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3416                         if (!backref->found_inode_ref)
3417                                 backref->errors |= REF_ERR_NO_INODE_REF;
3418                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3419                                 " namelen %u name %s filetype %d errors %x",
3420                                 (unsigned long long)backref->dir,
3421                                 (unsigned long long)backref->index,
3422                                 backref->namelen, backref->name,
3423                                 backref->filetype, backref->errors);
3424                         print_ref_error(backref->errors);
3425                 }
3426                 free_inode_rec(rec);
3427         }
3428         return (error > 0) ? -1 : 0;
3429 }
3430
3431 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3432                                         u64 objectid)
3433 {
3434         struct cache_extent *cache;
3435         struct root_record *rec = NULL;
3436         int ret;
3437
3438         cache = lookup_cache_extent(root_cache, objectid, 1);
3439         if (cache) {
3440                 rec = container_of(cache, struct root_record, cache);
3441         } else {
3442                 rec = calloc(1, sizeof(*rec));
3443                 if (!rec)
3444                         return ERR_PTR(-ENOMEM);
3445                 rec->objectid = objectid;
3446                 INIT_LIST_HEAD(&rec->backrefs);
3447                 rec->cache.start = objectid;
3448                 rec->cache.size = 1;
3449
3450                 ret = insert_cache_extent(root_cache, &rec->cache);
3451                 if (ret)
3452                         return ERR_PTR(-EEXIST);
3453         }
3454         return rec;
3455 }
3456
3457 static struct root_backref *get_root_backref(struct root_record *rec,
3458                                              u64 ref_root, u64 dir, u64 index,
3459                                              const char *name, int namelen)
3460 {
3461         struct root_backref *backref;
3462
3463         list_for_each_entry(backref, &rec->backrefs, list) {
3464                 if (backref->ref_root != ref_root || backref->dir != dir ||
3465                     backref->namelen != namelen)
3466                         continue;
3467                 if (memcmp(name, backref->name, namelen))
3468                         continue;
3469                 return backref;
3470         }
3471
3472         backref = calloc(1, sizeof(*backref) + namelen + 1);
3473         if (!backref)
3474                 return NULL;
3475         backref->ref_root = ref_root;
3476         backref->dir = dir;
3477         backref->index = index;
3478         backref->namelen = namelen;
3479         memcpy(backref->name, name, namelen);
3480         backref->name[namelen] = '\0';
3481         list_add_tail(&backref->list, &rec->backrefs);
3482         return backref;
3483 }
3484
3485 static void free_root_record(struct cache_extent *cache)
3486 {
3487         struct root_record *rec;
3488         struct root_backref *backref;
3489
3490         rec = container_of(cache, struct root_record, cache);
3491         while (!list_empty(&rec->backrefs)) {
3492                 backref = to_root_backref(rec->backrefs.next);
3493                 list_del(&backref->list);
3494                 free(backref);
3495         }
3496
3497         free(rec);
3498 }
3499
3500 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3501
3502 static int add_root_backref(struct cache_tree *root_cache,
3503                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3504                             const char *name, int namelen,
3505                             int item_type, int errors)
3506 {
3507         struct root_record *rec;
3508         struct root_backref *backref;
3509
3510         rec = get_root_rec(root_cache, root_id);
3511         BUG_ON(IS_ERR(rec));
3512         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3513         BUG_ON(!backref);
3514
3515         backref->errors |= errors;
3516
3517         if (item_type != BTRFS_DIR_ITEM_KEY) {
3518                 if (backref->found_dir_index || backref->found_back_ref ||
3519                     backref->found_forward_ref) {
3520                         if (backref->index != index)
3521                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3522                 } else {
3523                         backref->index = index;
3524                 }
3525         }
3526
3527         if (item_type == BTRFS_DIR_ITEM_KEY) {
3528                 if (backref->found_forward_ref)
3529                         rec->found_ref++;
3530                 backref->found_dir_item = 1;
3531         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3532                 backref->found_dir_index = 1;
3533         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3534                 if (backref->found_forward_ref)
3535                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3536                 else if (backref->found_dir_item)
3537                         rec->found_ref++;
3538                 backref->found_forward_ref = 1;
3539         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3540                 if (backref->found_back_ref)
3541                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3542                 backref->found_back_ref = 1;
3543         } else {
3544                 BUG_ON(1);
3545         }
3546
3547         if (backref->found_forward_ref && backref->found_dir_item)
3548                 backref->reachable = 1;
3549         return 0;
3550 }
3551
3552 static int merge_root_recs(struct btrfs_root *root,
3553                            struct cache_tree *src_cache,
3554                            struct cache_tree *dst_cache)
3555 {
3556         struct cache_extent *cache;
3557         struct ptr_node *node;
3558         struct inode_record *rec;
3559         struct inode_backref *backref;
3560         int ret = 0;
3561
3562         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3563                 free_inode_recs_tree(src_cache);
3564                 return 0;
3565         }
3566
3567         while (1) {
3568                 cache = search_cache_extent(src_cache, 0);
3569                 if (!cache)
3570                         break;
3571                 node = container_of(cache, struct ptr_node, cache);
3572                 rec = node->data;
3573                 remove_cache_extent(src_cache, &node->cache);
3574                 free(node);
3575
3576                 ret = is_child_root(root, root->objectid, rec->ino);
3577                 if (ret < 0)
3578                         break;
3579                 else if (ret == 0)
3580                         goto skip;
3581
3582                 list_for_each_entry(backref, &rec->backrefs, list) {
3583                         BUG_ON(backref->found_inode_ref);
3584                         if (backref->found_dir_item)
3585                                 add_root_backref(dst_cache, rec->ino,
3586                                         root->root_key.objectid, backref->dir,
3587                                         backref->index, backref->name,
3588                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3589                                         backref->errors);
3590                         if (backref->found_dir_index)
3591                                 add_root_backref(dst_cache, rec->ino,
3592                                         root->root_key.objectid, backref->dir,
3593                                         backref->index, backref->name,
3594                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3595                                         backref->errors);
3596                 }
3597 skip:
3598                 free_inode_rec(rec);
3599         }
3600         if (ret < 0)
3601                 return ret;
3602         return 0;
3603 }
3604
3605 static int check_root_refs(struct btrfs_root *root,
3606                            struct cache_tree *root_cache)
3607 {
3608         struct root_record *rec;
3609         struct root_record *ref_root;
3610         struct root_backref *backref;
3611         struct cache_extent *cache;
3612         int loop = 1;
3613         int ret;
3614         int error;
3615         int errors = 0;
3616
3617         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3618         BUG_ON(IS_ERR(rec));
3619         rec->found_ref = 1;
3620
3621         /* fixme: this can not detect circular references */
3622         while (loop) {
3623                 loop = 0;
3624                 cache = search_cache_extent(root_cache, 0);
3625                 while (1) {
3626                         if (!cache)
3627                                 break;
3628                         rec = container_of(cache, struct root_record, cache);
3629                         cache = next_cache_extent(cache);
3630
3631                         if (rec->found_ref == 0)
3632                                 continue;
3633
3634                         list_for_each_entry(backref, &rec->backrefs, list) {
3635                                 if (!backref->reachable)
3636                                         continue;
3637
3638                                 ref_root = get_root_rec(root_cache,
3639                                                         backref->ref_root);
3640                                 BUG_ON(IS_ERR(ref_root));
3641                                 if (ref_root->found_ref > 0)
3642                                         continue;
3643
3644                                 backref->reachable = 0;
3645                                 rec->found_ref--;
3646                                 if (rec->found_ref == 0)
3647                                         loop = 1;
3648                         }
3649                 }
3650         }
3651
3652         cache = search_cache_extent(root_cache, 0);
3653         while (1) {
3654                 if (!cache)
3655                         break;
3656                 rec = container_of(cache, struct root_record, cache);
3657                 cache = next_cache_extent(cache);
3658
3659                 if (rec->found_ref == 0 &&
3660                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3661                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3662                         ret = check_orphan_item(root->fs_info->tree_root,
3663                                                 rec->objectid);
3664                         if (ret == 0)
3665                                 continue;
3666
3667                         /*
3668                          * If we don't have a root item then we likely just have
3669                          * a dir item in a snapshot for this root but no actual
3670                          * ref key or anything so it's meaningless.
3671                          */
3672                         if (!rec->found_root_item)
3673                                 continue;
3674                         errors++;
3675                         fprintf(stderr, "fs tree %llu not referenced\n",
3676                                 (unsigned long long)rec->objectid);
3677                 }
3678
3679                 error = 0;
3680                 if (rec->found_ref > 0 && !rec->found_root_item)
3681                         error = 1;
3682                 list_for_each_entry(backref, &rec->backrefs, list) {
3683                         if (!backref->found_dir_item)
3684                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3685                         if (!backref->found_dir_index)
3686                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3687                         if (!backref->found_back_ref)
3688                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3689                         if (!backref->found_forward_ref)
3690                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3691                         if (backref->reachable && backref->errors)
3692                                 error = 1;
3693                 }
3694                 if (!error)
3695                         continue;
3696
3697                 errors++;
3698                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3699                         (unsigned long long)rec->objectid, rec->found_ref,
3700                          rec->found_root_item ? "" : "not found");
3701
3702                 list_for_each_entry(backref, &rec->backrefs, list) {
3703                         if (!backref->reachable)
3704                                 continue;
3705                         if (!backref->errors && rec->found_root_item)
3706                                 continue;
3707                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3708                                 " index %llu namelen %u name %s errors %x\n",
3709                                 (unsigned long long)backref->ref_root,
3710                                 (unsigned long long)backref->dir,
3711                                 (unsigned long long)backref->index,
3712                                 backref->namelen, backref->name,
3713                                 backref->errors);
3714                         print_ref_error(backref->errors);
3715                 }
3716         }
3717         return errors > 0 ? 1 : 0;
3718 }
3719
3720 static int process_root_ref(struct extent_buffer *eb, int slot,
3721                             struct btrfs_key *key,
3722                             struct cache_tree *root_cache)
3723 {
3724         u64 dirid;
3725         u64 index;
3726         u32 len;
3727         u32 name_len;
3728         struct btrfs_root_ref *ref;
3729         char namebuf[BTRFS_NAME_LEN];
3730         int error;
3731
3732         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3733
3734         dirid = btrfs_root_ref_dirid(eb, ref);
3735         index = btrfs_root_ref_sequence(eb, ref);
3736         name_len = btrfs_root_ref_name_len(eb, ref);
3737
3738         if (name_len <= BTRFS_NAME_LEN) {
3739                 len = name_len;
3740                 error = 0;
3741         } else {
3742                 len = BTRFS_NAME_LEN;
3743                 error = REF_ERR_NAME_TOO_LONG;
3744         }
3745         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3746
3747         if (key->type == BTRFS_ROOT_REF_KEY) {
3748                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3749                                  index, namebuf, len, key->type, error);
3750         } else {
3751                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3752                                  index, namebuf, len, key->type, error);
3753         }
3754         return 0;
3755 }
3756
3757 static void free_corrupt_block(struct cache_extent *cache)
3758 {
3759         struct btrfs_corrupt_block *corrupt;
3760
3761         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3762         free(corrupt);
3763 }
3764
3765 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3766
3767 /*
3768  * Repair the btree of the given root.
3769  *
3770  * The fix is to remove the node key in corrupt_blocks cache_tree.
3771  * and rebalance the tree.
3772  * After the fix, the btree should be writeable.
3773  */
3774 static int repair_btree(struct btrfs_root *root,
3775                         struct cache_tree *corrupt_blocks)
3776 {
3777         struct btrfs_trans_handle *trans;
3778         struct btrfs_path path;
3779         struct btrfs_corrupt_block *corrupt;
3780         struct cache_extent *cache;
3781         struct btrfs_key key;
3782         u64 offset;
3783         int level;
3784         int ret = 0;
3785
3786         if (cache_tree_empty(corrupt_blocks))
3787                 return 0;
3788
3789         trans = btrfs_start_transaction(root, 1);
3790         if (IS_ERR(trans)) {
3791                 ret = PTR_ERR(trans);
3792                 fprintf(stderr, "Error starting transaction: %s\n",
3793                         strerror(-ret));
3794                 return ret;
3795         }
3796         btrfs_init_path(&path);
3797         cache = first_cache_extent(corrupt_blocks);
3798         while (cache) {
3799                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3800                                        cache);
3801                 level = corrupt->level;
3802                 path.lowest_level = level;
3803                 key.objectid = corrupt->key.objectid;
3804                 key.type = corrupt->key.type;
3805                 key.offset = corrupt->key.offset;
3806
3807                 /*
3808                  * Here we don't want to do any tree balance, since it may
3809                  * cause a balance with corrupted brother leaf/node,
3810                  * so ins_len set to 0 here.
3811                  * Balance will be done after all corrupt node/leaf is deleted.
3812                  */
3813                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3814                 if (ret < 0)
3815                         goto out;
3816                 offset = btrfs_node_blockptr(path.nodes[level],
3817                                              path.slots[level]);
3818
3819                 /* Remove the ptr */
3820                 ret = btrfs_del_ptr(trans, root, &path, level,
3821                                     path.slots[level]);
3822                 if (ret < 0)
3823                         goto out;
3824                 /*
3825                  * Remove the corresponding extent
3826                  * return value is not concerned.
3827                  */
3828                 btrfs_release_path(&path);
3829                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3830                                         0, root->root_key.objectid,
3831                                         level - 1, 0);
3832                 cache = next_cache_extent(cache);
3833         }
3834
3835         /* Balance the btree using btrfs_search_slot() */
3836         cache = first_cache_extent(corrupt_blocks);
3837         while (cache) {
3838                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3839                                        cache);
3840                 memcpy(&key, &corrupt->key, sizeof(key));
3841                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3842                 if (ret < 0)
3843                         goto out;
3844                 /* return will always >0 since it won't find the item */
3845                 ret = 0;
3846                 btrfs_release_path(&path);
3847                 cache = next_cache_extent(cache);
3848         }
3849 out:
3850         btrfs_commit_transaction(trans, root);
3851         btrfs_release_path(&path);
3852         return ret;
3853 }
3854
3855 static int check_fs_root(struct btrfs_root *root,
3856                          struct cache_tree *root_cache,
3857                          struct walk_control *wc)
3858 {
3859         int ret = 0;
3860         int err = 0;
3861         int wret;
3862         int level;
3863         struct btrfs_path path;
3864         struct shared_node root_node;
3865         struct root_record *rec;
3866         struct btrfs_root_item *root_item = &root->root_item;
3867         struct cache_tree corrupt_blocks;
3868         struct orphan_data_extent *orphan;
3869         struct orphan_data_extent *tmp;
3870         enum btrfs_tree_block_status status;
3871         struct node_refs nrefs;
3872
3873         /*
3874          * Reuse the corrupt_block cache tree to record corrupted tree block
3875          *
3876          * Unlike the usage in extent tree check, here we do it in a per
3877          * fs/subvol tree base.
3878          */
3879         cache_tree_init(&corrupt_blocks);
3880         root->fs_info->corrupt_blocks = &corrupt_blocks;
3881
3882         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3883                 rec = get_root_rec(root_cache, root->root_key.objectid);
3884                 BUG_ON(IS_ERR(rec));
3885                 if (btrfs_root_refs(root_item) > 0)
3886                         rec->found_root_item = 1;
3887         }
3888
3889         btrfs_init_path(&path);
3890         memset(&root_node, 0, sizeof(root_node));
3891         cache_tree_init(&root_node.root_cache);
3892         cache_tree_init(&root_node.inode_cache);
3893         memset(&nrefs, 0, sizeof(nrefs));
3894
3895         /* Move the orphan extent record to corresponding inode_record */
3896         list_for_each_entry_safe(orphan, tmp,
3897                                  &root->orphan_data_extents, list) {
3898                 struct inode_record *inode;
3899
3900                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3901                                       1);
3902                 BUG_ON(IS_ERR(inode));
3903                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3904                 list_move(&orphan->list, &inode->orphan_extents);
3905         }
3906
3907         level = btrfs_header_level(root->node);
3908         memset(wc->nodes, 0, sizeof(wc->nodes));
3909         wc->nodes[level] = &root_node;
3910         wc->active_node = level;
3911         wc->root_level = level;
3912
3913         /* We may not have checked the root block, lets do that now */
3914         if (btrfs_is_leaf(root->node))
3915                 status = btrfs_check_leaf(root, NULL, root->node);
3916         else
3917                 status = btrfs_check_node(root, NULL, root->node);
3918         if (status != BTRFS_TREE_BLOCK_CLEAN)
3919                 return -EIO;
3920
3921         if (btrfs_root_refs(root_item) > 0 ||
3922             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3923                 path.nodes[level] = root->node;
3924                 extent_buffer_get(root->node);
3925                 path.slots[level] = 0;
3926         } else {
3927                 struct btrfs_key key;
3928                 struct btrfs_disk_key found_key;
3929
3930                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3931                 level = root_item->drop_level;
3932                 path.lowest_level = level;
3933                 if (level > btrfs_header_level(root->node) ||
3934                     level >= BTRFS_MAX_LEVEL) {
3935                         error("ignoring invalid drop level: %u", level);
3936                         goto skip_walking;
3937                 }
3938                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3939                 if (wret < 0)
3940                         goto skip_walking;
3941                 btrfs_node_key(path.nodes[level], &found_key,
3942                                 path.slots[level]);
3943                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3944                                         sizeof(found_key)));
3945         }
3946
3947         while (1) {
3948                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3949                 if (wret < 0)
3950                         ret = wret;
3951                 if (wret != 0)
3952                         break;
3953
3954                 wret = walk_up_tree(root, &path, wc, &level);
3955                 if (wret < 0)
3956                         ret = wret;
3957                 if (wret != 0)
3958                         break;
3959         }
3960 skip_walking:
3961         btrfs_release_path(&path);
3962
3963         if (!cache_tree_empty(&corrupt_blocks)) {
3964                 struct cache_extent *cache;
3965                 struct btrfs_corrupt_block *corrupt;
3966
3967                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3968                        root->root_key.objectid);
3969                 cache = first_cache_extent(&corrupt_blocks);
3970                 while (cache) {
3971                         corrupt = container_of(cache,
3972                                                struct btrfs_corrupt_block,
3973                                                cache);
3974                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3975                                cache->start, corrupt->level,
3976                                corrupt->key.objectid, corrupt->key.type,
3977                                corrupt->key.offset);
3978                         cache = next_cache_extent(cache);
3979                 }
3980                 if (repair) {
3981                         printf("Try to repair the btree for root %llu\n",
3982                                root->root_key.objectid);
3983                         ret = repair_btree(root, &corrupt_blocks);
3984                         if (ret < 0)
3985                                 fprintf(stderr, "Failed to repair btree: %s\n",
3986                                         strerror(-ret));
3987                         if (!ret)
3988                                 printf("Btree for root %llu is fixed\n",
3989                                        root->root_key.objectid);
3990                 }
3991         }
3992
3993         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3994         if (err < 0)
3995                 ret = err;
3996
3997         if (root_node.current) {
3998                 root_node.current->checked = 1;
3999                 maybe_free_inode_rec(&root_node.inode_cache,
4000                                 root_node.current);
4001         }
4002
4003         err = check_inode_recs(root, &root_node.inode_cache);
4004         if (!ret)
4005                 ret = err;
4006
4007         free_corrupt_blocks_tree(&corrupt_blocks);
4008         root->fs_info->corrupt_blocks = NULL;
4009         free_orphan_data_extents(&root->orphan_data_extents);
4010         return ret;
4011 }
4012
4013 static int fs_root_objectid(u64 objectid)
4014 {
4015         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4016             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4017                 return 1;
4018         return is_fstree(objectid);
4019 }
4020
4021 static int check_fs_roots(struct btrfs_root *root,
4022                           struct cache_tree *root_cache)
4023 {
4024         struct btrfs_path path;
4025         struct btrfs_key key;
4026         struct walk_control wc;
4027         struct extent_buffer *leaf, *tree_node;
4028         struct btrfs_root *tmp_root;
4029         struct btrfs_root *tree_root = root->fs_info->tree_root;
4030         int ret;
4031         int err = 0;
4032
4033         if (ctx.progress_enabled) {
4034                 ctx.tp = TASK_FS_ROOTS;
4035                 task_start(ctx.info);
4036         }
4037
4038         /*
4039          * Just in case we made any changes to the extent tree that weren't
4040          * reflected into the free space cache yet.
4041          */
4042         if (repair)
4043                 reset_cached_block_groups(root->fs_info);
4044         memset(&wc, 0, sizeof(wc));
4045         cache_tree_init(&wc.shared);
4046         btrfs_init_path(&path);
4047
4048 again:
4049         key.offset = 0;
4050         key.objectid = 0;
4051         key.type = BTRFS_ROOT_ITEM_KEY;
4052         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4053         if (ret < 0) {
4054                 err = 1;
4055                 goto out;
4056         }
4057         tree_node = tree_root->node;
4058         while (1) {
4059                 if (tree_node != tree_root->node) {
4060                         free_root_recs_tree(root_cache);
4061                         btrfs_release_path(&path);
4062                         goto again;
4063                 }
4064                 leaf = path.nodes[0];
4065                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4066                         ret = btrfs_next_leaf(tree_root, &path);
4067                         if (ret) {
4068                                 if (ret < 0)
4069                                         err = 1;
4070                                 break;
4071                         }
4072                         leaf = path.nodes[0];
4073                 }
4074                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4075                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4076                     fs_root_objectid(key.objectid)) {
4077                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4078                                 tmp_root = btrfs_read_fs_root_no_cache(
4079                                                 root->fs_info, &key);
4080                         } else {
4081                                 key.offset = (u64)-1;
4082                                 tmp_root = btrfs_read_fs_root(
4083                                                 root->fs_info, &key);
4084                         }
4085                         if (IS_ERR(tmp_root)) {
4086                                 err = 1;
4087                                 goto next;
4088                         }
4089                         ret = check_fs_root(tmp_root, root_cache, &wc);
4090                         if (ret == -EAGAIN) {
4091                                 free_root_recs_tree(root_cache);
4092                                 btrfs_release_path(&path);
4093                                 goto again;
4094                         }
4095                         if (ret)
4096                                 err = 1;
4097                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4098                                 btrfs_free_fs_root(tmp_root);
4099                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4100                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4101                         process_root_ref(leaf, path.slots[0], &key,
4102                                          root_cache);
4103                 }
4104 next:
4105                 path.slots[0]++;
4106         }
4107 out:
4108         btrfs_release_path(&path);
4109         if (err)
4110                 free_extent_cache_tree(&wc.shared);
4111         if (!cache_tree_empty(&wc.shared))
4112                 fprintf(stderr, "warning line %d\n", __LINE__);
4113
4114         task_stop(ctx.info);
4115
4116         return err;
4117 }
4118
4119 /*
4120  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4121  * INODE_REF/INODE_EXTREF match.
4122  *
4123  * @root:       the root of the fs/file tree
4124  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4125  * @key:        the key of the DIR_ITEM/DIR_INDEX
4126  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4127  *              distinguish root_dir between normal dir/file
4128  * @name:       the name in the INODE_REF/INODE_EXTREF
4129  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4130  * @mode:       the st_mode of INODE_ITEM
4131  *
4132  * Return 0 if no error occurred.
4133  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4134  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4135  * dir/file.
4136  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4137  * not match for normal dir/file.
4138  */
4139 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4140                          struct btrfs_key *key, u64 index, char *name,
4141                          u32 namelen, u32 mode)
4142 {
4143         struct btrfs_path path;
4144         struct extent_buffer *node;
4145         struct btrfs_dir_item *di;
4146         struct btrfs_key location;
4147         char namebuf[BTRFS_NAME_LEN] = {0};
4148         u32 total;
4149         u32 cur = 0;
4150         u32 len;
4151         u32 name_len;
4152         u32 data_len;
4153         u8 filetype;
4154         int slot;
4155         int ret;
4156
4157         btrfs_init_path(&path);
4158         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4159         if (ret < 0) {
4160                 ret = DIR_ITEM_MISSING;
4161                 goto out;
4162         }
4163
4164         /* Process root dir and goto out*/
4165         if (index == 0) {
4166                 if (ret == 0) {
4167                         ret = ROOT_DIR_ERROR;
4168                         error(
4169                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4170                                 root->objectid,
4171                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4172                                         "REF" : "EXTREF",
4173                                 ref_key->objectid, ref_key->offset,
4174                                 key->type == BTRFS_DIR_ITEM_KEY ?
4175                                         "DIR_ITEM" : "DIR_INDEX");
4176                 } else {
4177                         ret = 0;
4178                 }
4179
4180                 goto out;
4181         }
4182
4183         /* Process normal file/dir */
4184         if (ret > 0) {
4185                 ret = DIR_ITEM_MISSING;
4186                 error(
4187                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4188                         root->objectid,
4189                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4190                         ref_key->objectid, ref_key->offset,
4191                         key->type == BTRFS_DIR_ITEM_KEY ?
4192                                 "DIR_ITEM" : "DIR_INDEX",
4193                         key->objectid, key->offset, namelen, name,
4194                         imode_to_type(mode));
4195                 goto out;
4196         }
4197
4198         /* Check whether inode_id/filetype/name match */
4199         node = path.nodes[0];
4200         slot = path.slots[0];
4201         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4202         total = btrfs_item_size_nr(node, slot);
4203         while (cur < total) {
4204                 ret = DIR_ITEM_MISMATCH;
4205                 name_len = btrfs_dir_name_len(node, di);
4206                 data_len = btrfs_dir_data_len(node, di);
4207
4208                 btrfs_dir_item_key_to_cpu(node, di, &location);
4209                 if (location.objectid != ref_key->objectid ||
4210                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4211                     location.offset != 0)
4212                         goto next;
4213
4214                 filetype = btrfs_dir_type(node, di);
4215                 if (imode_to_type(mode) != filetype)
4216                         goto next;
4217
4218                 if (name_len <= BTRFS_NAME_LEN) {
4219                         len = name_len;
4220                 } else {
4221                         len = BTRFS_NAME_LEN;
4222                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4223                         root->objectid,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                         "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, name_len);
4227                 }
4228                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4229                 if (len != namelen || strncmp(namebuf, name, len))
4230                         goto next;
4231
4232                 ret = 0;
4233                 goto out;
4234 next:
4235                 len = sizeof(*di) + name_len + data_len;
4236                 di = (struct btrfs_dir_item *)((char *)di + len);
4237                 cur += len;
4238         }
4239         if (ret == DIR_ITEM_MISMATCH)
4240                 error(
4241                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4242                         root->objectid,
4243                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4244                         ref_key->objectid, ref_key->offset,
4245                         key->type == BTRFS_DIR_ITEM_KEY ?
4246                                 "DIR_ITEM" : "DIR_INDEX",
4247                         key->objectid, key->offset, namelen, name,
4248                         imode_to_type(mode));
4249 out:
4250         btrfs_release_path(&path);
4251         return ret;
4252 }
4253
4254 /*
4255  * Traverse the given INODE_REF and call find_dir_item() to find related
4256  * DIR_ITEM/DIR_INDEX.
4257  *
4258  * @root:       the root of the fs/file tree
4259  * @ref_key:    the key of the INODE_REF
4260  * @refs:       the count of INODE_REF
4261  * @mode:       the st_mode of INODE_ITEM
4262  *
4263  * Return 0 if no error occurred.
4264  */
4265 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4266                            struct extent_buffer *node, int slot, u64 *refs,
4267                            int mode)
4268 {
4269         struct btrfs_key key;
4270         struct btrfs_inode_ref *ref;
4271         char namebuf[BTRFS_NAME_LEN] = {0};
4272         u32 total;
4273         u32 cur = 0;
4274         u32 len;
4275         u32 name_len;
4276         u64 index;
4277         int ret, err = 0;
4278
4279         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4280         total = btrfs_item_size_nr(node, slot);
4281
4282 next:
4283         /* Update inode ref count */
4284         (*refs)++;
4285
4286         index = btrfs_inode_ref_index(node, ref);
4287         name_len = btrfs_inode_ref_name_len(node, ref);
4288         if (name_len <= BTRFS_NAME_LEN) {
4289                 len = name_len;
4290         } else {
4291                 len = BTRFS_NAME_LEN;
4292                 warning("root %llu INODE_REF[%llu %llu] name too long",
4293                         root->objectid, ref_key->objectid, ref_key->offset);
4294         }
4295
4296         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4297
4298         /* Check root dir ref name */
4299         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4300                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4301                       root->objectid, ref_key->objectid, ref_key->offset,
4302                       namebuf);
4303                 err |= ROOT_DIR_ERROR;
4304         }
4305
4306         /* Find related DIR_INDEX */
4307         key.objectid = ref_key->offset;
4308         key.type = BTRFS_DIR_INDEX_KEY;
4309         key.offset = index;
4310         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4311         err |= ret;
4312
4313         /* Find related dir_item */
4314         key.objectid = ref_key->offset;
4315         key.type = BTRFS_DIR_ITEM_KEY;
4316         key.offset = btrfs_name_hash(namebuf, len);
4317         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4318         err |= ret;
4319
4320         len = sizeof(*ref) + name_len;
4321         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4322         cur += len;
4323         if (cur < total)
4324                 goto next;
4325
4326         return err;
4327 }
4328
4329 /*
4330  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4331  * DIR_ITEM/DIR_INDEX.
4332  *
4333  * @root:       the root of the fs/file tree
4334  * @ref_key:    the key of the INODE_EXTREF
4335  * @refs:       the count of INODE_EXTREF
4336  * @mode:       the st_mode of INODE_ITEM
4337  *
4338  * Return 0 if no error occurred.
4339  */
4340 static int check_inode_extref(struct btrfs_root *root,
4341                               struct btrfs_key *ref_key,
4342                               struct extent_buffer *node, int slot, u64 *refs,
4343                               int mode)
4344 {
4345         struct btrfs_key key;
4346         struct btrfs_inode_extref *extref;
4347         char namebuf[BTRFS_NAME_LEN] = {0};
4348         u32 total;
4349         u32 cur = 0;
4350         u32 len;
4351         u32 name_len;
4352         u64 index;
4353         u64 parent;
4354         int ret;
4355         int err = 0;
4356
4357         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4358         total = btrfs_item_size_nr(node, slot);
4359
4360 next:
4361         /* update inode ref count */
4362         (*refs)++;
4363         name_len = btrfs_inode_extref_name_len(node, extref);
4364         index = btrfs_inode_extref_index(node, extref);
4365         parent = btrfs_inode_extref_parent(node, extref);
4366         if (name_len <= BTRFS_NAME_LEN) {
4367                 len = name_len;
4368         } else {
4369                 len = BTRFS_NAME_LEN;
4370                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4371                         root->objectid, ref_key->objectid, ref_key->offset);
4372         }
4373         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4374
4375         /* Check root dir ref name */
4376         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4377                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4378                       root->objectid, ref_key->objectid, ref_key->offset,
4379                       namebuf);
4380                 err |= ROOT_DIR_ERROR;
4381         }
4382
4383         /* find related dir_index */
4384         key.objectid = parent;
4385         key.type = BTRFS_DIR_INDEX_KEY;
4386         key.offset = index;
4387         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4388         err |= ret;
4389
4390         /* find related dir_item */
4391         key.objectid = parent;
4392         key.type = BTRFS_DIR_ITEM_KEY;
4393         key.offset = btrfs_name_hash(namebuf, len);
4394         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4395         err |= ret;
4396
4397         len = sizeof(*extref) + name_len;
4398         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4399         cur += len;
4400
4401         if (cur < total)
4402                 goto next;
4403
4404         return err;
4405 }
4406
4407 /*
4408  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4409  * DIR_ITEM/DIR_INDEX match.
4410  *
4411  * @root:       the root of the fs/file tree
4412  * @key:        the key of the INODE_REF/INODE_EXTREF
4413  * @name:       the name in the INODE_REF/INODE_EXTREF
4414  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4415  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4416  * to (u64)-1
4417  * @ext_ref:    the EXTENDED_IREF feature
4418  *
4419  * Return 0 if no error occurred.
4420  * Return >0 for error bitmap
4421  */
4422 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4423                           char *name, int namelen, u64 index,
4424                           unsigned int ext_ref)
4425 {
4426         struct btrfs_path path;
4427         struct btrfs_inode_ref *ref;
4428         struct btrfs_inode_extref *extref;
4429         struct extent_buffer *node;
4430         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4431         u32 total;
4432         u32 cur = 0;
4433         u32 len;
4434         u32 ref_namelen;
4435         u64 ref_index;
4436         u64 parent;
4437         u64 dir_id;
4438         int slot;
4439         int ret;
4440
4441         btrfs_init_path(&path);
4442         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4443         if (ret) {
4444                 ret = INODE_REF_MISSING;
4445                 goto extref;
4446         }
4447
4448         node = path.nodes[0];
4449         slot = path.slots[0];
4450
4451         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4452         total = btrfs_item_size_nr(node, slot);
4453
4454         /* Iterate all entry of INODE_REF */
4455         while (cur < total) {
4456                 ret = INODE_REF_MISSING;
4457
4458                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4459                 ref_index = btrfs_inode_ref_index(node, ref);
4460                 if (index != (u64)-1 && index != ref_index)
4461                         goto next_ref;
4462
4463                 if (ref_namelen <= BTRFS_NAME_LEN) {
4464                         len = ref_namelen;
4465                 } else {
4466                         len = BTRFS_NAME_LEN;
4467                         warning("root %llu INODE %s[%llu %llu] name too long",
4468                                 root->objectid,
4469                                 key->type == BTRFS_INODE_REF_KEY ?
4470                                         "REF" : "EXTREF",
4471                                 key->objectid, key->offset);
4472                 }
4473                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4474                                    len);
4475
4476                 if (len != namelen || strncmp(ref_namebuf, name, len))
4477                         goto next_ref;
4478
4479                 ret = 0;
4480                 goto out;
4481 next_ref:
4482                 len = sizeof(*ref) + ref_namelen;
4483                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4484                 cur += len;
4485         }
4486
4487 extref:
4488         /* Skip if not support EXTENDED_IREF feature */
4489         if (!ext_ref)
4490                 goto out;
4491
4492         btrfs_release_path(&path);
4493         btrfs_init_path(&path);
4494
4495         dir_id = key->offset;
4496         key->type = BTRFS_INODE_EXTREF_KEY;
4497         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4498
4499         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4500         if (ret) {
4501                 ret = INODE_REF_MISSING;
4502                 goto out;
4503         }
4504
4505         node = path.nodes[0];
4506         slot = path.slots[0];
4507
4508         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4509         cur = 0;
4510         total = btrfs_item_size_nr(node, slot);
4511
4512         /* Iterate all entry of INODE_EXTREF */
4513         while (cur < total) {
4514                 ret = INODE_REF_MISSING;
4515
4516                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4517                 ref_index = btrfs_inode_extref_index(node, extref);
4518                 parent = btrfs_inode_extref_parent(node, extref);
4519                 if (index != (u64)-1 && index != ref_index)
4520                         goto next_extref;
4521
4522                 if (parent != dir_id)
4523                         goto next_extref;
4524
4525                 if (ref_namelen <= BTRFS_NAME_LEN) {
4526                         len = ref_namelen;
4527                 } else {
4528                         len = BTRFS_NAME_LEN;
4529                         warning("root %llu INODE %s[%llu %llu] name too long",
4530                                 root->objectid,
4531                                 key->type == BTRFS_INODE_REF_KEY ?
4532                                         "REF" : "EXTREF",
4533                                 key->objectid, key->offset);
4534                 }
4535                 read_extent_buffer(node, ref_namebuf,
4536                                    (unsigned long)(extref + 1), len);
4537
4538                 if (len != namelen || strncmp(ref_namebuf, name, len))
4539                         goto next_extref;
4540
4541                 ret = 0;
4542                 goto out;
4543
4544 next_extref:
4545                 len = sizeof(*extref) + ref_namelen;
4546                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4547                 cur += len;
4548
4549         }
4550 out:
4551         btrfs_release_path(&path);
4552         return ret;
4553 }
4554
/*
 * Traverse the given DIR_ITEM/DIR_INDEX, check the related INODE_ITEM of
 * every entry and call find_inode_ref() to check the related
 * INODE_REF/INODE_EXTREF.
 *
 * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
 * @node:       the leaf holding the DIR_ITEM/DIR_INDEX item
 * @slot:       the slot of the DIR_ITEM/DIR_INDEX item in @node
 * @size:       accumulator the name length of every entry is added to, to
 *              be compared with the st_size of the INODE_ITEM
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 */
4566 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4567                           struct extent_buffer *node, int slot, u64 *size,
4568                           unsigned int ext_ref)
4569 {
4570         struct btrfs_dir_item *di;
4571         struct btrfs_inode_item *ii;
4572         struct btrfs_path path;
4573         struct btrfs_key location;
4574         char namebuf[BTRFS_NAME_LEN] = {0};
4575         u32 total;
4576         u32 cur = 0;
4577         u32 len;
4578         u32 name_len;
4579         u32 data_len;
4580         u8 filetype;
4581         u32 mode;
4582         u64 index;
4583         int ret;
4584         int err = 0;
4585
4586         /*
4587          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4588          * ignore index check.
4589          */
4590         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4591
4592         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4593         total = btrfs_item_size_nr(node, slot);
4594
4595         while (cur < total) {
4596                 data_len = btrfs_dir_data_len(node, di);
4597                 if (data_len)
4598                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4599                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4600                               "DIR_ITEM" : "DIR_INDEX",
4601                               key->objectid, key->offset, data_len);
4602
4603                 name_len = btrfs_dir_name_len(node, di);
4604                 if (name_len <= BTRFS_NAME_LEN) {
4605                         len = name_len;
4606                 } else {
4607                         len = BTRFS_NAME_LEN;
4608                         warning("root %llu %s[%llu %llu] name too long",
4609                                 root->objectid,
4610                                 key->type == BTRFS_DIR_ITEM_KEY ?
4611                                 "DIR_ITEM" : "DIR_INDEX",
4612                                 key->objectid, key->offset);
4613                 }
4614                 (*size) += name_len;
4615
4616                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4617                 filetype = btrfs_dir_type(node, di);
4618
4619                 btrfs_init_path(&path);
4620                 btrfs_dir_item_key_to_cpu(node, di, &location);
4621
4622                 /* Ignore related ROOT_ITEM check */
4623                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4624                         goto next;
4625
4626                 /* Check relative INODE_ITEM(existence/filetype) */
4627                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4628                 if (ret) {
4629                         err |= INODE_ITEM_MISSING;
4630                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4631                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4632                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4633                               key->offset, location.objectid, name_len,
4634                               namebuf, filetype);
4635                         goto next;
4636                 }
4637
4638                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4639                                     struct btrfs_inode_item);
4640                 mode = btrfs_inode_mode(path.nodes[0], ii);
4641
4642                 if (imode_to_type(mode) != filetype) {
4643                         err |= INODE_ITEM_MISMATCH;
4644                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4645                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4646                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4647                               key->offset, name_len, namebuf, filetype);
4648                 }
4649
4650                 /* Check relative INODE_REF/INODE_EXTREF */
4651                 location.type = BTRFS_INODE_REF_KEY;
4652                 location.offset = key->objectid;
4653                 ret = find_inode_ref(root, &location, namebuf, len,
4654                                        index, ext_ref);
4655                 err |= ret;
4656                 if (ret & INODE_REF_MISSING)
4657                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4658                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4659                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4660                               key->offset, name_len, namebuf, filetype);
4661
4662 next:
4663                 btrfs_release_path(&path);
4664                 len = sizeof(*di) + name_len + data_len;
4665                 di = (struct btrfs_dir_item *)((char *)di + len);
4666                 cur += len;
4667
4668                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4669                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4670                               root->objectid, key->objectid, key->offset);
4671                         break;
4672                 }
4673         }
4674
4675         return err;
4676 }
4677
4678 /*
4679  * Check file extent datasum/hole, update the size of the file extents,
4680  * check and update the last offset of the file extent.
4681  *
4682  * @root:       the root of fs/file tree.
4683  * @fkey:       the key of the file extent.
4684  * @nodatasum:  INODE_NODATASUM feature.
4685  * @size:       the sum of all EXTENT_DATA items size for this inode.
4686  * @end:        the offset of the last extent.
4687  *
4688  * Return 0 if no error occurred.
4689  */
4690 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4691                              struct extent_buffer *node, int slot,
4692                              unsigned int nodatasum, u64 *size, u64 *end)
4693 {
4694         struct btrfs_file_extent_item *fi;
4695         u64 disk_bytenr;
4696         u64 disk_num_bytes;
4697         u64 extent_num_bytes;
4698         u64 found;
4699         unsigned int extent_type;
4700         unsigned int is_hole;
4701         int ret;
4702         int err = 0;
4703
4704         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4705
4706         extent_type = btrfs_file_extent_type(node, fi);
4707         /* Skip if file extent is inline */
4708         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4709                 struct btrfs_item *e = btrfs_item_nr(slot);
4710                 u32 item_inline_len;
4711
4712                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4713                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4714                 if (extent_num_bytes == 0 ||
4715                     extent_num_bytes != item_inline_len)
4716                         err |= FILE_EXTENT_ERROR;
4717                 *size += extent_num_bytes;
4718                 return err;
4719         }
4720
4721         /* Check extent type */
4722         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4723                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4724                 err |= FILE_EXTENT_ERROR;
4725                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4726                       root->objectid, fkey->objectid, fkey->offset);
4727                 return err;
4728         }
4729
4730         /* Check REG_EXTENT/PREALLOC_EXTENT */
4731         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4732         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4733         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4734         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4735
4736         /* Check EXTENT_DATA datasum */
4737         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4738         if (found > 0 && nodatasum) {
4739                 err |= ODD_CSUM_ITEM;
4740                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4741                       root->objectid, fkey->objectid, fkey->offset);
4742         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4743                    !is_hole &&
4744                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4745                 err |= CSUM_ITEM_MISSING;
4746                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4747                       root->objectid, fkey->objectid, fkey->offset);
4748         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4749                 err |= ODD_CSUM_ITEM;
4750                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4751                       root->objectid, fkey->objectid, fkey->offset);
4752         }
4753
4754         /* Check EXTENT_DATA hole */
4755         if (no_holes && is_hole) {
4756                 err |= FILE_EXTENT_ERROR;
4757                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4758                       root->objectid, fkey->objectid, fkey->offset);
4759         } else if (!no_holes && *end != fkey->offset) {
4760                 err |= FILE_EXTENT_ERROR;
4761                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4762                       root->objectid, fkey->objectid, fkey->offset);
4763         }
4764
4765         *end += extent_num_bytes;
4766         if (!is_hole)
4767                 *size += extent_num_bytes;
4768
4769         return err;
4770 }
4771
4772 /*
4773  * Check INODE_ITEM and related ITEMs (the same inode number)
4774  * 1. check link count
4775  * 2. check inode ref/extref
4776  * 3. check dir item/index
4777  *
4778  * @ext_ref:    the EXTENDED_IREF feature
4779  *
4780  * Return 0 if no error occurred.
4781  * Return >0 for error or hit the traversal is done(by error bitmap)
4782  */
4783 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4784                             unsigned int ext_ref)
4785 {
4786         struct extent_buffer *node;
4787         struct btrfs_inode_item *ii;
4788         struct btrfs_key key;
4789         u64 inode_id;
4790         u32 mode;
4791         u64 nlink;
4792         u64 nbytes;
4793         u64 isize;
4794         u64 size = 0;
4795         u64 refs = 0;
4796         u64 extent_end = 0;
4797         u64 extent_size = 0;
4798         unsigned int dir;
4799         unsigned int nodatasum;
4800         int slot;
4801         int ret;
4802         int err = 0;
4803
4804         node = path->nodes[0];
4805         slot = path->slots[0];
4806
4807         btrfs_item_key_to_cpu(node, &key, slot);
4808         inode_id = key.objectid;
4809
4810         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4811                 ret = btrfs_next_item(root, path);
4812                 if (ret > 0)
4813                         err |= LAST_ITEM;
4814                 return err;
4815         }
4816
4817         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4818         isize = btrfs_inode_size(node, ii);
4819         nbytes = btrfs_inode_nbytes(node, ii);
4820         mode = btrfs_inode_mode(node, ii);
4821         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4822         nlink = btrfs_inode_nlink(node, ii);
4823         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4824
4825         while (1) {
4826                 ret = btrfs_next_item(root, path);
4827                 if (ret < 0) {
4828                         /* out will fill 'err' rusing current statistics */
4829                         goto out;
4830                 } else if (ret > 0) {
4831                         err |= LAST_ITEM;
4832                         goto out;
4833                 }
4834
4835                 node = path->nodes[0];
4836                 slot = path->slots[0];
4837                 btrfs_item_key_to_cpu(node, &key, slot);
4838                 if (key.objectid != inode_id)
4839                         goto out;
4840
4841                 switch (key.type) {
4842                 case BTRFS_INODE_REF_KEY:
4843                         ret = check_inode_ref(root, &key, node, slot, &refs,
4844                                               mode);
4845                         err |= ret;
4846                         break;
4847                 case BTRFS_INODE_EXTREF_KEY:
4848                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4849                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4850                                         root->objectid, key.objectid,
4851                                         key.offset);
4852                         ret = check_inode_extref(root, &key, node, slot, &refs,
4853                                                  mode);
4854                         err |= ret;
4855                         break;
4856                 case BTRFS_DIR_ITEM_KEY:
4857                 case BTRFS_DIR_INDEX_KEY:
4858                         if (!dir) {
4859                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4860                                         root->objectid, inode_id,
4861                                         imode_to_type(mode), key.objectid,
4862                                         key.offset);
4863                         }
4864                         ret = check_dir_item(root, &key, node, slot, &size,
4865                                              ext_ref);
4866                         err |= ret;
4867                         break;
4868                 case BTRFS_EXTENT_DATA_KEY:
4869                         if (dir) {
4870                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4871                                         root->objectid, inode_id, key.objectid,
4872                                         key.offset);
4873                         }
4874                         ret = check_file_extent(root, &key, node, slot,
4875                                                 nodatasum, &extent_size,
4876                                                 &extent_end);
4877                         err |= ret;
4878                         break;
4879                 case BTRFS_XATTR_ITEM_KEY:
4880                         break;
4881                 default:
4882                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4883                               key.objectid, key.type, key.offset);
4884                 }
4885         }
4886
4887 out:
4888         /* verify INODE_ITEM nlink/isize/nbytes */
4889         if (dir) {
4890                 if (nlink != 1) {
4891                         err |= LINK_COUNT_ERROR;
4892                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4893                               root->objectid, inode_id, nlink);
4894                 }
4895
4896                 /*
4897                  * Just a warning, as dir inode nbytes is just an
4898                  * instructive value.
4899                  */
4900                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4901                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4902                                 root->objectid, inode_id, root->nodesize);
4903                 }
4904
4905                 if (isize != size) {
4906                         err |= ISIZE_ERROR;
4907                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4908                               root->objectid, inode_id, isize, size);
4909                 }
4910         } else {
4911                 if (nlink != refs) {
4912                         err |= LINK_COUNT_ERROR;
4913                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4914                               root->objectid, inode_id, nlink, refs);
4915                 } else if (!nlink) {
4916                         err |= ORPHAN_ITEM;
4917                 }
4918
4919                 if (!nbytes && !no_holes && extent_end < isize) {
4920                         err |= NBYTES_ERROR;
4921                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4922                               root->objectid, inode_id, isize);
4923                 }
4924
4925                 if (nbytes != extent_size) {
4926                         err |= NBYTES_ERROR;
4927                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4928                               root->objectid, inode_id, nbytes, extent_size);
4929                 }
4930         }
4931
4932         return err;
4933 }
4934
4935 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4936 {
4937         struct btrfs_path path;
4938         struct btrfs_key key;
4939         int err = 0;
4940         int ret;
4941
4942         btrfs_init_path(&path);
4943         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4944         key.type = BTRFS_INODE_ITEM_KEY;
4945         key.offset = 0;
4946
4947         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4948         if (ret < 0)
4949                 goto out;
4950         if (ret > 0) {
4951                 ret = 0;
4952                 err |= INODE_ITEM_MISSING;
4953         }
4954
4955         err |= check_inode_item(root, &path, ext_ref);
4956         err &= ~LAST_ITEM;
4957         if (err && !ret)
4958                 ret = -EIO;
4959 out:
4960         btrfs_release_path(&path);
4961         return ret;
4962 }
4963
4964 /*
4965  * Iterate all item on the tree and call check_inode_item() to check.
4966  *
4967  * @root:       the root of the tree to be checked.
4968  * @ext_ref:    the EXTENDED_IREF feature
4969  *
4970  * Return 0 if no error found.
4971  * Return <0 for error.
4972  */
4973 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4974 {
4975         struct btrfs_path *path;
4976         struct node_refs nrefs;
4977         struct btrfs_root_item *root_item = &root->root_item;
4978         int ret, wret;
4979         int level;
4980
4981         /*
4982          * We need to manually check the first inode item(256)
4983          * As the following traversal function will only start from
4984          * the first inode item in the leaf, if inode item(256) is missing
4985          * we will just skip it forever.
4986          */
4987         ret = check_fs_first_inode(root, ext_ref);
4988         if (ret < 0)
4989                 return ret;
4990
4991         path = btrfs_alloc_path();
4992         if (!path)
4993                 return -ENOMEM;
4994
4995         memset(&nrefs, 0, sizeof(nrefs));
4996         level = btrfs_header_level(root->node);
4997
4998         if (btrfs_root_refs(root_item) > 0 ||
4999             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5000                 path->nodes[level] = root->node;
5001                 path->slots[level] = 0;
5002                 extent_buffer_get(root->node);
5003         } else {
5004                 struct btrfs_key key;
5005
5006                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5007                 level = root_item->drop_level;
5008                 path->lowest_level = level;
5009                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5010                 if (ret < 0)
5011                         goto out;
5012                 ret = 0;
5013         }
5014
5015         while (1) {
5016                 wret = walk_down_tree_v2(root, path, &level, &nrefs, ext_ref);
5017                 if (wret < 0)
5018                         ret = wret;
5019                 if (wret != 0)
5020                         break;
5021
5022                 wret = walk_up_tree_v2(root, path, &level);
5023                 if (wret < 0)
5024                         ret = wret;
5025                 if (wret != 0)
5026                         break;
5027         }
5028
5029 out:
5030         btrfs_free_path(path);
5031         return ret;
5032 }
5033
5034 /*
5035  * Find the relative ref for root_ref and root_backref.
5036  *
5037  * @root:       the root of the root tree.
5038  * @ref_key:    the key of the root ref.
5039  *
5040  * Return 0 if no error occurred.
5041  */
5042 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5043                           struct extent_buffer *node, int slot)
5044 {
5045         struct btrfs_path path;
5046         struct btrfs_key key;
5047         struct btrfs_root_ref *ref;
5048         struct btrfs_root_ref *backref;
5049         char ref_name[BTRFS_NAME_LEN] = {0};
5050         char backref_name[BTRFS_NAME_LEN] = {0};
5051         u64 ref_dirid;
5052         u64 ref_seq;
5053         u32 ref_namelen;
5054         u64 backref_dirid;
5055         u64 backref_seq;
5056         u32 backref_namelen;
5057         u32 len;
5058         int ret;
5059         int err = 0;
5060
5061         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5062         ref_dirid = btrfs_root_ref_dirid(node, ref);
5063         ref_seq = btrfs_root_ref_sequence(node, ref);
5064         ref_namelen = btrfs_root_ref_name_len(node, ref);
5065
5066         if (ref_namelen <= BTRFS_NAME_LEN) {
5067                 len = ref_namelen;
5068         } else {
5069                 len = BTRFS_NAME_LEN;
5070                 warning("%s[%llu %llu] ref_name too long",
5071                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5072                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5073                         ref_key->offset);
5074         }
5075         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5076
5077         /* Find relative root_ref */
5078         key.objectid = ref_key->offset;
5079         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5080         key.offset = ref_key->objectid;
5081
5082         btrfs_init_path(&path);
5083         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5084         if (ret) {
5085                 err |= ROOT_REF_MISSING;
5086                 error("%s[%llu %llu] couldn't find relative ref",
5087                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5088                       "ROOT_REF" : "ROOT_BACKREF",
5089                       ref_key->objectid, ref_key->offset);
5090                 goto out;
5091         }
5092
5093         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5094                                  struct btrfs_root_ref);
5095         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5096         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5097         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5098
5099         if (backref_namelen <= BTRFS_NAME_LEN) {
5100                 len = backref_namelen;
5101         } else {
5102                 len = BTRFS_NAME_LEN;
5103                 warning("%s[%llu %llu] ref_name too long",
5104                         key.type == BTRFS_ROOT_REF_KEY ?
5105                         "ROOT_REF" : "ROOT_BACKREF",
5106                         key.objectid, key.offset);
5107         }
5108         read_extent_buffer(path.nodes[0], backref_name,
5109                            (unsigned long)(backref + 1), len);
5110
5111         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5112             ref_namelen != backref_namelen ||
5113             strncmp(ref_name, backref_name, len)) {
5114                 err |= ROOT_REF_MISMATCH;
5115                 error("%s[%llu %llu] mismatch relative ref",
5116                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5117                       "ROOT_REF" : "ROOT_BACKREF",
5118                       ref_key->objectid, ref_key->offset);
5119         }
5120 out:
5121         btrfs_release_path(&path);
5122         return err;
5123 }
5124
5125 /*
5126  * Check all fs/file tree in low_memory mode.
5127  *
5128  * 1. for fs tree root item, call check_fs_root_v2()
5129  * 2. for fs tree root ref/backref, call check_root_ref()
5130  *
5131  * Return 0 if no error occurred.
5132  */
5133 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5134 {
5135         struct btrfs_root *tree_root = fs_info->tree_root;
5136         struct btrfs_root *cur_root = NULL;
5137         struct btrfs_path *path;
5138         struct btrfs_key key;
5139         struct extent_buffer *node;
5140         unsigned int ext_ref;
5141         int slot;
5142         int ret;
5143         int err = 0;
5144
5145         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5146
5147         path = btrfs_alloc_path();
5148         if (!path)
5149                 return -ENOMEM;
5150
5151         key.objectid = BTRFS_FS_TREE_OBJECTID;
5152         key.offset = 0;
5153         key.type = BTRFS_ROOT_ITEM_KEY;
5154
5155         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
5156         if (ret < 0) {
5157                 err = ret;
5158                 goto out;
5159         } else if (ret > 0) {
5160                 err = -ENOENT;
5161                 goto out;
5162         }
5163
5164         while (1) {
5165                 node = path->nodes[0];
5166                 slot = path->slots[0];
5167                 btrfs_item_key_to_cpu(node, &key, slot);
5168                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5169                         goto out;
5170                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5171                     fs_root_objectid(key.objectid)) {
5172                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5173                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5174                                                                        &key);
5175                         } else {
5176                                 key.offset = (u64)-1;
5177                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5178                         }
5179
5180                         if (IS_ERR(cur_root)) {
5181                                 error("Fail to read fs/subvol tree: %lld",
5182                                       key.objectid);
5183                                 err = -EIO;
5184                                 goto next;
5185                         }
5186
5187                         ret = check_fs_root_v2(cur_root, ext_ref);
5188                         err |= ret;
5189
5190                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5191                                 btrfs_free_fs_root(cur_root);
5192                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5193                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5194                         ret = check_root_ref(tree_root, &key, node, slot);
5195                         err |= ret;
5196                 }
5197 next:
5198                 ret = btrfs_next_item(tree_root, path);
5199                 if (ret > 0)
5200                         goto out;
5201                 if (ret < 0) {
5202                         err = ret;
5203                         goto out;
5204                 }
5205         }
5206
5207 out:
5208         btrfs_free_path(path);
5209         return err;
5210 }
5211
5212 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5213 {
5214         struct list_head *cur = rec->backrefs.next;
5215         struct extent_backref *back;
5216         struct tree_backref *tback;
5217         struct data_backref *dback;
5218         u64 found = 0;
5219         int err = 0;
5220
5221         while(cur != &rec->backrefs) {
5222                 back = to_extent_backref(cur);
5223                 cur = cur->next;
5224                 if (!back->found_extent_tree) {
5225                         err = 1;
5226                         if (!print_errs)
5227                                 goto out;
5228                         if (back->is_data) {
5229                                 dback = to_data_backref(back);
5230                                 fprintf(stderr, "Backref %llu %s %llu"
5231                                         " owner %llu offset %llu num_refs %lu"
5232                                         " not found in extent tree\n",
5233                                         (unsigned long long)rec->start,
5234                                         back->full_backref ?
5235                                         "parent" : "root",
5236                                         back->full_backref ?
5237                                         (unsigned long long)dback->parent:
5238                                         (unsigned long long)dback->root,
5239                                         (unsigned long long)dback->owner,
5240                                         (unsigned long long)dback->offset,
5241                                         (unsigned long)dback->num_refs);
5242                         } else {
5243                                 tback = to_tree_backref(back);
5244                                 fprintf(stderr, "Backref %llu parent %llu"
5245                                         " root %llu not found in extent tree\n",
5246                                         (unsigned long long)rec->start,
5247                                         (unsigned long long)tback->parent,
5248                                         (unsigned long long)tback->root);
5249                         }
5250                 }
5251                 if (!back->is_data && !back->found_ref) {
5252                         err = 1;
5253                         if (!print_errs)
5254                                 goto out;
5255                         tback = to_tree_backref(back);
5256                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5257                                 (unsigned long long)rec->start,
5258                                 back->full_backref ? "parent" : "root",
5259                                 back->full_backref ?
5260                                 (unsigned long long)tback->parent :
5261                                 (unsigned long long)tback->root, back);
5262                 }
5263                 if (back->is_data) {
5264                         dback = to_data_backref(back);
5265                         if (dback->found_ref != dback->num_refs) {
5266                                 err = 1;
5267                                 if (!print_errs)
5268                                         goto out;
5269                                 fprintf(stderr, "Incorrect local backref count"
5270                                         " on %llu %s %llu owner %llu"
5271                                         " offset %llu found %u wanted %u back %p\n",
5272                                         (unsigned long long)rec->start,
5273                                         back->full_backref ?
5274                                         "parent" : "root",
5275                                         back->full_backref ?
5276                                         (unsigned long long)dback->parent:
5277                                         (unsigned long long)dback->root,
5278                                         (unsigned long long)dback->owner,
5279                                         (unsigned long long)dback->offset,
5280                                         dback->found_ref, dback->num_refs, back);
5281                         }
5282                         if (dback->disk_bytenr != rec->start) {
5283                                 err = 1;
5284                                 if (!print_errs)
5285                                         goto out;
5286                                 fprintf(stderr, "Backref disk bytenr does not"
5287                                         " match extent record, bytenr=%llu, "
5288                                         "ref bytenr=%llu\n",
5289                                         (unsigned long long)rec->start,
5290                                         (unsigned long long)dback->disk_bytenr);
5291                         }
5292
5293                         if (dback->bytes != rec->nr) {
5294                                 err = 1;
5295                                 if (!print_errs)
5296                                         goto out;
5297                                 fprintf(stderr, "Backref bytes do not match "
5298                                         "extent backref, bytenr=%llu, ref "
5299                                         "bytes=%llu, backref bytes=%llu\n",
5300                                         (unsigned long long)rec->start,
5301                                         (unsigned long long)rec->nr,
5302                                         (unsigned long long)dback->bytes);
5303                         }
5304                 }
5305                 if (!back->is_data) {
5306                         found += 1;
5307                 } else {
5308                         dback = to_data_backref(back);
5309                         found += dback->found_ref;
5310                 }
5311         }
5312         if (found != rec->refs) {
5313                 err = 1;
5314                 if (!print_errs)
5315                         goto out;
5316                 fprintf(stderr, "Incorrect global backref count "
5317                         "on %llu found %llu wanted %llu\n",
5318                         (unsigned long long)rec->start,
5319                         (unsigned long long)found,
5320                         (unsigned long long)rec->refs);
5321         }
5322 out:
5323         return err;
5324 }
5325
5326 static int free_all_extent_backrefs(struct extent_record *rec)
5327 {
5328         struct extent_backref *back;
5329         struct list_head *cur;
5330         while (!list_empty(&rec->backrefs)) {
5331                 cur = rec->backrefs.next;
5332                 back = to_extent_backref(cur);
5333                 list_del(cur);
5334                 free(back);
5335         }
5336         return 0;
5337 }
5338
5339 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5340                                      struct cache_tree *extent_cache)
5341 {
5342         struct cache_extent *cache;
5343         struct extent_record *rec;
5344
5345         while (1) {
5346                 cache = first_cache_extent(extent_cache);
5347                 if (!cache)
5348                         break;
5349                 rec = container_of(cache, struct extent_record, cache);
5350                 remove_cache_extent(extent_cache, cache);
5351                 free_all_extent_backrefs(rec);
5352                 free(rec);
5353         }
5354 }
5355
5356 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5357                                  struct extent_record *rec)
5358 {
5359         if (rec->content_checked && rec->owner_ref_checked &&
5360             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5361             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5362             !rec->bad_full_backref && !rec->crossing_stripes &&
5363             !rec->wrong_chunk_type) {
5364                 remove_cache_extent(extent_cache, &rec->cache);
5365                 free_all_extent_backrefs(rec);
5366                 list_del_init(&rec->list);
5367                 free(rec);
5368         }
5369         return 0;
5370 }
5371
5372 static int check_owner_ref(struct btrfs_root *root,
5373                             struct extent_record *rec,
5374                             struct extent_buffer *buf)
5375 {
5376         struct extent_backref *node;
5377         struct tree_backref *back;
5378         struct btrfs_root *ref_root;
5379         struct btrfs_key key;
5380         struct btrfs_path path;
5381         struct extent_buffer *parent;
5382         int level;
5383         int found = 0;
5384         int ret;
5385
5386         list_for_each_entry(node, &rec->backrefs, list) {
5387                 if (node->is_data)
5388                         continue;
5389                 if (!node->found_ref)
5390                         continue;
5391                 if (node->full_backref)
5392                         continue;
5393                 back = to_tree_backref(node);
5394                 if (btrfs_header_owner(buf) == back->root)
5395                         return 0;
5396         }
5397         BUG_ON(rec->is_root);
5398
5399         /* try to find the block by search corresponding fs tree */
5400         key.objectid = btrfs_header_owner(buf);
5401         key.type = BTRFS_ROOT_ITEM_KEY;
5402         key.offset = (u64)-1;
5403
5404         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5405         if (IS_ERR(ref_root))
5406                 return 1;
5407
5408         level = btrfs_header_level(buf);
5409         if (level == 0)
5410                 btrfs_item_key_to_cpu(buf, &key, 0);
5411         else
5412                 btrfs_node_key_to_cpu(buf, &key, 0);
5413
5414         btrfs_init_path(&path);
5415         path.lowest_level = level + 1;
5416         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5417         if (ret < 0)
5418                 return 0;
5419
5420         parent = path.nodes[level + 1];
5421         if (parent && buf->start == btrfs_node_blockptr(parent,
5422                                                         path.slots[level + 1]))
5423                 found = 1;
5424
5425         btrfs_release_path(&path);
5426         return found ? 0 : 1;
5427 }
5428
5429 static int is_extent_tree_record(struct extent_record *rec)
5430 {
5431         struct list_head *cur = rec->backrefs.next;
5432         struct extent_backref *node;
5433         struct tree_backref *back;
5434         int is_extent = 0;
5435
5436         while(cur != &rec->backrefs) {
5437                 node = to_extent_backref(cur);
5438                 cur = cur->next;
5439                 if (node->is_data)
5440                         return 0;
5441                 back = to_tree_backref(node);
5442                 if (node->full_backref)
5443                         return 0;
5444                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5445                         is_extent = 1;
5446         }
5447         return is_extent;
5448 }
5449
5450
5451 static int record_bad_block_io(struct btrfs_fs_info *info,
5452                                struct cache_tree *extent_cache,
5453                                u64 start, u64 len)
5454 {
5455         struct extent_record *rec;
5456         struct cache_extent *cache;
5457         struct btrfs_key key;
5458
5459         cache = lookup_cache_extent(extent_cache, start, len);
5460         if (!cache)
5461                 return 0;
5462
5463         rec = container_of(cache, struct extent_record, cache);
5464         if (!is_extent_tree_record(rec))
5465                 return 0;
5466
5467         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5468         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5469 }
5470
5471 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5472                        struct extent_buffer *buf, int slot)
5473 {
5474         if (btrfs_header_level(buf)) {
5475                 struct btrfs_key_ptr ptr1, ptr2;
5476
5477                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5478                                    sizeof(struct btrfs_key_ptr));
5479                 read_extent_buffer(buf, &ptr2,
5480                                    btrfs_node_key_ptr_offset(slot + 1),
5481                                    sizeof(struct btrfs_key_ptr));
5482                 write_extent_buffer(buf, &ptr1,
5483                                     btrfs_node_key_ptr_offset(slot + 1),
5484                                     sizeof(struct btrfs_key_ptr));
5485                 write_extent_buffer(buf, &ptr2,
5486                                     btrfs_node_key_ptr_offset(slot),
5487                                     sizeof(struct btrfs_key_ptr));
5488                 if (slot == 0) {
5489                         struct btrfs_disk_key key;
5490                         btrfs_node_key(buf, &key, 0);
5491                         btrfs_fixup_low_keys(root, path, &key,
5492                                              btrfs_header_level(buf) + 1);
5493                 }
5494         } else {
5495                 struct btrfs_item *item1, *item2;
5496                 struct btrfs_key k1, k2;
5497                 char *item1_data, *item2_data;
5498                 u32 item1_offset, item2_offset, item1_size, item2_size;
5499
5500                 item1 = btrfs_item_nr(slot);
5501                 item2 = btrfs_item_nr(slot + 1);
5502                 btrfs_item_key_to_cpu(buf, &k1, slot);
5503                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5504                 item1_offset = btrfs_item_offset(buf, item1);
5505                 item2_offset = btrfs_item_offset(buf, item2);
5506                 item1_size = btrfs_item_size(buf, item1);
5507                 item2_size = btrfs_item_size(buf, item2);
5508
5509                 item1_data = malloc(item1_size);
5510                 if (!item1_data)
5511                         return -ENOMEM;
5512                 item2_data = malloc(item2_size);
5513                 if (!item2_data) {
5514                         free(item1_data);
5515                         return -ENOMEM;
5516                 }
5517
5518                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5519                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5520
5521                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5522                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5523                 free(item1_data);
5524                 free(item2_data);
5525
5526                 btrfs_set_item_offset(buf, item1, item2_offset);
5527                 btrfs_set_item_offset(buf, item2, item1_offset);
5528                 btrfs_set_item_size(buf, item1, item2_size);
5529                 btrfs_set_item_size(buf, item2, item1_size);
5530
5531                 path->slots[0] = slot;
5532                 btrfs_set_item_key_unsafe(root, path, &k2);
5533                 path->slots[0] = slot + 1;
5534                 btrfs_set_item_key_unsafe(root, path, &k1);
5535         }
5536         return 0;
5537 }
5538
5539 static int fix_key_order(struct btrfs_trans_handle *trans,
5540                          struct btrfs_root *root,
5541                          struct btrfs_path *path)
5542 {
5543         struct extent_buffer *buf;
5544         struct btrfs_key k1, k2;
5545         int i;
5546         int level = path->lowest_level;
5547         int ret = -EIO;
5548
5549         buf = path->nodes[level];
5550         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5551                 if (level) {
5552                         btrfs_node_key_to_cpu(buf, &k1, i);
5553                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5554                 } else {
5555                         btrfs_item_key_to_cpu(buf, &k1, i);
5556                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5557                 }
5558                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5559                         continue;
5560                 ret = swap_values(root, path, buf, i);
5561                 if (ret)
5562                         break;
5563                 btrfs_mark_buffer_dirty(buf);
5564                 i = 0;
5565         }
5566         return ret;
5567 }
5568
5569 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5570                              struct btrfs_root *root,
5571                              struct btrfs_path *path,
5572                              struct extent_buffer *buf, int slot)
5573 {
5574         struct btrfs_key key;
5575         int nritems = btrfs_header_nritems(buf);
5576
5577         btrfs_item_key_to_cpu(buf, &key, slot);
5578
5579         /* These are all the keys we can deal with missing. */
5580         if (key.type != BTRFS_DIR_INDEX_KEY &&
5581             key.type != BTRFS_EXTENT_ITEM_KEY &&
5582             key.type != BTRFS_METADATA_ITEM_KEY &&
5583             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5584             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5585                 return -1;
5586
5587         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5588                (unsigned long long)key.objectid, key.type,
5589                (unsigned long long)key.offset, slot, buf->start);
5590         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5591                               btrfs_item_nr_offset(slot + 1),
5592                               sizeof(struct btrfs_item) *
5593                               (nritems - slot - 1));
5594         btrfs_set_header_nritems(buf, nritems - 1);
5595         if (slot == 0) {
5596                 struct btrfs_disk_key disk_key;
5597
5598                 btrfs_item_key(buf, &disk_key, 0);
5599                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5600         }
5601         btrfs_mark_buffer_dirty(buf);
5602         return 0;
5603 }
5604
5605 static int fix_item_offset(struct btrfs_trans_handle *trans,
5606                            struct btrfs_root *root,
5607                            struct btrfs_path *path)
5608 {
5609         struct extent_buffer *buf;
5610         int i;
5611         int ret = 0;
5612
5613         /* We should only get this for leaves */
5614         BUG_ON(path->lowest_level);
5615         buf = path->nodes[0];
5616 again:
5617         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5618                 unsigned int shift = 0, offset;
5619
5620                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5621                     BTRFS_LEAF_DATA_SIZE(root)) {
5622                         if (btrfs_item_end_nr(buf, i) >
5623                             BTRFS_LEAF_DATA_SIZE(root)) {
5624                                 ret = delete_bogus_item(trans, root, path,
5625                                                         buf, i);
5626                                 if (!ret)
5627                                         goto again;
5628                                 fprintf(stderr, "item is off the end of the "
5629                                         "leaf, can't fix\n");
5630                                 ret = -EIO;
5631                                 break;
5632                         }
5633                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5634                                 btrfs_item_end_nr(buf, i);
5635                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5636                            btrfs_item_offset_nr(buf, i - 1)) {
5637                         if (btrfs_item_end_nr(buf, i) >
5638                             btrfs_item_offset_nr(buf, i - 1)) {
5639                                 ret = delete_bogus_item(trans, root, path,
5640                                                         buf, i);
5641                                 if (!ret)
5642                                         goto again;
5643                                 fprintf(stderr, "items overlap, can't fix\n");
5644                                 ret = -EIO;
5645                                 break;
5646                         }
5647                         shift = btrfs_item_offset_nr(buf, i - 1) -
5648                                 btrfs_item_end_nr(buf, i);
5649                 }
5650                 if (!shift)
5651                         continue;
5652
5653                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5654                        i, shift, (unsigned long long)buf->start);
5655                 offset = btrfs_item_offset_nr(buf, i);
5656                 memmove_extent_buffer(buf,
5657                                       btrfs_leaf_data(buf) + offset + shift,
5658                                       btrfs_leaf_data(buf) + offset,
5659                                       btrfs_item_size_nr(buf, i));
5660                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5661                                       offset + shift);
5662                 btrfs_mark_buffer_dirty(buf);
5663         }
5664
5665         /*
5666          * We may have moved things, in which case we want to exit so we don't
5667          * write those changes out.  Once we have proper abort functionality in
5668          * progs this can be changed to something nicer.
5669          */
5670         BUG_ON(ret);
5671         return ret;
5672 }
5673
5674 /*
5675  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5676  * then just return -EIO.
5677  */
5678 static int try_to_fix_bad_block(struct btrfs_root *root,
5679                                 struct extent_buffer *buf,
5680                                 enum btrfs_tree_block_status status)
5681 {
5682         struct btrfs_trans_handle *trans;
5683         struct ulist *roots;
5684         struct ulist_node *node;
5685         struct btrfs_root *search_root;
5686         struct btrfs_path path;
5687         struct ulist_iterator iter;
5688         struct btrfs_key root_key, key;
5689         int ret;
5690
5691         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5692             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5693                 return -EIO;
5694
5695         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5696         if (ret)
5697                 return -EIO;
5698
5699         btrfs_init_path(&path);
5700         ULIST_ITER_INIT(&iter);
5701         while ((node = ulist_next(roots, &iter))) {
5702                 root_key.objectid = node->val;
5703                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5704                 root_key.offset = (u64)-1;
5705
5706                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5707                 if (IS_ERR(root)) {
5708                         ret = -EIO;
5709                         break;
5710                 }
5711
5712
5713                 trans = btrfs_start_transaction(search_root, 0);
5714                 if (IS_ERR(trans)) {
5715                         ret = PTR_ERR(trans);
5716                         break;
5717                 }
5718
5719                 path.lowest_level = btrfs_header_level(buf);
5720                 path.skip_check_block = 1;
5721                 if (path.lowest_level)
5722                         btrfs_node_key_to_cpu(buf, &key, 0);
5723                 else
5724                         btrfs_item_key_to_cpu(buf, &key, 0);
5725                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5726                 if (ret) {
5727                         ret = -EIO;
5728                         btrfs_commit_transaction(trans, search_root);
5729                         break;
5730                 }
5731                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5732                         ret = fix_key_order(trans, search_root, &path);
5733                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5734                         ret = fix_item_offset(trans, search_root, &path);
5735                 if (ret) {
5736                         btrfs_commit_transaction(trans, search_root);
5737                         break;
5738                 }
5739                 btrfs_release_path(&path);
5740                 btrfs_commit_transaction(trans, search_root);
5741         }
5742         ulist_free(roots);
5743         btrfs_release_path(&path);
5744         return ret;
5745 }
5746
5747 static int check_block(struct btrfs_root *root,
5748                        struct cache_tree *extent_cache,
5749                        struct extent_buffer *buf, u64 flags)
5750 {
5751         struct extent_record *rec;
5752         struct cache_extent *cache;
5753         struct btrfs_key key;
5754         enum btrfs_tree_block_status status;
5755         int ret = 0;
5756         int level;
5757
5758         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5759         if (!cache)
5760                 return 1;
5761         rec = container_of(cache, struct extent_record, cache);
5762         rec->generation = btrfs_header_generation(buf);
5763
5764         level = btrfs_header_level(buf);
5765         if (btrfs_header_nritems(buf) > 0) {
5766
5767                 if (level == 0)
5768                         btrfs_item_key_to_cpu(buf, &key, 0);
5769                 else
5770                         btrfs_node_key_to_cpu(buf, &key, 0);
5771
5772                 rec->info_objectid = key.objectid;
5773         }
5774         rec->info_level = level;
5775
5776         if (btrfs_is_leaf(buf))
5777                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5778         else
5779                 status = btrfs_check_node(root, &rec->parent_key, buf);
5780
5781         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5782                 if (repair)
5783                         status = try_to_fix_bad_block(root, buf, status);
5784                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5785                         ret = -EIO;
5786                         fprintf(stderr, "bad block %llu\n",
5787                                 (unsigned long long)buf->start);
5788                 } else {
5789                         /*
5790                          * Signal to callers we need to start the scan over
5791                          * again since we'll have cowed blocks.
5792                          */
5793                         ret = -EAGAIN;
5794                 }
5795         } else {
5796                 rec->content_checked = 1;
5797                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5798                         rec->owner_ref_checked = 1;
5799                 else {
5800                         ret = check_owner_ref(root, rec, buf);
5801                         if (!ret)
5802                                 rec->owner_ref_checked = 1;
5803                 }
5804         }
5805         if (!ret)
5806                 maybe_free_extent_rec(extent_cache, rec);
5807         return ret;
5808 }
5809
5810 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5811                                                 u64 parent, u64 root)
5812 {
5813         struct list_head *cur = rec->backrefs.next;
5814         struct extent_backref *node;
5815         struct tree_backref *back;
5816
5817         while(cur != &rec->backrefs) {
5818                 node = to_extent_backref(cur);
5819                 cur = cur->next;
5820                 if (node->is_data)
5821                         continue;
5822                 back = to_tree_backref(node);
5823                 if (parent > 0) {
5824                         if (!node->full_backref)
5825                                 continue;
5826                         if (parent == back->parent)
5827                                 return back;
5828                 } else {
5829                         if (node->full_backref)
5830                                 continue;
5831                         if (back->root == root)
5832                                 return back;
5833                 }
5834         }
5835         return NULL;
5836 }
5837
5838 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5839                                                 u64 parent, u64 root)
5840 {
5841         struct tree_backref *ref = malloc(sizeof(*ref));
5842
5843         if (!ref)
5844                 return NULL;
5845         memset(&ref->node, 0, sizeof(ref->node));
5846         if (parent > 0) {
5847                 ref->parent = parent;
5848                 ref->node.full_backref = 1;
5849         } else {
5850                 ref->root = root;
5851                 ref->node.full_backref = 0;
5852         }
5853         list_add_tail(&ref->node.list, &rec->backrefs);
5854
5855         return ref;
5856 }
5857
5858 static struct data_backref *find_data_backref(struct extent_record *rec,
5859                                                 u64 parent, u64 root,
5860                                                 u64 owner, u64 offset,
5861                                                 int found_ref,
5862                                                 u64 disk_bytenr, u64 bytes)
5863 {
5864         struct list_head *cur = rec->backrefs.next;
5865         struct extent_backref *node;
5866         struct data_backref *back;
5867
5868         while(cur != &rec->backrefs) {
5869                 node = to_extent_backref(cur);
5870                 cur = cur->next;
5871                 if (!node->is_data)
5872                         continue;
5873                 back = to_data_backref(node);
5874                 if (parent > 0) {
5875                         if (!node->full_backref)
5876                                 continue;
5877                         if (parent == back->parent)
5878                                 return back;
5879                 } else {
5880                         if (node->full_backref)
5881                                 continue;
5882                         if (back->root == root && back->owner == owner &&
5883                             back->offset == offset) {
5884                                 if (found_ref && node->found_ref &&
5885                                     (back->bytes != bytes ||
5886                                     back->disk_bytenr != disk_bytenr))
5887                                         continue;
5888                                 return back;
5889                         }
5890                 }
5891         }
5892         return NULL;
5893 }
5894
5895 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5896                                                 u64 parent, u64 root,
5897                                                 u64 owner, u64 offset,
5898                                                 u64 max_size)
5899 {
5900         struct data_backref *ref = malloc(sizeof(*ref));
5901
5902         if (!ref)
5903                 return NULL;
5904         memset(&ref->node, 0, sizeof(ref->node));
5905         ref->node.is_data = 1;
5906
5907         if (parent > 0) {
5908                 ref->parent = parent;
5909                 ref->owner = 0;
5910                 ref->offset = 0;
5911                 ref->node.full_backref = 1;
5912         } else {
5913                 ref->root = root;
5914                 ref->owner = owner;
5915                 ref->offset = offset;
5916                 ref->node.full_backref = 0;
5917         }
5918         ref->bytes = max_size;
5919         ref->found_ref = 0;
5920         ref->num_refs = 0;
5921         list_add_tail(&ref->node.list, &rec->backrefs);
5922         if (max_size > rec->max_size)
5923                 rec->max_size = max_size;
5924         return ref;
5925 }
5926
5927 /* Check if the type of extent matches with its chunk */
5928 static void check_extent_type(struct extent_record *rec)
5929 {
5930         struct btrfs_block_group_cache *bg_cache;
5931
5932         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5933         if (!bg_cache)
5934                 return;
5935
5936         /* data extent, check chunk directly*/
5937         if (!rec->metadata) {
5938                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5939                         rec->wrong_chunk_type = 1;
5940                 return;
5941         }
5942
5943         /* metadata extent, check the obvious case first */
5944         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5945                                  BTRFS_BLOCK_GROUP_METADATA))) {
5946                 rec->wrong_chunk_type = 1;
5947                 return;
5948         }
5949
5950         /*
5951          * Check SYSTEM extent, as it's also marked as metadata, we can only
5952          * make sure it's a SYSTEM extent by its backref
5953          */
5954         if (!list_empty(&rec->backrefs)) {
5955                 struct extent_backref *node;
5956                 struct tree_backref *tback;
5957                 u64 bg_type;
5958
5959                 node = to_extent_backref(rec->backrefs.next);
5960                 if (node->is_data) {
5961                         /* tree block shouldn't have data backref */
5962                         rec->wrong_chunk_type = 1;
5963                         return;
5964                 }
5965                 tback = container_of(node, struct tree_backref, node);
5966
5967                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5968                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5969                 else
5970                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5971                 if (!(bg_cache->flags & bg_type))
5972                         rec->wrong_chunk_type = 1;
5973         }
5974 }
5975
5976 /*
5977  * Allocate a new extent record, fill default values from @tmpl and insert int
5978  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5979  * the cache, otherwise it fails.
5980  */
5981 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5982                 struct extent_record *tmpl)
5983 {
5984         struct extent_record *rec;
5985         int ret = 0;
5986
5987         rec = malloc(sizeof(*rec));
5988         if (!rec)
5989                 return -ENOMEM;
5990         rec->start = tmpl->start;
5991         rec->max_size = tmpl->max_size;
5992         rec->nr = max(tmpl->nr, tmpl->max_size);
5993         rec->found_rec = tmpl->found_rec;
5994         rec->content_checked = tmpl->content_checked;
5995         rec->owner_ref_checked = tmpl->owner_ref_checked;
5996         rec->num_duplicates = 0;
5997         rec->metadata = tmpl->metadata;
5998         rec->flag_block_full_backref = FLAG_UNSET;
5999         rec->bad_full_backref = 0;
6000         rec->crossing_stripes = 0;
6001         rec->wrong_chunk_type = 0;
6002         rec->is_root = tmpl->is_root;
6003         rec->refs = tmpl->refs;
6004         rec->extent_item_refs = tmpl->extent_item_refs;
6005         rec->parent_generation = tmpl->parent_generation;
6006         INIT_LIST_HEAD(&rec->backrefs);
6007         INIT_LIST_HEAD(&rec->dups);
6008         INIT_LIST_HEAD(&rec->list);
6009         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6010         rec->cache.start = tmpl->start;
6011         rec->cache.size = tmpl->nr;
6012         ret = insert_cache_extent(extent_cache, &rec->cache);
6013         if (ret) {
6014                 free(rec);
6015                 return ret;
6016         }
6017         bytes_used += rec->nr;
6018
6019         if (tmpl->metadata)
6020                 rec->crossing_stripes = check_crossing_stripes(global_info,
6021                                 rec->start, global_info->tree_root->nodesize);
6022         check_extent_type(rec);
6023         return ret;
6024 }
6025
6026 /*
6027  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6028  * some are hints:
6029  * - refs              - if found, increase refs
6030  * - is_root           - if found, set
6031  * - content_checked   - if found, set
6032  * - owner_ref_checked - if found, set
6033  *
6034  * If not found, create a new one, initialize and insert.
6035  */
6036 static int add_extent_rec(struct cache_tree *extent_cache,
6037                 struct extent_record *tmpl)
6038 {
6039         struct extent_record *rec;
6040         struct cache_extent *cache;
6041         int ret = 0;
6042         int dup = 0;
6043
6044         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6045         if (cache) {
6046                 rec = container_of(cache, struct extent_record, cache);
6047                 if (tmpl->refs)
6048                         rec->refs++;
6049                 if (rec->nr == 1)
6050                         rec->nr = max(tmpl->nr, tmpl->max_size);
6051
6052                 /*
6053                  * We need to make sure to reset nr to whatever the extent
6054                  * record says was the real size, this way we can compare it to
6055                  * the backrefs.
6056                  */
6057                 if (tmpl->found_rec) {
6058                         if (tmpl->start != rec->start || rec->found_rec) {
6059                                 struct extent_record *tmp;
6060
6061                                 dup = 1;
6062                                 if (list_empty(&rec->list))
6063                                         list_add_tail(&rec->list,
6064                                                       &duplicate_extents);
6065
6066                                 /*
6067                                  * We have to do this song and dance in case we
6068                                  * find an extent record that falls inside of
6069                                  * our current extent record but does not have
6070                                  * the same objectid.
6071                                  */
6072                                 tmp = malloc(sizeof(*tmp));
6073                                 if (!tmp)
6074                                         return -ENOMEM;
6075                                 tmp->start = tmpl->start;
6076                                 tmp->max_size = tmpl->max_size;
6077                                 tmp->nr = tmpl->nr;
6078                                 tmp->found_rec = 1;
6079                                 tmp->metadata = tmpl->metadata;
6080                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6081                                 INIT_LIST_HEAD(&tmp->list);
6082                                 list_add_tail(&tmp->list, &rec->dups);
6083                                 rec->num_duplicates++;
6084                         } else {
6085                                 rec->nr = tmpl->nr;
6086                                 rec->found_rec = 1;
6087                         }
6088                 }
6089
6090                 if (tmpl->extent_item_refs && !dup) {
6091                         if (rec->extent_item_refs) {
6092                                 fprintf(stderr, "block %llu rec "
6093                                         "extent_item_refs %llu, passed %llu\n",
6094                                         (unsigned long long)tmpl->start,
6095                                         (unsigned long long)
6096                                                         rec->extent_item_refs,
6097                                         (unsigned long long)tmpl->extent_item_refs);
6098                         }
6099                         rec->extent_item_refs = tmpl->extent_item_refs;
6100                 }
6101                 if (tmpl->is_root)
6102                         rec->is_root = 1;
6103                 if (tmpl->content_checked)
6104                         rec->content_checked = 1;
6105                 if (tmpl->owner_ref_checked)
6106                         rec->owner_ref_checked = 1;
6107                 memcpy(&rec->parent_key, &tmpl->parent_key,
6108                                 sizeof(tmpl->parent_key));
6109                 if (tmpl->parent_generation)
6110                         rec->parent_generation = tmpl->parent_generation;
6111                 if (rec->max_size < tmpl->max_size)
6112                         rec->max_size = tmpl->max_size;
6113
6114                 /*
6115                  * A metadata extent can't cross stripe_len boundary, otherwise
6116                  * kernel scrub won't be able to handle it.
6117                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6118                  * it.
6119                  */
6120                 if (tmpl->metadata)
6121                         rec->crossing_stripes = check_crossing_stripes(
6122                                         global_info, rec->start,
6123                                         global_info->tree_root->nodesize);
6124                 check_extent_type(rec);
6125                 maybe_free_extent_rec(extent_cache, rec);
6126                 return ret;
6127         }
6128
6129         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6130
6131         return ret;
6132 }
6133
6134 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6135                             u64 parent, u64 root, int found_ref)
6136 {
6137         struct extent_record *rec;
6138         struct tree_backref *back;
6139         struct cache_extent *cache;
6140         int ret;
6141
6142         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6143         if (!cache) {
6144                 struct extent_record tmpl;
6145
6146                 memset(&tmpl, 0, sizeof(tmpl));
6147                 tmpl.start = bytenr;
6148                 tmpl.nr = 1;
6149                 tmpl.metadata = 1;
6150
6151                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6152                 if (ret)
6153                         return ret;
6154
6155                 /* really a bug in cache_extent implement now */
6156                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6157                 if (!cache)
6158                         return -ENOENT;
6159         }
6160
6161         rec = container_of(cache, struct extent_record, cache);
6162         if (rec->start != bytenr) {
6163                 /*
6164                  * Several cause, from unaligned bytenr to over lapping extents
6165                  */
6166                 return -EEXIST;
6167         }
6168
6169         back = find_tree_backref(rec, parent, root);
6170         if (!back) {
6171                 back = alloc_tree_backref(rec, parent, root);
6172                 if (!back)
6173                         return -ENOMEM;
6174         }
6175
6176         if (found_ref) {
6177                 if (back->node.found_ref) {
6178                         fprintf(stderr, "Extent back ref already exists "
6179                                 "for %llu parent %llu root %llu \n",
6180                                 (unsigned long long)bytenr,
6181                                 (unsigned long long)parent,
6182                                 (unsigned long long)root);
6183                 }
6184                 back->node.found_ref = 1;
6185         } else {
6186                 if (back->node.found_extent_tree) {
6187                         fprintf(stderr, "Extent back ref already exists "
6188                                 "for %llu parent %llu root %llu \n",
6189                                 (unsigned long long)bytenr,
6190                                 (unsigned long long)parent,
6191                                 (unsigned long long)root);
6192                 }
6193                 back->node.found_extent_tree = 1;
6194         }
6195         check_extent_type(rec);
6196         maybe_free_extent_rec(extent_cache, rec);
6197         return 0;
6198 }
6199
6200 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6201                             u64 parent, u64 root, u64 owner, u64 offset,
6202                             u32 num_refs, int found_ref, u64 max_size)
6203 {
6204         struct extent_record *rec;
6205         struct data_backref *back;
6206         struct cache_extent *cache;
6207         int ret;
6208
6209         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6210         if (!cache) {
6211                 struct extent_record tmpl;
6212
6213                 memset(&tmpl, 0, sizeof(tmpl));
6214                 tmpl.start = bytenr;
6215                 tmpl.nr = 1;
6216                 tmpl.max_size = max_size;
6217
6218                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6219                 if (ret)
6220                         return ret;
6221
6222                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6223                 if (!cache)
6224                         abort();
6225         }
6226
6227         rec = container_of(cache, struct extent_record, cache);
6228         if (rec->max_size < max_size)
6229                 rec->max_size = max_size;
6230
6231         /*
6232          * If found_ref is set then max_size is the real size and must match the
6233          * existing refs.  So if we have already found a ref then we need to
6234          * make sure that this ref matches the existing one, otherwise we need
6235          * to add a new backref so we can notice that the backrefs don't match
6236          * and we need to figure out who is telling the truth.  This is to
6237          * account for that awful fsync bug I introduced where we'd end up with
6238          * a btrfs_file_extent_item that would have its length include multiple
6239          * prealloc extents or point inside of a prealloc extent.
6240          */
6241         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6242                                  bytenr, max_size);
6243         if (!back) {
6244                 back = alloc_data_backref(rec, parent, root, owner, offset,
6245                                           max_size);
6246                 BUG_ON(!back);
6247         }
6248
6249         if (found_ref) {
6250                 BUG_ON(num_refs != 1);
6251                 if (back->node.found_ref)
6252                         BUG_ON(back->bytes != max_size);
6253                 back->node.found_ref = 1;
6254                 back->found_ref += 1;
6255                 back->bytes = max_size;
6256                 back->disk_bytenr = bytenr;
6257                 rec->refs += 1;
6258                 rec->content_checked = 1;
6259                 rec->owner_ref_checked = 1;
6260         } else {
6261                 if (back->node.found_extent_tree) {
6262                         fprintf(stderr, "Extent back ref already exists "
6263                                 "for %llu parent %llu root %llu "
6264                                 "owner %llu offset %llu num_refs %lu\n",
6265                                 (unsigned long long)bytenr,
6266                                 (unsigned long long)parent,
6267                                 (unsigned long long)root,
6268                                 (unsigned long long)owner,
6269                                 (unsigned long long)offset,
6270                                 (unsigned long)num_refs);
6271                 }
6272                 back->num_refs = num_refs;
6273                 back->node.found_extent_tree = 1;
6274         }
6275         maybe_free_extent_rec(extent_cache, rec);
6276         return 0;
6277 }
6278
6279 static int add_pending(struct cache_tree *pending,
6280                        struct cache_tree *seen, u64 bytenr, u32 size)
6281 {
6282         int ret;
6283         ret = add_cache_extent(seen, bytenr, size);
6284         if (ret)
6285                 return ret;
6286         add_cache_extent(pending, bytenr, size);
6287         return 0;
6288 }
6289
6290 static int pick_next_pending(struct cache_tree *pending,
6291                         struct cache_tree *reada,
6292                         struct cache_tree *nodes,
6293                         u64 last, struct block_info *bits, int bits_nr,
6294                         int *reada_bits)
6295 {
6296         unsigned long node_start = last;
6297         struct cache_extent *cache;
6298         int ret;
6299
6300         cache = search_cache_extent(reada, 0);
6301         if (cache) {
6302                 bits[0].start = cache->start;
6303                 bits[0].size = cache->size;
6304                 *reada_bits = 1;
6305                 return 1;
6306         }
6307         *reada_bits = 0;
6308         if (node_start > 32768)
6309                 node_start -= 32768;
6310
6311         cache = search_cache_extent(nodes, node_start);
6312         if (!cache)
6313                 cache = search_cache_extent(nodes, 0);
6314
6315         if (!cache) {
6316                  cache = search_cache_extent(pending, 0);
6317                  if (!cache)
6318                          return 0;
6319                  ret = 0;
6320                  do {
6321                          bits[ret].start = cache->start;
6322                          bits[ret].size = cache->size;
6323                          cache = next_cache_extent(cache);
6324                          ret++;
6325                  } while (cache && ret < bits_nr);
6326                  return ret;
6327         }
6328
6329         ret = 0;
6330         do {
6331                 bits[ret].start = cache->start;
6332                 bits[ret].size = cache->size;
6333                 cache = next_cache_extent(cache);
6334                 ret++;
6335         } while (cache && ret < bits_nr);
6336
6337         if (bits_nr - ret > 8) {
6338                 u64 lookup = bits[0].start + bits[0].size;
6339                 struct cache_extent *next;
6340                 next = search_cache_extent(pending, lookup);
6341                 while(next) {
6342                         if (next->start - lookup > 32768)
6343                                 break;
6344                         bits[ret].start = next->start;
6345                         bits[ret].size = next->size;
6346                         lookup = next->start + next->size;
6347                         ret++;
6348                         if (ret == bits_nr)
6349                                 break;
6350                         next = next_cache_extent(next);
6351                         if (!next)
6352                                 break;
6353                 }
6354         }
6355         return ret;
6356 }
6357
6358 static void free_chunk_record(struct cache_extent *cache)
6359 {
6360         struct chunk_record *rec;
6361
6362         rec = container_of(cache, struct chunk_record, cache);
6363         list_del_init(&rec->list);
6364         list_del_init(&rec->dextents);
6365         free(rec);
6366 }
6367
6368 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6369 {
6370         cache_tree_free_extents(chunk_cache, free_chunk_record);
6371 }
6372
6373 static void free_device_record(struct rb_node *node)
6374 {
6375         struct device_record *rec;
6376
6377         rec = container_of(node, struct device_record, node);
6378         free(rec);
6379 }
6380
6381 FREE_RB_BASED_TREE(device_cache, free_device_record);
6382
6383 int insert_block_group_record(struct block_group_tree *tree,
6384                               struct block_group_record *bg_rec)
6385 {
6386         int ret;
6387
6388         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6389         if (ret)
6390                 return ret;
6391
6392         list_add_tail(&bg_rec->list, &tree->block_groups);
6393         return 0;
6394 }
6395
6396 static void free_block_group_record(struct cache_extent *cache)
6397 {
6398         struct block_group_record *rec;
6399
6400         rec = container_of(cache, struct block_group_record, cache);
6401         list_del_init(&rec->list);
6402         free(rec);
6403 }
6404
6405 void free_block_group_tree(struct block_group_tree *tree)
6406 {
6407         cache_tree_free_extents(&tree->tree, free_block_group_record);
6408 }
6409
6410 int insert_device_extent_record(struct device_extent_tree *tree,
6411                                 struct device_extent_record *de_rec)
6412 {
6413         int ret;
6414
6415         /*
6416          * Device extent is a bit different from the other extents, because
6417          * the extents which belong to the different devices may have the
6418          * same start and size, so we need use the special extent cache
6419          * search/insert functions.
6420          */
6421         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6422         if (ret)
6423                 return ret;
6424
6425         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6426         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6427         return 0;
6428 }
6429
6430 static void free_device_extent_record(struct cache_extent *cache)
6431 {
6432         struct device_extent_record *rec;
6433
6434         rec = container_of(cache, struct device_extent_record, cache);
6435         if (!list_empty(&rec->chunk_list))
6436                 list_del_init(&rec->chunk_list);
6437         if (!list_empty(&rec->device_list))
6438                 list_del_init(&rec->device_list);
6439         free(rec);
6440 }
6441
6442 void free_device_extent_tree(struct device_extent_tree *tree)
6443 {
6444         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6445 }
6446
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item found at @slot of @leaf.
 *
 * A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree back reference; any other is recorded as a data back reference
 * with the reference count read from the item.  In both cases key.objectid
 * is the extent start and key.offset is passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6467
6468 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6469                                             struct btrfs_key *key,
6470                                             int slot)
6471 {
6472         struct btrfs_chunk *ptr;
6473         struct chunk_record *rec;
6474         int num_stripes, i;
6475
6476         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6477         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6478
6479         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6480         if (!rec) {
6481                 fprintf(stderr, "memory allocation failed\n");
6482                 exit(-1);
6483         }
6484
6485         INIT_LIST_HEAD(&rec->list);
6486         INIT_LIST_HEAD(&rec->dextents);
6487         rec->bg_rec = NULL;
6488
6489         rec->cache.start = key->offset;
6490         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6491
6492         rec->generation = btrfs_header_generation(leaf);
6493
6494         rec->objectid = key->objectid;
6495         rec->type = key->type;
6496         rec->offset = key->offset;
6497
6498         rec->length = rec->cache.size;
6499         rec->owner = btrfs_chunk_owner(leaf, ptr);
6500         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6501         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6502         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6503         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6504         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6505         rec->num_stripes = num_stripes;
6506         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6507
6508         for (i = 0; i < rec->num_stripes; ++i) {
6509                 rec->stripes[i].devid =
6510                         btrfs_stripe_devid_nr(leaf, ptr, i);
6511                 rec->stripes[i].offset =
6512                         btrfs_stripe_offset_nr(leaf, ptr, i);
6513                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6514                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6515                                 BTRFS_UUID_SIZE);
6516         }
6517
6518         return rec;
6519 }
6520
6521 static int process_chunk_item(struct cache_tree *chunk_cache,
6522                               struct btrfs_key *key, struct extent_buffer *eb,
6523                               int slot)
6524 {
6525         struct chunk_record *rec;
6526         struct btrfs_chunk *chunk;
6527         int ret = 0;
6528
6529         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6530         /*
6531          * Do extra check for this chunk item,
6532          *
6533          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6534          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6535          * and owner<->key_type check.
6536          */
6537         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6538                                       key->offset);
6539         if (ret < 0) {
6540                 error("chunk(%llu, %llu) is not valid, ignore it",
6541                       key->offset, btrfs_chunk_length(eb, chunk));
6542                 return 0;
6543         }
6544         rec = btrfs_new_chunk_record(eb, key, slot);
6545         ret = insert_cache_extent(chunk_cache, &rec->cache);
6546         if (ret) {
6547                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6548                         rec->offset, rec->length);
6549                 free(rec);
6550         }
6551
6552         return ret;
6553 }
6554
6555 static int process_device_item(struct rb_root *dev_cache,
6556                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6557 {
6558         struct btrfs_dev_item *ptr;
6559         struct device_record *rec;
6560         int ret = 0;
6561
6562         ptr = btrfs_item_ptr(eb,
6563                 slot, struct btrfs_dev_item);
6564
6565         rec = malloc(sizeof(*rec));
6566         if (!rec) {
6567                 fprintf(stderr, "memory allocation failed\n");
6568                 return -ENOMEM;
6569         }
6570
6571         rec->devid = key->offset;
6572         rec->generation = btrfs_header_generation(eb);
6573
6574         rec->objectid = key->objectid;
6575         rec->type = key->type;
6576         rec->offset = key->offset;
6577
6578         rec->devid = btrfs_device_id(eb, ptr);
6579         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6580         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6581
6582         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6583         if (ret) {
6584                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6585                 free(rec);
6586         }
6587
6588         return ret;
6589 }
6590
6591 struct block_group_record *
6592 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6593                              int slot)
6594 {
6595         struct btrfs_block_group_item *ptr;
6596         struct block_group_record *rec;
6597
6598         rec = calloc(1, sizeof(*rec));
6599         if (!rec) {
6600                 fprintf(stderr, "memory allocation failed\n");
6601                 exit(-1);
6602         }
6603
6604         rec->cache.start = key->objectid;
6605         rec->cache.size = key->offset;
6606
6607         rec->generation = btrfs_header_generation(leaf);
6608
6609         rec->objectid = key->objectid;
6610         rec->type = key->type;
6611         rec->offset = key->offset;
6612
6613         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6614         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6615
6616         INIT_LIST_HEAD(&rec->list);
6617
6618         return rec;
6619 }
6620
6621 static int process_block_group_item(struct block_group_tree *block_group_cache,
6622                                     struct btrfs_key *key,
6623                                     struct extent_buffer *eb, int slot)
6624 {
6625         struct block_group_record *rec;
6626         int ret = 0;
6627
6628         rec = btrfs_new_block_group_record(eb, key, slot);
6629         ret = insert_block_group_record(block_group_cache, rec);
6630         if (ret) {
6631                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6632                         rec->objectid, rec->offset);
6633                 free(rec);
6634         }
6635
6636         return ret;
6637 }
6638
6639 struct device_extent_record *
6640 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6641                                struct btrfs_key *key, int slot)
6642 {
6643         struct device_extent_record *rec;
6644         struct btrfs_dev_extent *ptr;
6645
6646         rec = calloc(1, sizeof(*rec));
6647         if (!rec) {
6648                 fprintf(stderr, "memory allocation failed\n");
6649                 exit(-1);
6650         }
6651
6652         rec->cache.objectid = key->objectid;
6653         rec->cache.start = key->offset;
6654
6655         rec->generation = btrfs_header_generation(leaf);
6656
6657         rec->objectid = key->objectid;
6658         rec->type = key->type;
6659         rec->offset = key->offset;
6660
6661         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6662         rec->chunk_objecteid =
6663                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6664         rec->chunk_offset =
6665                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6666         rec->length = btrfs_dev_extent_length(leaf, ptr);
6667         rec->cache.size = rec->length;
6668
6669         INIT_LIST_HEAD(&rec->chunk_list);
6670         INIT_LIST_HEAD(&rec->device_list);
6671
6672         return rec;
6673 }
6674
6675 static int
6676 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6677                            struct btrfs_key *key, struct extent_buffer *eb,
6678                            int slot)
6679 {
6680         struct device_extent_record *rec;
6681         int ret;
6682
6683         rec = btrfs_new_device_extent_record(eb, key, slot);
6684         ret = insert_device_extent_record(dev_extent_cache, rec);
6685         if (ret) {
6686                 fprintf(stderr,
6687                         "Device extent[%llu, %llu, %llu] existed.\n",
6688                         rec->objectid, rec->offset, rec->length);
6689                 free(rec);
6690         }
6691
6692         return ret;
6693 }
6694
6695 static int process_extent_item(struct btrfs_root *root,
6696                                struct cache_tree *extent_cache,
6697                                struct extent_buffer *eb, int slot)
6698 {
6699         struct btrfs_extent_item *ei;
6700         struct btrfs_extent_inline_ref *iref;
6701         struct btrfs_extent_data_ref *dref;
6702         struct btrfs_shared_data_ref *sref;
6703         struct btrfs_key key;
6704         struct extent_record tmpl;
6705         unsigned long end;
6706         unsigned long ptr;
6707         int ret;
6708         int type;
6709         u32 item_size = btrfs_item_size_nr(eb, slot);
6710         u64 refs = 0;
6711         u64 offset;
6712         u64 num_bytes;
6713         int metadata = 0;
6714
6715         btrfs_item_key_to_cpu(eb, &key, slot);
6716
6717         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6718                 metadata = 1;
6719                 num_bytes = root->nodesize;
6720         } else {
6721                 num_bytes = key.offset;
6722         }
6723
6724         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6725                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6726                       key.objectid, root->sectorsize);
6727                 return -EIO;
6728         }
6729         if (item_size < sizeof(*ei)) {
6730 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6731                 struct btrfs_extent_item_v0 *ei0;
6732                 BUG_ON(item_size != sizeof(*ei0));
6733                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6734                 refs = btrfs_extent_refs_v0(eb, ei0);
6735 #else
6736                 BUG();
6737 #endif
6738                 memset(&tmpl, 0, sizeof(tmpl));
6739                 tmpl.start = key.objectid;
6740                 tmpl.nr = num_bytes;
6741                 tmpl.extent_item_refs = refs;
6742                 tmpl.metadata = metadata;
6743                 tmpl.found_rec = 1;
6744                 tmpl.max_size = num_bytes;
6745
6746                 return add_extent_rec(extent_cache, &tmpl);
6747         }
6748
6749         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6750         refs = btrfs_extent_refs(eb, ei);
6751         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6752                 metadata = 1;
6753         else
6754                 metadata = 0;
6755         if (metadata && num_bytes != root->nodesize) {
6756                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6757                       num_bytes, root->nodesize);
6758                 return -EIO;
6759         }
6760         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6761                 error("ignore invalid data extent, length %llu is not aligned to %u",
6762                       num_bytes, root->sectorsize);
6763                 return -EIO;
6764         }
6765
6766         memset(&tmpl, 0, sizeof(tmpl));
6767         tmpl.start = key.objectid;
6768         tmpl.nr = num_bytes;
6769         tmpl.extent_item_refs = refs;
6770         tmpl.metadata = metadata;
6771         tmpl.found_rec = 1;
6772         tmpl.max_size = num_bytes;
6773         add_extent_rec(extent_cache, &tmpl);
6774
6775         ptr = (unsigned long)(ei + 1);
6776         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6777             key.type == BTRFS_EXTENT_ITEM_KEY)
6778                 ptr += sizeof(struct btrfs_tree_block_info);
6779
6780         end = (unsigned long)ei + item_size;
6781         while (ptr < end) {
6782                 iref = (struct btrfs_extent_inline_ref *)ptr;
6783                 type = btrfs_extent_inline_ref_type(eb, iref);
6784                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6785                 switch (type) {
6786                 case BTRFS_TREE_BLOCK_REF_KEY:
6787                         ret = add_tree_backref(extent_cache, key.objectid,
6788                                         0, offset, 0);
6789                         if (ret < 0)
6790                                 error("add_tree_backref failed: %s",
6791                                       strerror(-ret));
6792                         break;
6793                 case BTRFS_SHARED_BLOCK_REF_KEY:
6794                         ret = add_tree_backref(extent_cache, key.objectid,
6795                                         offset, 0, 0);
6796                         if (ret < 0)
6797                                 error("add_tree_backref failed: %s",
6798                                       strerror(-ret));
6799                         break;
6800                 case BTRFS_EXTENT_DATA_REF_KEY:
6801                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6802                         add_data_backref(extent_cache, key.objectid, 0,
6803                                         btrfs_extent_data_ref_root(eb, dref),
6804                                         btrfs_extent_data_ref_objectid(eb,
6805                                                                        dref),
6806                                         btrfs_extent_data_ref_offset(eb, dref),
6807                                         btrfs_extent_data_ref_count(eb, dref),
6808                                         0, num_bytes);
6809                         break;
6810                 case BTRFS_SHARED_DATA_REF_KEY:
6811                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6812                         add_data_backref(extent_cache, key.objectid, offset,
6813                                         0, 0, 0,
6814                                         btrfs_shared_data_ref_count(eb, sref),
6815                                         0, num_bytes);
6816                         break;
6817                 default:
6818                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6819                                 key.objectid, key.type, num_bytes);
6820                         goto out;
6821                 }
6822                 ptr += btrfs_extent_inline_ref_size(type);
6823         }
6824         WARN_ON(ptr > end);
6825 out:
6826         return 0;
6827 }
6828
6829 static int check_cache_range(struct btrfs_root *root,
6830                              struct btrfs_block_group_cache *cache,
6831                              u64 offset, u64 bytes)
6832 {
6833         struct btrfs_free_space *entry;
6834         u64 *logical;
6835         u64 bytenr;
6836         int stripe_len;
6837         int i, nr, ret;
6838
6839         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6840                 bytenr = btrfs_sb_offset(i);
6841                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6842                                        cache->key.objectid, bytenr, 0,
6843                                        &logical, &nr, &stripe_len);
6844                 if (ret)
6845                         return ret;
6846
6847                 while (nr--) {
6848                         if (logical[nr] + stripe_len <= offset)
6849                                 continue;
6850                         if (offset + bytes <= logical[nr])
6851                                 continue;
6852                         if (logical[nr] == offset) {
6853                                 if (stripe_len >= bytes) {
6854                                         free(logical);
6855                                         return 0;
6856                                 }
6857                                 bytes -= stripe_len;
6858                                 offset += stripe_len;
6859                         } else if (logical[nr] < offset) {
6860                                 if (logical[nr] + stripe_len >=
6861                                     offset + bytes) {
6862                                         free(logical);
6863                                         return 0;
6864                                 }
6865                                 bytes = (offset + bytes) -
6866                                         (logical[nr] + stripe_len);
6867                                 offset = logical[nr] + stripe_len;
6868                         } else {
6869                                 /*
6870                                  * Could be tricky, the super may land in the
6871                                  * middle of the area we're checking.  First
6872                                  * check the easiest case, it's at the end.
6873                                  */
6874                                 if (logical[nr] + stripe_len >=
6875                                     bytes + offset) {
6876                                         bytes = logical[nr] - offset;
6877                                         continue;
6878                                 }
6879
6880                                 /* Check the left side */
6881                                 ret = check_cache_range(root, cache,
6882                                                         offset,
6883                                                         logical[nr] - offset);
6884                                 if (ret) {
6885                                         free(logical);
6886                                         return ret;
6887                                 }
6888
6889                                 /* Now we continue with the right side */
6890                                 bytes = (offset + bytes) -
6891                                         (logical[nr] + stripe_len);
6892                                 offset = logical[nr] + stripe_len;
6893                         }
6894                 }
6895
6896                 free(logical);
6897         }
6898
6899         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6900         if (!entry) {
6901                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6902                         offset, offset+bytes);
6903                 return -EINVAL;
6904         }
6905
6906         if (entry->offset != offset) {
6907                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6908                         entry->offset);
6909                 return -EINVAL;
6910         }
6911
6912         if (entry->bytes != bytes) {
6913                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6914                         bytes, entry->bytes, offset);
6915                 return -EINVAL;
6916         }
6917
6918         unlink_free_space(cache->free_space_ctl, entry);
6919         free(entry);
6920         return 0;
6921 }
6922
6923 static int verify_space_cache(struct btrfs_root *root,
6924                               struct btrfs_block_group_cache *cache)
6925 {
6926         struct btrfs_path path;
6927         struct extent_buffer *leaf;
6928         struct btrfs_key key;
6929         u64 last;
6930         int ret = 0;
6931
6932         root = root->fs_info->extent_root;
6933
6934         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6935
6936         btrfs_init_path(&path);
6937         key.objectid = last;
6938         key.offset = 0;
6939         key.type = BTRFS_EXTENT_ITEM_KEY;
6940         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6941         if (ret < 0)
6942                 goto out;
6943         ret = 0;
6944         while (1) {
6945                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6946                         ret = btrfs_next_leaf(root, &path);
6947                         if (ret < 0)
6948                                 goto out;
6949                         if (ret > 0) {
6950                                 ret = 0;
6951                                 break;
6952                         }
6953                 }
6954                 leaf = path.nodes[0];
6955                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6956                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6957                         break;
6958                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6959                     key.type != BTRFS_METADATA_ITEM_KEY) {
6960                         path.slots[0]++;
6961                         continue;
6962                 }
6963
6964                 if (last == key.objectid) {
6965                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6966                                 last = key.objectid + key.offset;
6967                         else
6968                                 last = key.objectid + root->nodesize;
6969                         path.slots[0]++;
6970                         continue;
6971                 }
6972
6973                 ret = check_cache_range(root, cache, last,
6974                                         key.objectid - last);
6975                 if (ret)
6976                         break;
6977                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6978                         last = key.objectid + key.offset;
6979                 else
6980                         last = key.objectid + root->nodesize;
6981                 path.slots[0]++;
6982         }
6983
6984         if (last < cache->key.objectid + cache->key.offset)
6985                 ret = check_cache_range(root, cache, last,
6986                                         cache->key.objectid +
6987                                         cache->key.offset - last);
6988
6989 out:
6990         btrfs_release_path(&path);
6991
6992         if (!ret &&
6993             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6994                 fprintf(stderr, "There are still entries left in the space "
6995                         "cache\n");
6996                 ret = -EINVAL;
6997         }
6998
6999         return ret;
7000 }
7001
7002 static int check_space_cache(struct btrfs_root *root)
7003 {
7004         struct btrfs_block_group_cache *cache;
7005         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7006         int ret;
7007         int error = 0;
7008
7009         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7010             btrfs_super_generation(root->fs_info->super_copy) !=
7011             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7012                 printf("cache and super generation don't match, space cache "
7013                        "will be invalidated\n");
7014                 return 0;
7015         }
7016
7017         if (ctx.progress_enabled) {
7018                 ctx.tp = TASK_FREE_SPACE;
7019                 task_start(ctx.info);
7020         }
7021
7022         while (1) {
7023                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7024                 if (!cache)
7025                         break;
7026
7027                 start = cache->key.objectid + cache->key.offset;
7028                 if (!cache->free_space_ctl) {
7029                         if (btrfs_init_free_space_ctl(cache,
7030                                                       root->sectorsize)) {
7031                                 ret = -ENOMEM;
7032                                 break;
7033                         }
7034                 } else {
7035                         btrfs_remove_free_space_cache(cache);
7036                 }
7037
7038                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7039                         ret = exclude_super_stripes(root, cache);
7040                         if (ret) {
7041                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7042                                         strerror(-ret));
7043                                 error++;
7044                                 continue;
7045                         }
7046                         ret = load_free_space_tree(root->fs_info, cache);
7047                         free_excluded_extents(root, cache);
7048                         if (ret < 0) {
7049                                 fprintf(stderr, "could not load free space tree: %s\n",
7050                                         strerror(-ret));
7051                                 error++;
7052                                 continue;
7053                         }
7054                         error += ret;
7055                 } else {
7056                         ret = load_free_space_cache(root->fs_info, cache);
7057                         if (!ret)
7058                                 continue;
7059                 }
7060
7061                 ret = verify_space_cache(root, cache);
7062                 if (ret) {
7063                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7064                                 cache->key.objectid);
7065                         error++;
7066                 }
7067         }
7068
7069         task_stop(ctx.info);
7070
7071         return error ? -EINVAL : 0;
7072 }
7073
7074 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7075                         u64 num_bytes, unsigned long leaf_offset,
7076                         struct extent_buffer *eb) {
7077
7078         u64 offset = 0;
7079         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7080         char *data;
7081         unsigned long csum_offset;
7082         u32 csum;
7083         u32 csum_expected;
7084         u64 read_len;
7085         u64 data_checked = 0;
7086         u64 tmp;
7087         int ret = 0;
7088         int mirror;
7089         int num_copies;
7090
7091         if (num_bytes % root->sectorsize)
7092                 return -EINVAL;
7093
7094         data = malloc(num_bytes);
7095         if (!data)
7096                 return -ENOMEM;
7097
7098         while (offset < num_bytes) {
7099                 mirror = 0;
7100 again:
7101                 read_len = num_bytes - offset;
7102                 /* read as much space once a time */
7103                 ret = read_extent_data(root, data + offset,
7104                                 bytenr + offset, &read_len, mirror);
7105                 if (ret)
7106                         goto out;
7107                 data_checked = 0;
7108                 /* verify every 4k data's checksum */
7109                 while (data_checked < read_len) {
7110                         csum = ~(u32)0;
7111                         tmp = offset + data_checked;
7112
7113                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7114                                                csum, root->sectorsize);
7115                         btrfs_csum_final(csum, (u8 *)&csum);
7116
7117                         csum_offset = leaf_offset +
7118                                  tmp / root->sectorsize * csum_size;
7119                         read_extent_buffer(eb, (char *)&csum_expected,
7120                                            csum_offset, csum_size);
7121                         /* try another mirror */
7122                         if (csum != csum_expected) {
7123                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7124                                                 mirror, bytenr + tmp,
7125                                                 csum, csum_expected);
7126                                 num_copies = btrfs_num_copies(
7127                                                 &root->fs_info->mapping_tree,
7128                                                 bytenr, num_bytes);
7129                                 if (mirror < num_copies - 1) {
7130                                         mirror += 1;
7131                                         goto again;
7132                                 }
7133                         }
7134                         data_checked += root->sectorsize;
7135                 }
7136                 offset += read_len;
7137         }
7138 out:
7139         free(data);
7140         return ret;
7141 }
7142
7143 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7144                                u64 num_bytes)
7145 {
7146         struct btrfs_path path;
7147         struct extent_buffer *leaf;
7148         struct btrfs_key key;
7149         int ret;
7150
7151         btrfs_init_path(&path);
7152         key.objectid = bytenr;
7153         key.type = BTRFS_EXTENT_ITEM_KEY;
7154         key.offset = (u64)-1;
7155
7156 again:
7157         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7158                                 0, 0);
7159         if (ret < 0) {
7160                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7161                 btrfs_release_path(&path);
7162                 return ret;
7163         } else if (ret) {
7164                 if (path.slots[0] > 0) {
7165                         path.slots[0]--;
7166                 } else {
7167                         ret = btrfs_prev_leaf(root, &path);
7168                         if (ret < 0) {
7169                                 goto out;
7170                         } else if (ret > 0) {
7171                                 ret = 0;
7172                                 goto out;
7173                         }
7174                 }
7175         }
7176
7177         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7178
7179         /*
7180          * Block group items come before extent items if they have the same
7181          * bytenr, so walk back one more just in case.  Dear future traveller,
7182          * first congrats on mastering time travel.  Now if it's not too much
7183          * trouble could you go back to 2006 and tell Chris to make the
7184          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7185          * EXTENT_ITEM_KEY please?
7186          */
7187         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7188                 if (path.slots[0] > 0) {
7189                         path.slots[0]--;
7190                 } else {
7191                         ret = btrfs_prev_leaf(root, &path);
7192                         if (ret < 0) {
7193                                 goto out;
7194                         } else if (ret > 0) {
7195                                 ret = 0;
7196                                 goto out;
7197                         }
7198                 }
7199                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7200         }
7201
7202         while (num_bytes) {
7203                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7204                         ret = btrfs_next_leaf(root, &path);
7205                         if (ret < 0) {
7206                                 fprintf(stderr, "Error going to next leaf "
7207                                         "%d\n", ret);
7208                                 btrfs_release_path(&path);
7209                                 return ret;
7210                         } else if (ret) {
7211                                 break;
7212                         }
7213                 }
7214                 leaf = path.nodes[0];
7215                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7216                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7217                         path.slots[0]++;
7218                         continue;
7219                 }
7220                 if (key.objectid + key.offset < bytenr) {
7221                         path.slots[0]++;
7222                         continue;
7223                 }
7224                 if (key.objectid > bytenr + num_bytes)
7225                         break;
7226
7227                 if (key.objectid == bytenr) {
7228                         if (key.offset >= num_bytes) {
7229                                 num_bytes = 0;
7230                                 break;
7231                         }
7232                         num_bytes -= key.offset;
7233                         bytenr += key.offset;
7234                 } else if (key.objectid < bytenr) {
7235                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7236                                 num_bytes = 0;
7237                                 break;
7238                         }
7239                         num_bytes = (bytenr + num_bytes) -
7240                                 (key.objectid + key.offset);
7241                         bytenr = key.objectid + key.offset;
7242                 } else {
7243                         if (key.objectid + key.offset < bytenr + num_bytes) {
7244                                 u64 new_start = key.objectid + key.offset;
7245                                 u64 new_bytes = bytenr + num_bytes - new_start;
7246
7247                                 /*
7248                                  * Weird case, the extent is in the middle of
7249                                  * our range, we'll have to search one side
7250                                  * and then the other.  Not sure if this happens
7251                                  * in real life, but no harm in coding it up
7252                                  * anyway just in case.
7253                                  */
7254                                 btrfs_release_path(&path);
7255                                 ret = check_extent_exists(root, new_start,
7256                                                           new_bytes);
7257                                 if (ret) {
7258                                         fprintf(stderr, "Right section didn't "
7259                                                 "have a record\n");
7260                                         break;
7261                                 }
7262                                 num_bytes = key.objectid - bytenr;
7263                                 goto again;
7264                         }
7265                         num_bytes = key.objectid - bytenr;
7266                 }
7267                 path.slots[0]++;
7268         }
7269         ret = 0;
7270
7271 out:
7272         if (num_bytes && !ret) {
7273                 fprintf(stderr, "There are no extents for csum range "
7274                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7275                 ret = 1;
7276         }
7277
7278         btrfs_release_path(&path);
7279         return ret;
7280 }
7281
7282 static int check_csums(struct btrfs_root *root)
7283 {
7284         struct btrfs_path path;
7285         struct extent_buffer *leaf;
7286         struct btrfs_key key;
7287         u64 offset = 0, num_bytes = 0;
7288         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7289         int errors = 0;
7290         int ret;
7291         u64 data_len;
7292         unsigned long leaf_offset;
7293
7294         root = root->fs_info->csum_root;
7295         if (!extent_buffer_uptodate(root->node)) {
7296                 fprintf(stderr, "No valid csum tree found\n");
7297                 return -ENOENT;
7298         }
7299
7300         btrfs_init_path(&path);
7301         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7302         key.type = BTRFS_EXTENT_CSUM_KEY;
7303         key.offset = 0;
7304         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7305         if (ret < 0) {
7306                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7307                 btrfs_release_path(&path);
7308                 return ret;
7309         }
7310
7311         if (ret > 0 && path.slots[0])
7312                 path.slots[0]--;
7313         ret = 0;
7314
7315         while (1) {
7316                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7317                         ret = btrfs_next_leaf(root, &path);
7318                         if (ret < 0) {
7319                                 fprintf(stderr, "Error going to next leaf "
7320                                         "%d\n", ret);
7321                                 break;
7322                         }
7323                         if (ret)
7324                                 break;
7325                 }
7326                 leaf = path.nodes[0];
7327
7328                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7329                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7330                         path.slots[0]++;
7331                         continue;
7332                 }
7333
7334                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7335                               csum_size) * root->sectorsize;
7336                 if (!check_data_csum)
7337                         goto skip_csum_check;
7338                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7339                 ret = check_extent_csums(root, key.offset, data_len,
7340                                          leaf_offset, leaf);
7341                 if (ret)
7342                         break;
7343 skip_csum_check:
7344                 if (!num_bytes) {
7345                         offset = key.offset;
7346                 } else if (key.offset != offset + num_bytes) {
7347                         ret = check_extent_exists(root, offset, num_bytes);
7348                         if (ret) {
7349                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7350                                         "there is no extent record\n",
7351                                         offset, offset+num_bytes);
7352                                 errors++;
7353                         }
7354                         offset = key.offset;
7355                         num_bytes = 0;
7356                 }
7357                 num_bytes += data_len;
7358                 path.slots[0]++;
7359         }
7360
7361         btrfs_release_path(&path);
7362         return errors;
7363 }
7364
7365 static int is_dropped_key(struct btrfs_key *key,
7366                           struct btrfs_key *drop_key) {
7367         if (key->objectid < drop_key->objectid)
7368                 return 1;
7369         else if (key->objectid == drop_key->objectid) {
7370                 if (key->type < drop_key->type)
7371                         return 1;
7372                 else if (key->type == drop_key->type) {
7373                         if (key->offset < drop_key->offset)
7374                                 return 1;
7375                 }
7376         }
7377         return 0;
7378 }
7379
7380 /*
7381  * Here are the rules for FULL_BACKREF.
7382  *
7383  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7384  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7385  *      FULL_BACKREF set.
7386  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7387  *    if it happened after the relocation occurred since we'll have dropped the
7388  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7389  *    have no real way to know for sure.
7390  *
7391  * We process the blocks one root at a time, and we start from the lowest root
7392  * objectid and go to the highest.  So we can just lookup the owner backref for
7393  * the record and if we don't find it then we know it doesn't exist and we have
7394  * a FULL BACKREF.
7395  *
7396  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7397  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7398  * be set or not and then we can check later once we've gathered all the refs.
7399  */
7400 static int calc_extent_flag(struct btrfs_root *root,
7401                            struct cache_tree *extent_cache,
7402                            struct extent_buffer *buf,
7403                            struct root_item_record *ri,
7404                            u64 *flags)
7405 {
7406         struct extent_record *rec;
7407         struct cache_extent *cache;
7408         struct tree_backref *tback;
7409         u64 owner = 0;
7410
7411         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7412         /* we have added this extent before */
7413         if (!cache)
7414                 return -ENOENT;
7415
7416         rec = container_of(cache, struct extent_record, cache);
7417
7418         /*
7419          * Except file/reloc tree, we can not have
7420          * FULL BACKREF MODE
7421          */
7422         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7423                 goto normal;
7424         /*
7425          * root node
7426          */
7427         if (buf->start == ri->bytenr)
7428                 goto normal;
7429
7430         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7431                 goto full_backref;
7432
7433         owner = btrfs_header_owner(buf);
7434         if (owner == ri->objectid)
7435                 goto normal;
7436
7437         tback = find_tree_backref(rec, 0, owner);
7438         if (!tback)
7439                 goto full_backref;
7440 normal:
7441         *flags = 0;
7442         if (rec->flag_block_full_backref != FLAG_UNSET &&
7443             rec->flag_block_full_backref != 0)
7444                 rec->bad_full_backref = 1;
7445         return 0;
7446 full_backref:
7447         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7448         if (rec->flag_block_full_backref != FLAG_UNSET &&
7449             rec->flag_block_full_backref != 1)
7450                 rec->bad_full_backref = 1;
7451         return 0;
7452 }
7453
7454 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7455 {
7456         fprintf(stderr, "Invalid key type(");
7457         print_key_type(stderr, 0, key_type);
7458         fprintf(stderr, ") found in root(");
7459         print_objectid(stderr, rootid, 0);
7460         fprintf(stderr, ")\n");
7461 }
7462
7463 /*
7464  * Check if the key is valid with its extent buffer.
7465  *
7466  * This is a early check in case invalid key exists in a extent buffer
7467  * This is not comprehensive yet, but should prevent wrong key/item passed
7468  * further
7469  */
7470 static int check_type_with_root(u64 rootid, u8 key_type)
7471 {
7472         switch (key_type) {
7473         /* Only valid in chunk tree */
7474         case BTRFS_DEV_ITEM_KEY:
7475         case BTRFS_CHUNK_ITEM_KEY:
7476                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7477                         goto err;
7478                 break;
7479         /* valid in csum and log tree */
7480         case BTRFS_CSUM_TREE_OBJECTID:
7481                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7482                       is_fstree(rootid)))
7483                         goto err;
7484                 break;
7485         case BTRFS_EXTENT_ITEM_KEY:
7486         case BTRFS_METADATA_ITEM_KEY:
7487         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7488                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7489                         goto err;
7490                 break;
7491         case BTRFS_ROOT_ITEM_KEY:
7492                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7493                         goto err;
7494                 break;
7495         case BTRFS_DEV_EXTENT_KEY:
7496                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7497                         goto err;
7498                 break;
7499         }
7500         return 0;
7501 err:
7502         report_mismatch_key_root(key_type, rootid);
7503         return -EINVAL;
7504 }
7505
7506 static int run_next_block(struct btrfs_root *root,
7507                           struct block_info *bits,
7508                           int bits_nr,
7509                           u64 *last,
7510                           struct cache_tree *pending,
7511                           struct cache_tree *seen,
7512                           struct cache_tree *reada,
7513                           struct cache_tree *nodes,
7514                           struct cache_tree *extent_cache,
7515                           struct cache_tree *chunk_cache,
7516                           struct rb_root *dev_cache,
7517                           struct block_group_tree *block_group_cache,
7518                           struct device_extent_tree *dev_extent_cache,
7519                           struct root_item_record *ri)
7520 {
7521         struct extent_buffer *buf;
7522         struct extent_record *rec = NULL;
7523         u64 bytenr;
7524         u32 size;
7525         u64 parent;
7526         u64 owner;
7527         u64 flags;
7528         u64 ptr;
7529         u64 gen = 0;
7530         int ret = 0;
7531         int i;
7532         int nritems;
7533         struct btrfs_key key;
7534         struct cache_extent *cache;
7535         int reada_bits;
7536
7537         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7538                                     bits_nr, &reada_bits);
7539         if (nritems == 0)
7540                 return 1;
7541
7542         if (!reada_bits) {
7543                 for(i = 0; i < nritems; i++) {
7544                         ret = add_cache_extent(reada, bits[i].start,
7545                                                bits[i].size);
7546                         if (ret == -EEXIST)
7547                                 continue;
7548
7549                         /* fixme, get the parent transid */
7550                         readahead_tree_block(root, bits[i].start,
7551                                              bits[i].size, 0);
7552                 }
7553         }
7554         *last = bits[0].start;
7555         bytenr = bits[0].start;
7556         size = bits[0].size;
7557
7558         cache = lookup_cache_extent(pending, bytenr, size);
7559         if (cache) {
7560                 remove_cache_extent(pending, cache);
7561                 free(cache);
7562         }
7563         cache = lookup_cache_extent(reada, bytenr, size);
7564         if (cache) {
7565                 remove_cache_extent(reada, cache);
7566                 free(cache);
7567         }
7568         cache = lookup_cache_extent(nodes, bytenr, size);
7569         if (cache) {
7570                 remove_cache_extent(nodes, cache);
7571                 free(cache);
7572         }
7573         cache = lookup_cache_extent(extent_cache, bytenr, size);
7574         if (cache) {
7575                 rec = container_of(cache, struct extent_record, cache);
7576                 gen = rec->parent_generation;
7577         }
7578
7579         /* fixme, get the real parent transid */
7580         buf = read_tree_block(root, bytenr, size, gen);
7581         if (!extent_buffer_uptodate(buf)) {
7582                 record_bad_block_io(root->fs_info,
7583                                     extent_cache, bytenr, size);
7584                 goto out;
7585         }
7586
7587         nritems = btrfs_header_nritems(buf);
7588
7589         flags = 0;
7590         if (!init_extent_tree) {
7591                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7592                                        btrfs_header_level(buf), 1, NULL,
7593                                        &flags);
7594                 if (ret < 0) {
7595                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7596                         if (ret < 0) {
7597                                 fprintf(stderr, "Couldn't calc extent flags\n");
7598                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7599                         }
7600                 }
7601         } else {
7602                 flags = 0;
7603                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7604                 if (ret < 0) {
7605                         fprintf(stderr, "Couldn't calc extent flags\n");
7606                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7607                 }
7608         }
7609
7610         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7611                 if (ri != NULL &&
7612                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7613                     ri->objectid == btrfs_header_owner(buf)) {
7614                         /*
7615                          * Ok we got to this block from it's original owner and
7616                          * we have FULL_BACKREF set.  Relocation can leave
7617                          * converted blocks over so this is altogether possible,
7618                          * however it's not possible if the generation > the
7619                          * last snapshot, so check for this case.
7620                          */
7621                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7622                             btrfs_header_generation(buf) > ri->last_snapshot) {
7623                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7624                                 rec->bad_full_backref = 1;
7625                         }
7626                 }
7627         } else {
7628                 if (ri != NULL &&
7629                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7630                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7631                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7632                         rec->bad_full_backref = 1;
7633                 }
7634         }
7635
7636         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7637                 rec->flag_block_full_backref = 1;
7638                 parent = bytenr;
7639                 owner = 0;
7640         } else {
7641                 rec->flag_block_full_backref = 0;
7642                 parent = 0;
7643                 owner = btrfs_header_owner(buf);
7644         }
7645
7646         ret = check_block(root, extent_cache, buf, flags);
7647         if (ret)
7648                 goto out;
7649
7650         if (btrfs_is_leaf(buf)) {
7651                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7652                 for (i = 0; i < nritems; i++) {
7653                         struct btrfs_file_extent_item *fi;
7654                         btrfs_item_key_to_cpu(buf, &key, i);
7655                         /*
7656                          * Check key type against the leaf owner.
7657                          * Could filter quite a lot of early error if
7658                          * owner is correct
7659                          */
7660                         if (check_type_with_root(btrfs_header_owner(buf),
7661                                                  key.type)) {
7662                                 fprintf(stderr, "ignoring invalid key\n");
7663                                 continue;
7664                         }
7665                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7666                                 process_extent_item(root, extent_cache, buf,
7667                                                     i);
7668                                 continue;
7669                         }
7670                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7671                                 process_extent_item(root, extent_cache, buf,
7672                                                     i);
7673                                 continue;
7674                         }
7675                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7676                                 total_csum_bytes +=
7677                                         btrfs_item_size_nr(buf, i);
7678                                 continue;
7679                         }
7680                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7681                                 process_chunk_item(chunk_cache, &key, buf, i);
7682                                 continue;
7683                         }
7684                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7685                                 process_device_item(dev_cache, &key, buf, i);
7686                                 continue;
7687                         }
7688                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7689                                 process_block_group_item(block_group_cache,
7690                                         &key, buf, i);
7691                                 continue;
7692                         }
7693                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7694                                 process_device_extent_item(dev_extent_cache,
7695                                         &key, buf, i);
7696                                 continue;
7697
7698                         }
7699                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7700 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7701                                 process_extent_ref_v0(extent_cache, buf, i);
7702 #else
7703                                 BUG();
7704 #endif
7705                                 continue;
7706                         }
7707
7708                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7709                                 ret = add_tree_backref(extent_cache,
7710                                                 key.objectid, 0, key.offset, 0);
7711                                 if (ret < 0)
7712                                         error("add_tree_backref failed: %s",
7713                                               strerror(-ret));
7714                                 continue;
7715                         }
7716                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7717                                 ret = add_tree_backref(extent_cache,
7718                                                 key.objectid, key.offset, 0, 0);
7719                                 if (ret < 0)
7720                                         error("add_tree_backref failed: %s",
7721                                               strerror(-ret));
7722                                 continue;
7723                         }
7724                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7725                                 struct btrfs_extent_data_ref *ref;
7726                                 ref = btrfs_item_ptr(buf, i,
7727                                                 struct btrfs_extent_data_ref);
7728                                 add_data_backref(extent_cache,
7729                                         key.objectid, 0,
7730                                         btrfs_extent_data_ref_root(buf, ref),
7731                                         btrfs_extent_data_ref_objectid(buf,
7732                                                                        ref),
7733                                         btrfs_extent_data_ref_offset(buf, ref),
7734                                         btrfs_extent_data_ref_count(buf, ref),
7735                                         0, root->sectorsize);
7736                                 continue;
7737                         }
7738                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7739                                 struct btrfs_shared_data_ref *ref;
7740                                 ref = btrfs_item_ptr(buf, i,
7741                                                 struct btrfs_shared_data_ref);
7742                                 add_data_backref(extent_cache,
7743                                         key.objectid, key.offset, 0, 0, 0,
7744                                         btrfs_shared_data_ref_count(buf, ref),
7745                                         0, root->sectorsize);
7746                                 continue;
7747                         }
7748                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7749                                 struct bad_item *bad;
7750
7751                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7752                                         continue;
7753                                 if (!owner)
7754                                         continue;
7755                                 bad = malloc(sizeof(struct bad_item));
7756                                 if (!bad)
7757                                         continue;
7758                                 INIT_LIST_HEAD(&bad->list);
7759                                 memcpy(&bad->key, &key,
7760                                        sizeof(struct btrfs_key));
7761                                 bad->root_id = owner;
7762                                 list_add_tail(&bad->list, &delete_items);
7763                                 continue;
7764                         }
7765                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7766                                 continue;
7767                         fi = btrfs_item_ptr(buf, i,
7768                                             struct btrfs_file_extent_item);
7769                         if (btrfs_file_extent_type(buf, fi) ==
7770                             BTRFS_FILE_EXTENT_INLINE)
7771                                 continue;
7772                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7773                                 continue;
7774
7775                         data_bytes_allocated +=
7776                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7777                         if (data_bytes_allocated < root->sectorsize) {
7778                                 abort();
7779                         }
7780                         data_bytes_referenced +=
7781                                 btrfs_file_extent_num_bytes(buf, fi);
7782                         add_data_backref(extent_cache,
7783                                 btrfs_file_extent_disk_bytenr(buf, fi),
7784                                 parent, owner, key.objectid, key.offset -
7785                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7786                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7787                 }
7788         } else {
7789                 int level;
7790                 struct btrfs_key first_key;
7791
7792                 first_key.objectid = 0;
7793
7794                 if (nritems > 0)
7795                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7796                 level = btrfs_header_level(buf);
7797                 for (i = 0; i < nritems; i++) {
7798                         struct extent_record tmpl;
7799
7800                         ptr = btrfs_node_blockptr(buf, i);
7801                         size = root->nodesize;
7802                         btrfs_node_key_to_cpu(buf, &key, i);
7803                         if (ri != NULL) {
7804                                 if ((level == ri->drop_level)
7805                                     && is_dropped_key(&key, &ri->drop_key)) {
7806                                         continue;
7807                                 }
7808                         }
7809
7810                         memset(&tmpl, 0, sizeof(tmpl));
7811                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7812                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7813                         tmpl.start = ptr;
7814                         tmpl.nr = size;
7815                         tmpl.refs = 1;
7816                         tmpl.metadata = 1;
7817                         tmpl.max_size = size;
7818                         ret = add_extent_rec(extent_cache, &tmpl);
7819                         if (ret < 0)
7820                                 goto out;
7821
7822                         ret = add_tree_backref(extent_cache, ptr, parent,
7823                                         owner, 1);
7824                         if (ret < 0) {
7825                                 error("add_tree_backref failed: %s",
7826                                       strerror(-ret));
7827                                 continue;
7828                         }
7829
7830                         if (level > 1) {
7831                                 add_pending(nodes, seen, ptr, size);
7832                         } else {
7833                                 add_pending(pending, seen, ptr, size);
7834                         }
7835                 }
7836                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7837                                       nritems) * sizeof(struct btrfs_key_ptr);
7838         }
7839         total_btree_bytes += buf->len;
7840         if (fs_root_objectid(btrfs_header_owner(buf)))
7841                 total_fs_tree_bytes += buf->len;
7842         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7843                 total_extent_tree_bytes += buf->len;
7844         if (!found_old_backref &&
7845             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7846             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7847             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7848                 found_old_backref = 1;
7849 out:
7850         free_extent_buffer(buf);
7851         return ret;
7852 }
7853
7854 static int add_root_to_pending(struct extent_buffer *buf,
7855                                struct cache_tree *extent_cache,
7856                                struct cache_tree *pending,
7857                                struct cache_tree *seen,
7858                                struct cache_tree *nodes,
7859                                u64 objectid)
7860 {
7861         struct extent_record tmpl;
7862         int ret;
7863
7864         if (btrfs_header_level(buf) > 0)
7865                 add_pending(nodes, seen, buf->start, buf->len);
7866         else
7867                 add_pending(pending, seen, buf->start, buf->len);
7868
7869         memset(&tmpl, 0, sizeof(tmpl));
7870         tmpl.start = buf->start;
7871         tmpl.nr = buf->len;
7872         tmpl.is_root = 1;
7873         tmpl.refs = 1;
7874         tmpl.metadata = 1;
7875         tmpl.max_size = buf->len;
7876         add_extent_rec(extent_cache, &tmpl);
7877
7878         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7879             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7880                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7881                                 0, 1);
7882         else
7883                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7884                                 1);
7885         return ret;
7886 }
7887
7888 /* as we fix the tree, we might be deleting blocks that
7889  * we're tracking for repair.  This hook makes sure we
7890  * remove any backrefs for blocks as we are fixing them.
7891  */
7892 static int free_extent_hook(struct btrfs_trans_handle *trans,
7893                             struct btrfs_root *root,
7894                             u64 bytenr, u64 num_bytes, u64 parent,
7895                             u64 root_objectid, u64 owner, u64 offset,
7896                             int refs_to_drop)
7897 {
7898         struct extent_record *rec;
7899         struct cache_extent *cache;
7900         int is_data;
7901         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7902
7903         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7904         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7905         if (!cache)
7906                 return 0;
7907
7908         rec = container_of(cache, struct extent_record, cache);
7909         if (is_data) {
7910                 struct data_backref *back;
7911                 back = find_data_backref(rec, parent, root_objectid, owner,
7912                                          offset, 1, bytenr, num_bytes);
7913                 if (!back)
7914                         goto out;
7915                 if (back->node.found_ref) {
7916                         back->found_ref -= refs_to_drop;
7917                         if (rec->refs)
7918                                 rec->refs -= refs_to_drop;
7919                 }
7920                 if (back->node.found_extent_tree) {
7921                         back->num_refs -= refs_to_drop;
7922                         if (rec->extent_item_refs)
7923                                 rec->extent_item_refs -= refs_to_drop;
7924                 }
7925                 if (back->found_ref == 0)
7926                         back->node.found_ref = 0;
7927                 if (back->num_refs == 0)
7928                         back->node.found_extent_tree = 0;
7929
7930                 if (!back->node.found_extent_tree && back->node.found_ref) {
7931                         list_del(&back->node.list);
7932                         free(back);
7933                 }
7934         } else {
7935                 struct tree_backref *back;
7936                 back = find_tree_backref(rec, parent, root_objectid);
7937                 if (!back)
7938                         goto out;
7939                 if (back->node.found_ref) {
7940                         if (rec->refs)
7941                                 rec->refs--;
7942                         back->node.found_ref = 0;
7943                 }
7944                 if (back->node.found_extent_tree) {
7945                         if (rec->extent_item_refs)
7946                                 rec->extent_item_refs--;
7947                         back->node.found_extent_tree = 0;
7948                 }
7949                 if (!back->node.found_extent_tree && back->node.found_ref) {
7950                         list_del(&back->node.list);
7951                         free(back);
7952                 }
7953         }
7954         maybe_free_extent_rec(extent_cache, rec);
7955 out:
7956         return 0;
7957 }
7958
7959 static int delete_extent_records(struct btrfs_trans_handle *trans,
7960                                  struct btrfs_root *root,
7961                                  struct btrfs_path *path,
7962                                  u64 bytenr, u64 new_len)
7963 {
7964         struct btrfs_key key;
7965         struct btrfs_key found_key;
7966         struct extent_buffer *leaf;
7967         int ret;
7968         int slot;
7969
7970
7971         key.objectid = bytenr;
7972         key.type = (u8)-1;
7973         key.offset = (u64)-1;
7974
7975         while(1) {
7976                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7977                                         &key, path, 0, 1);
7978                 if (ret < 0)
7979                         break;
7980
7981                 if (ret > 0) {
7982                         ret = 0;
7983                         if (path->slots[0] == 0)
7984                                 break;
7985                         path->slots[0]--;
7986                 }
7987                 ret = 0;
7988
7989                 leaf = path->nodes[0];
7990                 slot = path->slots[0];
7991
7992                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7993                 if (found_key.objectid != bytenr)
7994                         break;
7995
7996                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7997                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7998                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7999                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8000                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8001                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8002                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8003                         btrfs_release_path(path);
8004                         if (found_key.type == 0) {
8005                                 if (found_key.offset == 0)
8006                                         break;
8007                                 key.offset = found_key.offset - 1;
8008                                 key.type = found_key.type;
8009                         }
8010                         key.type = found_key.type - 1;
8011                         key.offset = (u64)-1;
8012                         continue;
8013                 }
8014
8015                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8016                         found_key.objectid, found_key.type, found_key.offset);
8017
8018                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8019                 if (ret)
8020                         break;
8021                 btrfs_release_path(path);
8022
8023                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8024                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8025                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8026                                 found_key.offset : root->nodesize;
8027
8028                         ret = btrfs_update_block_group(trans, root, bytenr,
8029                                                        bytes, 0, 0);
8030                         if (ret)
8031                                 break;
8032                 }
8033         }
8034
8035         btrfs_release_path(path);
8036         return ret;
8037 }
8038
8039 /*
8040  * for a single backref, this will allocate a new extent
8041  * and add the backref to it.
8042  */
8043 static int record_extent(struct btrfs_trans_handle *trans,
8044                          struct btrfs_fs_info *info,
8045                          struct btrfs_path *path,
8046                          struct extent_record *rec,
8047                          struct extent_backref *back,
8048                          int allocated, u64 flags)
8049 {
8050         int ret;
8051         struct btrfs_root *extent_root = info->extent_root;
8052         struct extent_buffer *leaf;
8053         struct btrfs_key ins_key;
8054         struct btrfs_extent_item *ei;
8055         struct data_backref *dback;
8056         struct btrfs_tree_block_info *bi;
8057
8058         if (!back->is_data)
8059                 rec->max_size = max_t(u64, rec->max_size,
8060                                     info->extent_root->nodesize);
8061
8062         if (!allocated) {
8063                 u32 item_size = sizeof(*ei);
8064
8065                 if (!back->is_data)
8066                         item_size += sizeof(*bi);
8067
8068                 ins_key.objectid = rec->start;
8069                 ins_key.offset = rec->max_size;
8070                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8071
8072                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8073                                         &ins_key, item_size);
8074                 if (ret)
8075                         goto fail;
8076
8077                 leaf = path->nodes[0];
8078                 ei = btrfs_item_ptr(leaf, path->slots[0],
8079                                     struct btrfs_extent_item);
8080
8081                 btrfs_set_extent_refs(leaf, ei, 0);
8082                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8083
8084                 if (back->is_data) {
8085                         btrfs_set_extent_flags(leaf, ei,
8086                                                BTRFS_EXTENT_FLAG_DATA);
8087                 } else {
8088                         struct btrfs_disk_key copy_key;;
8089
8090                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8091                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8092                                              sizeof(*bi));
8093
8094                         btrfs_set_disk_key_objectid(&copy_key,
8095                                                     rec->info_objectid);
8096                         btrfs_set_disk_key_type(&copy_key, 0);
8097                         btrfs_set_disk_key_offset(&copy_key, 0);
8098
8099                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8100                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8101
8102                         btrfs_set_extent_flags(leaf, ei,
8103                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8104                 }
8105
8106                 btrfs_mark_buffer_dirty(leaf);
8107                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8108                                                rec->max_size, 1, 0);
8109                 if (ret)
8110                         goto fail;
8111                 btrfs_release_path(path);
8112         }
8113
8114         if (back->is_data) {
8115                 u64 parent;
8116                 int i;
8117
8118                 dback = to_data_backref(back);
8119                 if (back->full_backref)
8120                         parent = dback->parent;
8121                 else
8122                         parent = 0;
8123
8124                 for (i = 0; i < dback->found_ref; i++) {
8125                         /* if parent != 0, we're doing a full backref
8126                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8127                          * just makes the backref allocator create a data
8128                          * backref
8129                          */
8130                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8131                                                    rec->start, rec->max_size,
8132                                                    parent,
8133                                                    dback->root,
8134                                                    parent ?
8135                                                    BTRFS_FIRST_FREE_OBJECTID :
8136                                                    dback->owner,
8137                                                    dback->offset);
8138                         if (ret)
8139                                 break;
8140                 }
8141                 fprintf(stderr, "adding new data backref"
8142                                 " on %llu %s %llu owner %llu"
8143                                 " offset %llu found %d\n",
8144                                 (unsigned long long)rec->start,
8145                                 back->full_backref ?
8146                                 "parent" : "root",
8147                                 back->full_backref ?
8148                                 (unsigned long long)parent :
8149                                 (unsigned long long)dback->root,
8150                                 (unsigned long long)dback->owner,
8151                                 (unsigned long long)dback->offset,
8152                                 dback->found_ref);
8153         } else {
8154                 u64 parent;
8155                 struct tree_backref *tback;
8156
8157                 tback = to_tree_backref(back);
8158                 if (back->full_backref)
8159                         parent = tback->parent;
8160                 else
8161                         parent = 0;
8162
8163                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8164                                            rec->start, rec->max_size,
8165                                            parent, tback->root, 0, 0);
8166                 fprintf(stderr, "adding new tree backref on "
8167                         "start %llu len %llu parent %llu root %llu\n",
8168                         rec->start, rec->max_size, parent, tback->root);
8169         }
8170 fail:
8171         btrfs_release_path(path);
8172         return ret;
8173 }
8174
8175 static struct extent_entry *find_entry(struct list_head *entries,
8176                                        u64 bytenr, u64 bytes)
8177 {
8178         struct extent_entry *entry = NULL;
8179
8180         list_for_each_entry(entry, entries, list) {
8181                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8182                         return entry;
8183         }
8184
8185         return NULL;
8186 }
8187
8188 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8189 {
8190         struct extent_entry *entry, *best = NULL, *prev = NULL;
8191
8192         list_for_each_entry(entry, entries, list) {
8193                 /*
8194                  * If there are as many broken entries as entries then we know
8195                  * not to trust this particular entry.
8196                  */
8197                 if (entry->broken == entry->count)
8198                         continue;
8199
8200                 /*
8201                  * Special case, when there are only two entries and 'best' is
8202                  * the first one
8203                  */
8204                 if (!prev) {
8205                         best = entry;
8206                         prev = entry;
8207                         continue;
8208                 }
8209
8210                 /*
8211                  * If our current entry == best then we can't be sure our best
8212                  * is really the best, so we need to keep searching.
8213                  */
8214                 if (best && best->count == entry->count) {
8215                         prev = entry;
8216                         best = NULL;
8217                         continue;
8218                 }
8219
8220                 /* Prev == entry, not good enough, have to keep searching */
8221                 if (!prev->broken && prev->count == entry->count)
8222                         continue;
8223
8224                 if (!best)
8225                         best = (prev->count > entry->count) ? prev : entry;
8226                 else if (best->count < entry->count)
8227                         best = entry;
8228                 prev = entry;
8229         }
8230
8231         return best;
8232 }
8233
8234 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8235                       struct data_backref *dback, struct extent_entry *entry)
8236 {
8237         struct btrfs_trans_handle *trans;
8238         struct btrfs_root *root;
8239         struct btrfs_file_extent_item *fi;
8240         struct extent_buffer *leaf;
8241         struct btrfs_key key;
8242         u64 bytenr, bytes;
8243         int ret, err;
8244
8245         key.objectid = dback->root;
8246         key.type = BTRFS_ROOT_ITEM_KEY;
8247         key.offset = (u64)-1;
8248         root = btrfs_read_fs_root(info, &key);
8249         if (IS_ERR(root)) {
8250                 fprintf(stderr, "Couldn't find root for our ref\n");
8251                 return -EINVAL;
8252         }
8253
8254         /*
8255          * The backref points to the original offset of the extent if it was
8256          * split, so we need to search down to the offset we have and then walk
8257          * forward until we find the backref we're looking for.
8258          */
8259         key.objectid = dback->owner;
8260         key.type = BTRFS_EXTENT_DATA_KEY;
8261         key.offset = dback->offset;
8262         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8263         if (ret < 0) {
8264                 fprintf(stderr, "Error looking up ref %d\n", ret);
8265                 return ret;
8266         }
8267
8268         while (1) {
8269                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8270                         ret = btrfs_next_leaf(root, path);
8271                         if (ret) {
8272                                 fprintf(stderr, "Couldn't find our ref, next\n");
8273                                 return -EINVAL;
8274                         }
8275                 }
8276                 leaf = path->nodes[0];
8277                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8278                 if (key.objectid != dback->owner ||
8279                     key.type != BTRFS_EXTENT_DATA_KEY) {
8280                         fprintf(stderr, "Couldn't find our ref, search\n");
8281                         return -EINVAL;
8282                 }
8283                 fi = btrfs_item_ptr(leaf, path->slots[0],
8284                                     struct btrfs_file_extent_item);
8285                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8286                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8287
8288                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8289                         break;
8290                 path->slots[0]++;
8291         }
8292
8293         btrfs_release_path(path);
8294
8295         trans = btrfs_start_transaction(root, 1);
8296         if (IS_ERR(trans))
8297                 return PTR_ERR(trans);
8298
8299         /*
8300          * Ok we have the key of the file extent we want to fix, now we can cow
8301          * down to the thing and fix it.
8302          */
8303         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8304         if (ret < 0) {
8305                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8306                         key.objectid, key.type, key.offset, ret);
8307                 goto out;
8308         }
8309         if (ret > 0) {
8310                 fprintf(stderr, "Well that's odd, we just found this key "
8311                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8312                         key.offset);
8313                 ret = -EINVAL;
8314                 goto out;
8315         }
8316         leaf = path->nodes[0];
8317         fi = btrfs_item_ptr(leaf, path->slots[0],
8318                             struct btrfs_file_extent_item);
8319
8320         if (btrfs_file_extent_compression(leaf, fi) &&
8321             dback->disk_bytenr != entry->bytenr) {
8322                 fprintf(stderr, "Ref doesn't match the record start and is "
8323                         "compressed, please take a btrfs-image of this file "
8324                         "system and send it to a btrfs developer so they can "
8325                         "complete this functionality for bytenr %Lu\n",
8326                         dback->disk_bytenr);
8327                 ret = -EINVAL;
8328                 goto out;
8329         }
8330
8331         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8332                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8333         } else if (dback->disk_bytenr > entry->bytenr) {
8334                 u64 off_diff, offset;
8335
8336                 off_diff = dback->disk_bytenr - entry->bytenr;
8337                 offset = btrfs_file_extent_offset(leaf, fi);
8338                 if (dback->disk_bytenr + offset +
8339                     btrfs_file_extent_num_bytes(leaf, fi) >
8340                     entry->bytenr + entry->bytes) {
8341                         fprintf(stderr, "Ref is past the entry end, please "
8342                                 "take a btrfs-image of this file system and "
8343                                 "send it to a btrfs developer, ref %Lu\n",
8344                                 dback->disk_bytenr);
8345                         ret = -EINVAL;
8346                         goto out;
8347                 }
8348                 offset += off_diff;
8349                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8350                 btrfs_set_file_extent_offset(leaf, fi, offset);
8351         } else if (dback->disk_bytenr < entry->bytenr) {
8352                 u64 offset;
8353
8354                 offset = btrfs_file_extent_offset(leaf, fi);
8355                 if (dback->disk_bytenr + offset < entry->bytenr) {
8356                         fprintf(stderr, "Ref is before the entry start, please"
8357                                 " take a btrfs-image of this file system and "
8358                                 "send it to a btrfs developer, ref %Lu\n",
8359                                 dback->disk_bytenr);
8360                         ret = -EINVAL;
8361                         goto out;
8362                 }
8363
8364                 offset += dback->disk_bytenr;
8365                 offset -= entry->bytenr;
8366                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8367                 btrfs_set_file_extent_offset(leaf, fi, offset);
8368         }
8369
8370         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8371
8372         /*
8373          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8374          * only do this if we aren't using compression, otherwise it's a
8375          * trickier case.
8376          */
8377         if (!btrfs_file_extent_compression(leaf, fi))
8378                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8379         else
8380                 printf("ram bytes may be wrong?\n");
8381         btrfs_mark_buffer_dirty(leaf);
8382 out:
8383         err = btrfs_commit_transaction(trans, root);
8384         btrfs_release_path(path);
8385         return ret ? ret : err;
8386 }
8387
8388 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8389                            struct extent_record *rec)
8390 {
8391         struct extent_backref *back;
8392         struct data_backref *dback;
8393         struct extent_entry *entry, *best = NULL;
8394         LIST_HEAD(entries);
8395         int nr_entries = 0;
8396         int broken_entries = 0;
8397         int ret = 0;
8398         short mismatch = 0;
8399
8400         /*
8401          * Metadata is easy and the backrefs should always agree on bytenr and
8402          * size, if not we've got bigger issues.
8403          */
8404         if (rec->metadata)
8405                 return 0;
8406
8407         list_for_each_entry(back, &rec->backrefs, list) {
8408                 if (back->full_backref || !back->is_data)
8409                         continue;
8410
8411                 dback = to_data_backref(back);
8412
8413                 /*
8414                  * We only pay attention to backrefs that we found a real
8415                  * backref for.
8416                  */
8417                 if (dback->found_ref == 0)
8418                         continue;
8419
8420                 /*
8421                  * For now we only catch when the bytes don't match, not the
8422                  * bytenr.  We can easily do this at the same time, but I want
8423                  * to have a fs image to test on before we just add repair
8424                  * functionality willy-nilly so we know we won't screw up the
8425                  * repair.
8426                  */
8427
8428                 entry = find_entry(&entries, dback->disk_bytenr,
8429                                    dback->bytes);
8430                 if (!entry) {
8431                         entry = malloc(sizeof(struct extent_entry));
8432                         if (!entry) {
8433                                 ret = -ENOMEM;
8434                                 goto out;
8435                         }
8436                         memset(entry, 0, sizeof(*entry));
8437                         entry->bytenr = dback->disk_bytenr;
8438                         entry->bytes = dback->bytes;
8439                         list_add_tail(&entry->list, &entries);
8440                         nr_entries++;
8441                 }
8442
8443                 /*
8444                  * If we only have on entry we may think the entries agree when
8445                  * in reality they don't so we have to do some extra checking.
8446                  */
8447                 if (dback->disk_bytenr != rec->start ||
8448                     dback->bytes != rec->nr || back->broken)
8449                         mismatch = 1;
8450
8451                 if (back->broken) {
8452                         entry->broken++;
8453                         broken_entries++;
8454                 }
8455
8456                 entry->count++;
8457         }
8458
8459         /* Yay all the backrefs agree, carry on good sir */
8460         if (nr_entries <= 1 && !mismatch)
8461                 goto out;
8462
8463         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8464                 "%Lu\n", rec->start);
8465
8466         /*
8467          * First we want to see if the backrefs can agree amongst themselves who
8468          * is right, so figure out which one of the entries has the highest
8469          * count.
8470          */
8471         best = find_most_right_entry(&entries);
8472
8473         /*
8474          * Ok so we may have an even split between what the backrefs think, so
8475          * this is where we use the extent ref to see what it thinks.
8476          */
8477         if (!best) {
8478                 entry = find_entry(&entries, rec->start, rec->nr);
8479                 if (!entry && (!broken_entries || !rec->found_rec)) {
8480                         fprintf(stderr, "Backrefs don't agree with each other "
8481                                 "and extent record doesn't agree with anybody,"
8482                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8483                                 rec->start, rec->nr);
8484                         ret = -EINVAL;
8485                         goto out;
8486                 } else if (!entry) {
8487                         /*
8488                          * Ok our backrefs were broken, we'll assume this is the
8489                          * correct value and add an entry for this range.
8490                          */
8491                         entry = malloc(sizeof(struct extent_entry));
8492                         if (!entry) {
8493                                 ret = -ENOMEM;
8494                                 goto out;
8495                         }
8496                         memset(entry, 0, sizeof(*entry));
8497                         entry->bytenr = rec->start;
8498                         entry->bytes = rec->nr;
8499                         list_add_tail(&entry->list, &entries);
8500                         nr_entries++;
8501                 }
8502                 entry->count++;
8503                 best = find_most_right_entry(&entries);
8504                 if (!best) {
8505                         fprintf(stderr, "Backrefs and extent record evenly "
8506                                 "split on who is right, this is going to "
8507                                 "require user input to fix bytenr %Lu bytes "
8508                                 "%Lu\n", rec->start, rec->nr);
8509                         ret = -EINVAL;
8510                         goto out;
8511                 }
8512         }
8513
8514         /*
8515          * I don't think this can happen currently as we'll abort() if we catch
8516          * this case higher up, but in case somebody removes that we still can't
8517          * deal with it properly here yet, so just bail out of that's the case.
8518          */
8519         if (best->bytenr != rec->start) {
8520                 fprintf(stderr, "Extent start and backref starts don't match, "
8521                         "please use btrfs-image on this file system and send "
8522                         "it to a btrfs developer so they can make fsck fix "
8523                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8524                         rec->start, rec->nr);
8525                 ret = -EINVAL;
8526                 goto out;
8527         }
8528
8529         /*
8530          * Ok great we all agreed on an extent record, let's go find the real
8531          * references and fix up the ones that don't match.
8532          */
8533         list_for_each_entry(back, &rec->backrefs, list) {
8534                 if (back->full_backref || !back->is_data)
8535                         continue;
8536
8537                 dback = to_data_backref(back);
8538
8539                 /*
8540                  * Still ignoring backrefs that don't have a real ref attached
8541                  * to them.
8542                  */
8543                 if (dback->found_ref == 0)
8544                         continue;
8545
8546                 if (dback->bytes == best->bytes &&
8547                     dback->disk_bytenr == best->bytenr)
8548                         continue;
8549
8550                 ret = repair_ref(info, path, dback, best);
8551                 if (ret)
8552                         goto out;
8553         }
8554
8555         /*
8556          * Ok we messed with the actual refs, which means we need to drop our
8557          * entire cache and go back and rescan.  I know this is a huge pain and
8558          * adds a lot of extra work, but it's the only way to be safe.  Once all
8559          * the backrefs agree we may not need to do anything to the extent
8560          * record itself.
8561          */
8562         ret = -EAGAIN;
8563 out:
8564         while (!list_empty(&entries)) {
8565                 entry = list_entry(entries.next, struct extent_entry, list);
8566                 list_del_init(&entry->list);
8567                 free(entry);
8568         }
8569         return ret;
8570 }
8571
8572 static int process_duplicates(struct btrfs_root *root,
8573                               struct cache_tree *extent_cache,
8574                               struct extent_record *rec)
8575 {
8576         struct extent_record *good, *tmp;
8577         struct cache_extent *cache;
8578         int ret;
8579
8580         /*
8581          * If we found a extent record for this extent then return, or if we
8582          * have more than one duplicate we are likely going to need to delete
8583          * something.
8584          */
8585         if (rec->found_rec || rec->num_duplicates > 1)
8586                 return 0;
8587
8588         /* Shouldn't happen but just in case */
8589         BUG_ON(!rec->num_duplicates);
8590
8591         /*
8592          * So this happens if we end up with a backref that doesn't match the
8593          * actual extent entry.  So either the backref is bad or the extent
8594          * entry is bad.  Either way we want to have the extent_record actually
8595          * reflect what we found in the extent_tree, so we need to take the
8596          * duplicate out and use that as the extent_record since the only way we
8597          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8598          */
8599         remove_cache_extent(extent_cache, &rec->cache);
8600
8601         good = to_extent_record(rec->dups.next);
8602         list_del_init(&good->list);
8603         INIT_LIST_HEAD(&good->backrefs);
8604         INIT_LIST_HEAD(&good->dups);
8605         good->cache.start = good->start;
8606         good->cache.size = good->nr;
8607         good->content_checked = 0;
8608         good->owner_ref_checked = 0;
8609         good->num_duplicates = 0;
8610         good->refs = rec->refs;
8611         list_splice_init(&rec->backrefs, &good->backrefs);
8612         while (1) {
8613                 cache = lookup_cache_extent(extent_cache, good->start,
8614                                             good->nr);
8615                 if (!cache)
8616                         break;
8617                 tmp = container_of(cache, struct extent_record, cache);
8618
8619                 /*
8620                  * If we find another overlapping extent and it's found_rec is
8621                  * set then it's a duplicate and we need to try and delete
8622                  * something.
8623                  */
8624                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8625                         if (list_empty(&good->list))
8626                                 list_add_tail(&good->list,
8627                                               &duplicate_extents);
8628                         good->num_duplicates += tmp->num_duplicates + 1;
8629                         list_splice_init(&tmp->dups, &good->dups);
8630                         list_del_init(&tmp->list);
8631                         list_add_tail(&tmp->list, &good->dups);
8632                         remove_cache_extent(extent_cache, &tmp->cache);
8633                         continue;
8634                 }
8635
8636                 /*
8637                  * Ok we have another non extent item backed extent rec, so lets
8638                  * just add it to this extent and carry on like we did above.
8639                  */
8640                 good->refs += tmp->refs;
8641                 list_splice_init(&tmp->backrefs, &good->backrefs);
8642                 remove_cache_extent(extent_cache, &tmp->cache);
8643                 free(tmp);
8644         }
8645         ret = insert_cache_extent(extent_cache, &good->cache);
8646         BUG_ON(ret);
8647         free(rec);
8648         return good->num_duplicates ? 0 : 1;
8649 }
8650
8651 static int delete_duplicate_records(struct btrfs_root *root,
8652                                     struct extent_record *rec)
8653 {
8654         struct btrfs_trans_handle *trans;
8655         LIST_HEAD(delete_list);
8656         struct btrfs_path path;
8657         struct extent_record *tmp, *good, *n;
8658         int nr_del = 0;
8659         int ret = 0, err;
8660         struct btrfs_key key;
8661
8662         btrfs_init_path(&path);
8663
8664         good = rec;
8665         /* Find the record that covers all of the duplicates. */
8666         list_for_each_entry(tmp, &rec->dups, list) {
8667                 if (good->start < tmp->start)
8668                         continue;
8669                 if (good->nr > tmp->nr)
8670                         continue;
8671
8672                 if (tmp->start + tmp->nr < good->start + good->nr) {
8673                         fprintf(stderr, "Ok we have overlapping extents that "
8674                                 "aren't completely covered by each other, this "
8675                                 "is going to require more careful thought.  "
8676                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8677                                 tmp->start, tmp->nr, good->start, good->nr);
8678                         abort();
8679                 }
8680                 good = tmp;
8681         }
8682
8683         if (good != rec)
8684                 list_add_tail(&rec->list, &delete_list);
8685
8686         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8687                 if (tmp == good)
8688                         continue;
8689                 list_move_tail(&tmp->list, &delete_list);
8690         }
8691
8692         root = root->fs_info->extent_root;
8693         trans = btrfs_start_transaction(root, 1);
8694         if (IS_ERR(trans)) {
8695                 ret = PTR_ERR(trans);
8696                 goto out;
8697         }
8698
8699         list_for_each_entry(tmp, &delete_list, list) {
8700                 if (tmp->found_rec == 0)
8701                         continue;
8702                 key.objectid = tmp->start;
8703                 key.type = BTRFS_EXTENT_ITEM_KEY;
8704                 key.offset = tmp->nr;
8705
8706                 /* Shouldn't happen but just in case */
8707                 if (tmp->metadata) {
8708                         fprintf(stderr, "Well this shouldn't happen, extent "
8709                                 "record overlaps but is metadata? "
8710                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8711                         abort();
8712                 }
8713
8714                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8715                 if (ret) {
8716                         if (ret > 0)
8717                                 ret = -EINVAL;
8718                         break;
8719                 }
8720                 ret = btrfs_del_item(trans, root, &path);
8721                 if (ret)
8722                         break;
8723                 btrfs_release_path(&path);
8724                 nr_del++;
8725         }
8726         err = btrfs_commit_transaction(trans, root);
8727         if (err && !ret)
8728                 ret = err;
8729 out:
8730         while (!list_empty(&delete_list)) {
8731                 tmp = to_extent_record(delete_list.next);
8732                 list_del_init(&tmp->list);
8733                 if (tmp == rec)
8734                         continue;
8735                 free(tmp);
8736         }
8737
8738         while (!list_empty(&rec->dups)) {
8739                 tmp = to_extent_record(rec->dups.next);
8740                 list_del_init(&tmp->list);
8741                 free(tmp);
8742         }
8743
8744         btrfs_release_path(&path);
8745
8746         if (!ret && !nr_del)
8747                 rec->num_duplicates = 0;
8748
8749         return ret ? ret : nr_del;
8750 }
8751
8752 static int find_possible_backrefs(struct btrfs_fs_info *info,
8753                                   struct btrfs_path *path,
8754                                   struct cache_tree *extent_cache,
8755                                   struct extent_record *rec)
8756 {
8757         struct btrfs_root *root;
8758         struct extent_backref *back;
8759         struct data_backref *dback;
8760         struct cache_extent *cache;
8761         struct btrfs_file_extent_item *fi;
8762         struct btrfs_key key;
8763         u64 bytenr, bytes;
8764         int ret;
8765
8766         list_for_each_entry(back, &rec->backrefs, list) {
8767                 /* Don't care about full backrefs (poor unloved backrefs) */
8768                 if (back->full_backref || !back->is_data)
8769                         continue;
8770
8771                 dback = to_data_backref(back);
8772
8773                 /* We found this one, we don't need to do a lookup */
8774                 if (dback->found_ref)
8775                         continue;
8776
8777                 key.objectid = dback->root;
8778                 key.type = BTRFS_ROOT_ITEM_KEY;
8779                 key.offset = (u64)-1;
8780
8781                 root = btrfs_read_fs_root(info, &key);
8782
8783                 /* No root, definitely a bad ref, skip */
8784                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8785                         continue;
8786                 /* Other err, exit */
8787                 if (IS_ERR(root))
8788                         return PTR_ERR(root);
8789
8790                 key.objectid = dback->owner;
8791                 key.type = BTRFS_EXTENT_DATA_KEY;
8792                 key.offset = dback->offset;
8793                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8794                 if (ret) {
8795                         btrfs_release_path(path);
8796                         if (ret < 0)
8797                                 return ret;
8798                         /* Didn't find it, we can carry on */
8799                         ret = 0;
8800                         continue;
8801                 }
8802
8803                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8804                                     struct btrfs_file_extent_item);
8805                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8806                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8807                 btrfs_release_path(path);
8808                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8809                 if (cache) {
8810                         struct extent_record *tmp;
8811                         tmp = container_of(cache, struct extent_record, cache);
8812
8813                         /*
8814                          * If we found an extent record for the bytenr for this
8815                          * particular backref then we can't add it to our
8816                          * current extent record.  We only want to add backrefs
8817                          * that don't have a corresponding extent item in the
8818                          * extent tree since they likely belong to this record
8819                          * and we need to fix it if it doesn't match bytenrs.
8820                          */
8821                         if  (tmp->found_rec)
8822                                 continue;
8823                 }
8824
8825                 dback->found_ref += 1;
8826                 dback->disk_bytenr = bytenr;
8827                 dback->bytes = bytes;
8828
8829                 /*
8830                  * Set this so the verify backref code knows not to trust the
8831                  * values in this backref.
8832                  */
8833                 back->broken = 1;
8834         }
8835
8836         return 0;
8837 }
8838
8839 /*
8840  * Record orphan data ref into corresponding root.
8841  *
8842  * Return 0 if the extent item contains data ref and recorded.
8843  * Return 1 if the extent item contains no useful data ref
8844  *   On that case, it may contains only shared_dataref or metadata backref
8845  *   or the file extent exists(this should be handled by the extent bytenr
8846  *   recovery routine)
8847  * Return <0 if something goes wrong.
8848  */
8849 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8850                                       struct extent_record *rec)
8851 {
8852         struct btrfs_key key;
8853         struct btrfs_root *dest_root;
8854         struct extent_backref *back;
8855         struct data_backref *dback;
8856         struct orphan_data_extent *orphan;
8857         struct btrfs_path path;
8858         int recorded_data_ref = 0;
8859         int ret = 0;
8860
8861         if (rec->metadata)
8862                 return 1;
8863         btrfs_init_path(&path);
8864         list_for_each_entry(back, &rec->backrefs, list) {
8865                 if (back->full_backref || !back->is_data ||
8866                     !back->found_extent_tree)
8867                         continue;
8868                 dback = to_data_backref(back);
8869                 if (dback->found_ref)
8870                         continue;
8871                 key.objectid = dback->root;
8872                 key.type = BTRFS_ROOT_ITEM_KEY;
8873                 key.offset = (u64)-1;
8874
8875                 dest_root = btrfs_read_fs_root(fs_info, &key);
8876
8877                 /* For non-exist root we just skip it */
8878                 if (IS_ERR(dest_root) || !dest_root)
8879                         continue;
8880
8881                 key.objectid = dback->owner;
8882                 key.type = BTRFS_EXTENT_DATA_KEY;
8883                 key.offset = dback->offset;
8884
8885                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8886                 btrfs_release_path(&path);
8887                 /*
8888                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8889                  * we need to record it for inode/file extent rebuild.
8890                  * For ret > 0, we record it only for file extent rebuild.
8891                  * For ret == 0, the file extent exists but only bytenr
8892                  * mismatch, let the original bytenr fix routine to handle,
8893                  * don't record it.
8894                  */
8895                 if (ret == 0)
8896                         continue;
8897                 ret = 0;
8898                 orphan = malloc(sizeof(*orphan));
8899                 if (!orphan) {
8900                         ret = -ENOMEM;
8901                         goto out;
8902                 }
8903                 INIT_LIST_HEAD(&orphan->list);
8904                 orphan->root = dback->root;
8905                 orphan->objectid = dback->owner;
8906                 orphan->offset = dback->offset;
8907                 orphan->disk_bytenr = rec->cache.start;
8908                 orphan->disk_len = rec->cache.size;
8909                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8910                 recorded_data_ref = 1;
8911         }
8912 out:
8913         btrfs_release_path(&path);
8914         if (!ret)
8915                 return !recorded_data_ref;
8916         else
8917                 return ret;
8918 }
8919
8920 /*
8921  * when an incorrect extent item is found, this will delete
8922  * all of the existing entries for it and recreate them
8923  * based on what the tree scan found.
8924  */
8925 static int fixup_extent_refs(struct btrfs_fs_info *info,
8926                              struct cache_tree *extent_cache,
8927                              struct extent_record *rec)
8928 {
8929         struct btrfs_trans_handle *trans = NULL;
8930         int ret;
8931         struct btrfs_path path;
8932         struct list_head *cur = rec->backrefs.next;
8933         struct cache_extent *cache;
8934         struct extent_backref *back;
8935         int allocated = 0;
8936         u64 flags = 0;
8937
8938         if (rec->flag_block_full_backref)
8939                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8940
8941         btrfs_init_path(&path);
8942         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8943                 /*
8944                  * Sometimes the backrefs themselves are so broken they don't
8945                  * get attached to any meaningful rec, so first go back and
8946                  * check any of our backrefs that we couldn't find and throw
8947                  * them into the list if we find the backref so that
8948                  * verify_backrefs can figure out what to do.
8949                  */
8950                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8951                 if (ret < 0)
8952                         goto out;
8953         }
8954
8955         /* step one, make sure all of the backrefs agree */
8956         ret = verify_backrefs(info, &path, rec);
8957         if (ret < 0)
8958                 goto out;
8959
8960         trans = btrfs_start_transaction(info->extent_root, 1);
8961         if (IS_ERR(trans)) {
8962                 ret = PTR_ERR(trans);
8963                 goto out;
8964         }
8965
8966         /* step two, delete all the existing records */
8967         ret = delete_extent_records(trans, info->extent_root, &path,
8968                                     rec->start, rec->max_size);
8969
8970         if (ret < 0)
8971                 goto out;
8972
8973         /* was this block corrupt?  If so, don't add references to it */
8974         cache = lookup_cache_extent(info->corrupt_blocks,
8975                                     rec->start, rec->max_size);
8976         if (cache) {
8977                 ret = 0;
8978                 goto out;
8979         }
8980
8981         /* step three, recreate all the refs we did find */
8982         while(cur != &rec->backrefs) {
8983                 back = to_extent_backref(cur);
8984                 cur = cur->next;
8985
8986                 /*
8987                  * if we didn't find any references, don't create a
8988                  * new extent record
8989                  */
8990                 if (!back->found_ref)
8991                         continue;
8992
8993                 rec->bad_full_backref = 0;
8994                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8995                 allocated = 1;
8996
8997                 if (ret)
8998                         goto out;
8999         }
9000 out:
9001         if (trans) {
9002                 int err = btrfs_commit_transaction(trans, info->extent_root);
9003                 if (!ret)
9004                         ret = err;
9005         }
9006
9007         btrfs_release_path(&path);
9008         return ret;
9009 }
9010
9011 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9012                               struct extent_record *rec)
9013 {
9014         struct btrfs_trans_handle *trans;
9015         struct btrfs_root *root = fs_info->extent_root;
9016         struct btrfs_path path;
9017         struct btrfs_extent_item *ei;
9018         struct btrfs_key key;
9019         u64 flags;
9020         int ret = 0;
9021
9022         key.objectid = rec->start;
9023         if (rec->metadata) {
9024                 key.type = BTRFS_METADATA_ITEM_KEY;
9025                 key.offset = rec->info_level;
9026         } else {
9027                 key.type = BTRFS_EXTENT_ITEM_KEY;
9028                 key.offset = rec->max_size;
9029         }
9030
9031         trans = btrfs_start_transaction(root, 0);
9032         if (IS_ERR(trans))
9033                 return PTR_ERR(trans);
9034
9035         btrfs_init_path(&path);
9036         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9037         if (ret < 0) {
9038                 btrfs_release_path(&path);
9039                 btrfs_commit_transaction(trans, root);
9040                 return ret;
9041         } else if (ret) {
9042                 fprintf(stderr, "Didn't find extent for %llu\n",
9043                         (unsigned long long)rec->start);
9044                 btrfs_release_path(&path);
9045                 btrfs_commit_transaction(trans, root);
9046                 return -ENOENT;
9047         }
9048
9049         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9050                             struct btrfs_extent_item);
9051         flags = btrfs_extent_flags(path.nodes[0], ei);
9052         if (rec->flag_block_full_backref) {
9053                 fprintf(stderr, "setting full backref on %llu\n",
9054                         (unsigned long long)key.objectid);
9055                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9056         } else {
9057                 fprintf(stderr, "clearing full backref on %llu\n",
9058                         (unsigned long long)key.objectid);
9059                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9060         }
9061         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9062         btrfs_mark_buffer_dirty(path.nodes[0]);
9063         btrfs_release_path(&path);
9064         return btrfs_commit_transaction(trans, root);
9065 }
9066
9067 /* right now we only prune from the extent allocation tree */
9068 static int prune_one_block(struct btrfs_trans_handle *trans,
9069                            struct btrfs_fs_info *info,
9070                            struct btrfs_corrupt_block *corrupt)
9071 {
9072         int ret;
9073         struct btrfs_path path;
9074         struct extent_buffer *eb;
9075         u64 found;
9076         int slot;
9077         int nritems;
9078         int level = corrupt->level + 1;
9079
9080         btrfs_init_path(&path);
9081 again:
9082         /* we want to stop at the parent to our busted block */
9083         path.lowest_level = level;
9084
9085         ret = btrfs_search_slot(trans, info->extent_root,
9086                                 &corrupt->key, &path, -1, 1);
9087
9088         if (ret < 0)
9089                 goto out;
9090
9091         eb = path.nodes[level];
9092         if (!eb) {
9093                 ret = -ENOENT;
9094                 goto out;
9095         }
9096
9097         /*
9098          * hopefully the search gave us the block we want to prune,
9099          * lets try that first
9100          */
9101         slot = path.slots[level];
9102         found =  btrfs_node_blockptr(eb, slot);
9103         if (found == corrupt->cache.start)
9104                 goto del_ptr;
9105
9106         nritems = btrfs_header_nritems(eb);
9107
9108         /* the search failed, lets scan this node and hope we find it */
9109         for (slot = 0; slot < nritems; slot++) {
9110                 found =  btrfs_node_blockptr(eb, slot);
9111                 if (found == corrupt->cache.start)
9112                         goto del_ptr;
9113         }
9114         /*
9115          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9116          * to this block
9117          */
9118         if (eb == info->extent_root->node) {
9119                 ret = -ENOENT;
9120                 goto out;
9121         } else {
9122                 level++;
9123                 btrfs_release_path(&path);
9124                 goto again;
9125         }
9126
9127 del_ptr:
9128         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9129         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9130
9131 out:
9132         btrfs_release_path(&path);
9133         return ret;
9134 }
9135
9136 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9137 {
9138         struct btrfs_trans_handle *trans = NULL;
9139         struct cache_extent *cache;
9140         struct btrfs_corrupt_block *corrupt;
9141
9142         while (1) {
9143                 cache = search_cache_extent(info->corrupt_blocks, 0);
9144                 if (!cache)
9145                         break;
9146                 if (!trans) {
9147                         trans = btrfs_start_transaction(info->extent_root, 1);
9148                         if (IS_ERR(trans))
9149                                 return PTR_ERR(trans);
9150                 }
9151                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9152                 prune_one_block(trans, info, corrupt);
9153                 remove_cache_extent(info->corrupt_blocks, cache);
9154         }
9155         if (trans)
9156                 return btrfs_commit_transaction(trans, info->extent_root);
9157         return 0;
9158 }
9159
9160 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9161 {
9162         struct btrfs_block_group_cache *cache;
9163         u64 start, end;
9164         int ret;
9165
9166         while (1) {
9167                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9168                                             &start, &end, EXTENT_DIRTY);
9169                 if (ret)
9170                         break;
9171                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9172                                    GFP_NOFS);
9173         }
9174
9175         start = 0;
9176         while (1) {
9177                 cache = btrfs_lookup_first_block_group(fs_info, start);
9178                 if (!cache)
9179                         break;
9180                 if (cache->cached)
9181                         cache->cached = 0;
9182                 start = cache->key.objectid + cache->key.offset;
9183         }
9184 }
9185
9186 static int check_extent_refs(struct btrfs_root *root,
9187                              struct cache_tree *extent_cache)
9188 {
9189         struct extent_record *rec;
9190         struct cache_extent *cache;
9191         int err = 0;
9192         int ret = 0;
9193         int fixed = 0;
9194         int had_dups = 0;
9195         int recorded = 0;
9196
9197         if (repair) {
9198                 /*
9199                  * if we're doing a repair, we have to make sure
9200                  * we don't allocate from the problem extents.
9201                  * In the worst case, this will be all the
9202                  * extents in the FS
9203                  */
9204                 cache = search_cache_extent(extent_cache, 0);
9205                 while(cache) {
9206                         rec = container_of(cache, struct extent_record, cache);
9207                         set_extent_dirty(root->fs_info->excluded_extents,
9208                                          rec->start,
9209                                          rec->start + rec->max_size - 1,
9210                                          GFP_NOFS);
9211                         cache = next_cache_extent(cache);
9212                 }
9213
9214                 /* pin down all the corrupted blocks too */
9215                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9216                 while(cache) {
9217                         set_extent_dirty(root->fs_info->excluded_extents,
9218                                          cache->start,
9219                                          cache->start + cache->size - 1,
9220                                          GFP_NOFS);
9221                         cache = next_cache_extent(cache);
9222                 }
9223                 prune_corrupt_blocks(root->fs_info);
9224                 reset_cached_block_groups(root->fs_info);
9225         }
9226
9227         reset_cached_block_groups(root->fs_info);
9228
9229         /*
9230          * We need to delete any duplicate entries we find first otherwise we
9231          * could mess up the extent tree when we have backrefs that actually
9232          * belong to a different extent item and not the weird duplicate one.
9233          */
9234         while (repair && !list_empty(&duplicate_extents)) {
9235                 rec = to_extent_record(duplicate_extents.next);
9236                 list_del_init(&rec->list);
9237
9238                 /* Sometimes we can find a backref before we find an actual
9239                  * extent, so we need to process it a little bit to see if there
9240                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9241                  * if this is a backref screwup.  If we need to delete stuff
9242                  * process_duplicates() will return 0, otherwise it will return
9243                  * 1 and we
9244                  */
9245                 if (process_duplicates(root, extent_cache, rec))
9246                         continue;
9247                 ret = delete_duplicate_records(root, rec);
9248                 if (ret < 0)
9249                         return ret;
9250                 /*
9251                  * delete_duplicate_records will return the number of entries
9252                  * deleted, so if it's greater than 0 then we know we actually
9253                  * did something and we need to remove.
9254                  */
9255                 if (ret)
9256                         had_dups = 1;
9257         }
9258
9259         if (had_dups)
9260                 return -EAGAIN;
9261
9262         while(1) {
9263                 int cur_err = 0;
9264
9265                 fixed = 0;
9266                 recorded = 0;
9267                 cache = search_cache_extent(extent_cache, 0);
9268                 if (!cache)
9269                         break;
9270                 rec = container_of(cache, struct extent_record, cache);
9271                 if (rec->num_duplicates) {
9272                         fprintf(stderr, "extent item %llu has multiple extent "
9273                                 "items\n", (unsigned long long)rec->start);
9274                         err = 1;
9275                         cur_err = 1;
9276                 }
9277
9278                 if (rec->refs != rec->extent_item_refs) {
9279                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9280                                 (unsigned long long)rec->start,
9281                                 (unsigned long long)rec->nr);
9282                         fprintf(stderr, "extent item %llu, found %llu\n",
9283                                 (unsigned long long)rec->extent_item_refs,
9284                                 (unsigned long long)rec->refs);
9285                         ret = record_orphan_data_extents(root->fs_info, rec);
9286                         if (ret < 0)
9287                                 goto repair_abort;
9288                         if (ret == 0) {
9289                                 recorded = 1;
9290                         } else {
9291                                 /*
9292                                  * we can't use the extent to repair file
9293                                  * extent, let the fallback method handle it.
9294                                  */
9295                                 if (!fixed && repair) {
9296                                         ret = fixup_extent_refs(
9297                                                         root->fs_info,
9298                                                         extent_cache, rec);
9299                                         if (ret)
9300                                                 goto repair_abort;
9301                                         fixed = 1;
9302                                 }
9303                         }
9304                         err = 1;
9305                         cur_err = 1;
9306                 }
9307                 if (all_backpointers_checked(rec, 1)) {
9308                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9309                                 (unsigned long long)rec->start,
9310                                 (unsigned long long)rec->nr);
9311
9312                         if (!fixed && !recorded && repair) {
9313                                 ret = fixup_extent_refs(root->fs_info,
9314                                                         extent_cache, rec);
9315                                 if (ret)
9316                                         goto repair_abort;
9317                                 fixed = 1;
9318                         }
9319                         cur_err = 1;
9320                         err = 1;
9321                 }
9322                 if (!rec->owner_ref_checked) {
9323                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9324                                 (unsigned long long)rec->start,
9325                                 (unsigned long long)rec->nr);
9326                         if (!fixed && !recorded && repair) {
9327                                 ret = fixup_extent_refs(root->fs_info,
9328                                                         extent_cache, rec);
9329                                 if (ret)
9330                                         goto repair_abort;
9331                                 fixed = 1;
9332                         }
9333                         err = 1;
9334                         cur_err = 1;
9335                 }
9336                 if (rec->bad_full_backref) {
9337                         fprintf(stderr, "bad full backref, on [%llu]\n",
9338                                 (unsigned long long)rec->start);
9339                         if (repair) {
9340                                 ret = fixup_extent_flags(root->fs_info, rec);
9341                                 if (ret)
9342                                         goto repair_abort;
9343                                 fixed = 1;
9344                         }
9345                         err = 1;
9346                         cur_err = 1;
9347                 }
9348                 /*
9349                  * Although it's not a extent ref's problem, we reuse this
9350                  * routine for error reporting.
9351                  * No repair function yet.
9352                  */
9353                 if (rec->crossing_stripes) {
9354                         fprintf(stderr,
9355                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9356                                 rec->start, rec->start + rec->max_size);
9357                         err = 1;
9358                         cur_err = 1;
9359                 }
9360
9361                 if (rec->wrong_chunk_type) {
9362                         fprintf(stderr,
9363                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9364                                 rec->start, rec->start + rec->max_size);
9365                         err = 1;
9366                         cur_err = 1;
9367                 }
9368
9369                 remove_cache_extent(extent_cache, cache);
9370                 free_all_extent_backrefs(rec);
9371                 if (!init_extent_tree && repair && (!cur_err || fixed))
9372                         clear_extent_dirty(root->fs_info->excluded_extents,
9373                                            rec->start,
9374                                            rec->start + rec->max_size - 1,
9375                                            GFP_NOFS);
9376                 free(rec);
9377         }
9378 repair_abort:
9379         if (repair) {
9380                 if (ret && ret != -EAGAIN) {
9381                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9382                         exit(1);
9383                 } else if (!ret) {
9384                         struct btrfs_trans_handle *trans;
9385
9386                         root = root->fs_info->extent_root;
9387                         trans = btrfs_start_transaction(root, 1);
9388                         if (IS_ERR(trans)) {
9389                                 ret = PTR_ERR(trans);
9390                                 goto repair_abort;
9391                         }
9392
9393                         btrfs_fix_block_accounting(trans, root);
9394                         ret = btrfs_commit_transaction(trans, root);
9395                         if (ret)
9396                                 goto repair_abort;
9397                 }
9398                 if (err)
9399                         fprintf(stderr, "repaired damaged extent references\n");
9400                 return ret;
9401         }
9402         return err;
9403 }
9404
9405 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9406 {
9407         u64 stripe_size;
9408
9409         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9410                 stripe_size = length;
9411                 stripe_size /= num_stripes;
9412         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9413                 stripe_size = length * 2;
9414                 stripe_size /= num_stripes;
9415         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9416                 stripe_size = length;
9417                 stripe_size /= (num_stripes - 1);
9418         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9419                 stripe_size = length;
9420                 stripe_size /= (num_stripes - 2);
9421         } else {
9422                 stripe_size = length;
9423         }
9424         return stripe_size;
9425 }
9426
9427 /*
9428  * Check the chunk with its block group/dev list ref:
9429  * Return 0 if all refs seems valid.
9430  * Return 1 if part of refs seems valid, need later check for rebuild ref
9431  * like missing block group and needs to search extent tree to rebuild them.
9432  * Return -1 if essential refs are missing and unable to rebuild.
9433  */
9434 static int check_chunk_refs(struct chunk_record *chunk_rec,
9435                             struct block_group_tree *block_group_cache,
9436                             struct device_extent_tree *dev_extent_cache,
9437                             int silent)
9438 {
9439         struct cache_extent *block_group_item;
9440         struct block_group_record *block_group_rec;
9441         struct cache_extent *dev_extent_item;
9442         struct device_extent_record *dev_extent_rec;
9443         u64 devid;
9444         u64 offset;
9445         u64 length;
9446         int metadump_v2 = 0;
9447         int i;
9448         int ret = 0;
9449
9450         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9451                                                chunk_rec->offset,
9452                                                chunk_rec->length);
9453         if (block_group_item) {
9454                 block_group_rec = container_of(block_group_item,
9455                                                struct block_group_record,
9456                                                cache);
9457                 if (chunk_rec->length != block_group_rec->offset ||
9458                     chunk_rec->offset != block_group_rec->objectid ||
9459                     (!metadump_v2 &&
9460                      chunk_rec->type_flags != block_group_rec->flags)) {
9461                         if (!silent)
9462                                 fprintf(stderr,
9463                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9464                                         chunk_rec->objectid,
9465                                         chunk_rec->type,
9466                                         chunk_rec->offset,
9467                                         chunk_rec->length,
9468                                         chunk_rec->offset,
9469                                         chunk_rec->type_flags,
9470                                         block_group_rec->objectid,
9471                                         block_group_rec->type,
9472                                         block_group_rec->offset,
9473                                         block_group_rec->offset,
9474                                         block_group_rec->objectid,
9475                                         block_group_rec->flags);
9476                         ret = -1;
9477                 } else {
9478                         list_del_init(&block_group_rec->list);
9479                         chunk_rec->bg_rec = block_group_rec;
9480                 }
9481         } else {
9482                 if (!silent)
9483                         fprintf(stderr,
9484                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9485                                 chunk_rec->objectid,
9486                                 chunk_rec->type,
9487                                 chunk_rec->offset,
9488                                 chunk_rec->length,
9489                                 chunk_rec->offset,
9490                                 chunk_rec->type_flags);
9491                 ret = 1;
9492         }
9493
9494         if (metadump_v2)
9495                 return ret;
9496
9497         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9498                                     chunk_rec->num_stripes);
9499         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9500                 devid = chunk_rec->stripes[i].devid;
9501                 offset = chunk_rec->stripes[i].offset;
9502                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9503                                                        devid, offset, length);
9504                 if (dev_extent_item) {
9505                         dev_extent_rec = container_of(dev_extent_item,
9506                                                 struct device_extent_record,
9507                                                 cache);
9508                         if (dev_extent_rec->objectid != devid ||
9509                             dev_extent_rec->offset != offset ||
9510                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9511                             dev_extent_rec->length != length) {
9512                                 if (!silent)
9513                                         fprintf(stderr,
9514                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9515                                                 chunk_rec->objectid,
9516                                                 chunk_rec->type,
9517                                                 chunk_rec->offset,
9518                                                 chunk_rec->stripes[i].devid,
9519                                                 chunk_rec->stripes[i].offset,
9520                                                 dev_extent_rec->objectid,
9521                                                 dev_extent_rec->offset,
9522                                                 dev_extent_rec->length);
9523                                 ret = -1;
9524                         } else {
9525                                 list_move(&dev_extent_rec->chunk_list,
9526                                           &chunk_rec->dextents);
9527                         }
9528                 } else {
9529                         if (!silent)
9530                                 fprintf(stderr,
9531                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9532                                         chunk_rec->objectid,
9533                                         chunk_rec->type,
9534                                         chunk_rec->offset,
9535                                         chunk_rec->stripes[i].devid,
9536                                         chunk_rec->stripes[i].offset);
9537                         ret = -1;
9538                 }
9539         }
9540         return ret;
9541 }
9542
9543 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9544 int check_chunks(struct cache_tree *chunk_cache,
9545                  struct block_group_tree *block_group_cache,
9546                  struct device_extent_tree *dev_extent_cache,
9547                  struct list_head *good, struct list_head *bad,
9548                  struct list_head *rebuild, int silent)
9549 {
9550         struct cache_extent *chunk_item;
9551         struct chunk_record *chunk_rec;
9552         struct block_group_record *bg_rec;
9553         struct device_extent_record *dext_rec;
9554         int err;
9555         int ret = 0;
9556
9557         chunk_item = first_cache_extent(chunk_cache);
9558         while (chunk_item) {
9559                 chunk_rec = container_of(chunk_item, struct chunk_record,
9560                                          cache);
9561                 err = check_chunk_refs(chunk_rec, block_group_cache,
9562                                        dev_extent_cache, silent);
9563                 if (err < 0)
9564                         ret = err;
9565                 if (err == 0 && good)
9566                         list_add_tail(&chunk_rec->list, good);
9567                 if (err > 0 && rebuild)
9568                         list_add_tail(&chunk_rec->list, rebuild);
9569                 if (err < 0 && bad)
9570                         list_add_tail(&chunk_rec->list, bad);
9571                 chunk_item = next_cache_extent(chunk_item);
9572         }
9573
9574         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9575                 if (!silent)
9576                         fprintf(stderr,
9577                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9578                                 bg_rec->objectid,
9579                                 bg_rec->offset,
9580                                 bg_rec->flags);
9581                 if (!ret)
9582                         ret = 1;
9583         }
9584
9585         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9586                             chunk_list) {
9587                 if (!silent)
9588                         fprintf(stderr,
9589                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9590                                 dext_rec->objectid,
9591                                 dext_rec->offset,
9592                                 dext_rec->length);
9593                 if (!ret)
9594                         ret = 1;
9595         }
9596         return ret;
9597 }
9598
9599
9600 static int check_device_used(struct device_record *dev_rec,
9601                              struct device_extent_tree *dext_cache)
9602 {
9603         struct cache_extent *cache;
9604         struct device_extent_record *dev_extent_rec;
9605         u64 total_byte = 0;
9606
9607         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9608         while (cache) {
9609                 dev_extent_rec = container_of(cache,
9610                                               struct device_extent_record,
9611                                               cache);
9612                 if (dev_extent_rec->objectid != dev_rec->devid)
9613                         break;
9614
9615                 list_del_init(&dev_extent_rec->device_list);
9616                 total_byte += dev_extent_rec->length;
9617                 cache = next_cache_extent(cache);
9618         }
9619
9620         if (total_byte != dev_rec->byte_used) {
9621                 fprintf(stderr,
9622                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9623                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9624                         dev_rec->type, dev_rec->offset);
9625                 return -1;
9626         } else {
9627                 return 0;
9628         }
9629 }
9630
9631 /* check btrfs_dev_item -> btrfs_dev_extent */
9632 static int check_devices(struct rb_root *dev_cache,
9633                          struct device_extent_tree *dev_extent_cache)
9634 {
9635         struct rb_node *dev_node;
9636         struct device_record *dev_rec;
9637         struct device_extent_record *dext_rec;
9638         int err;
9639         int ret = 0;
9640
9641         dev_node = rb_first(dev_cache);
9642         while (dev_node) {
9643                 dev_rec = container_of(dev_node, struct device_record, node);
9644                 err = check_device_used(dev_rec, dev_extent_cache);
9645                 if (err)
9646                         ret = err;
9647
9648                 dev_node = rb_next(dev_node);
9649         }
9650         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9651                             device_list) {
9652                 fprintf(stderr,
9653                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9654                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9655                 if (!ret)
9656                         ret = 1;
9657         }
9658         return ret;
9659 }
9660
9661 static int add_root_item_to_list(struct list_head *head,
9662                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9663                                   u8 level, u8 drop_level,
9664                                   int level_size, struct btrfs_key *drop_key)
9665 {
9666
9667         struct root_item_record *ri_rec;
9668         ri_rec = malloc(sizeof(*ri_rec));
9669         if (!ri_rec)
9670                 return -ENOMEM;
9671         ri_rec->bytenr = bytenr;
9672         ri_rec->objectid = objectid;
9673         ri_rec->level = level;
9674         ri_rec->level_size = level_size;
9675         ri_rec->drop_level = drop_level;
9676         ri_rec->last_snapshot = last_snapshot;
9677         if (drop_key)
9678                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9679         list_add_tail(&ri_rec->list, head);
9680
9681         return 0;
9682 }
9683
9684 static void free_root_item_list(struct list_head *list)
9685 {
9686         struct root_item_record *ri_rec;
9687
9688         while (!list_empty(list)) {
9689                 ri_rec = list_first_entry(list, struct root_item_record,
9690                                           list);
9691                 list_del_init(&ri_rec->list);
9692                 free(ri_rec);
9693         }
9694 }
9695
9696 static int deal_root_from_list(struct list_head *list,
9697                                struct btrfs_root *root,
9698                                struct block_info *bits,
9699                                int bits_nr,
9700                                struct cache_tree *pending,
9701                                struct cache_tree *seen,
9702                                struct cache_tree *reada,
9703                                struct cache_tree *nodes,
9704                                struct cache_tree *extent_cache,
9705                                struct cache_tree *chunk_cache,
9706                                struct rb_root *dev_cache,
9707                                struct block_group_tree *block_group_cache,
9708                                struct device_extent_tree *dev_extent_cache)
9709 {
9710         int ret = 0;
9711         u64 last;
9712
9713         while (!list_empty(list)) {
9714                 struct root_item_record *rec;
9715                 struct extent_buffer *buf;
9716                 rec = list_entry(list->next,
9717                                  struct root_item_record, list);
9718                 last = 0;
9719                 buf = read_tree_block(root->fs_info->tree_root,
9720                                       rec->bytenr, rec->level_size, 0);
9721                 if (!extent_buffer_uptodate(buf)) {
9722                         free_extent_buffer(buf);
9723                         ret = -EIO;
9724                         break;
9725                 }
9726                 ret = add_root_to_pending(buf, extent_cache, pending,
9727                                     seen, nodes, rec->objectid);
9728                 if (ret < 0)
9729                         break;
9730                 /*
9731                  * To rebuild extent tree, we need deal with snapshot
9732                  * one by one, otherwise we deal with node firstly which
9733                  * can maximize readahead.
9734                  */
9735                 while (1) {
9736                         ret = run_next_block(root, bits, bits_nr, &last,
9737                                              pending, seen, reada, nodes,
9738                                              extent_cache, chunk_cache,
9739                                              dev_cache, block_group_cache,
9740                                              dev_extent_cache, rec);
9741                         if (ret != 0)
9742                                 break;
9743                 }
9744                 free_extent_buffer(buf);
9745                 list_del(&rec->list);
9746                 free(rec);
9747                 if (ret < 0)
9748                         break;
9749         }
9750         while (ret >= 0) {
9751                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9752                                      reada, nodes, extent_cache, chunk_cache,
9753                                      dev_cache, block_group_cache,
9754                                      dev_extent_cache, NULL);
9755                 if (ret != 0) {
9756                         if (ret > 0)
9757                                 ret = 0;
9758                         break;
9759                 }
9760         }
9761         return ret;
9762 }
9763
9764 static int check_chunks_and_extents(struct btrfs_root *root)
9765 {
9766         struct rb_root dev_cache;
9767         struct cache_tree chunk_cache;
9768         struct block_group_tree block_group_cache;
9769         struct device_extent_tree dev_extent_cache;
9770         struct cache_tree extent_cache;
9771         struct cache_tree seen;
9772         struct cache_tree pending;
9773         struct cache_tree reada;
9774         struct cache_tree nodes;
9775         struct extent_io_tree excluded_extents;
9776         struct cache_tree corrupt_blocks;
9777         struct btrfs_path path;
9778         struct btrfs_key key;
9779         struct btrfs_key found_key;
9780         int ret, err = 0;
9781         struct block_info *bits;
9782         int bits_nr;
9783         struct extent_buffer *leaf;
9784         int slot;
9785         struct btrfs_root_item ri;
9786         struct list_head dropping_trees;
9787         struct list_head normal_trees;
9788         struct btrfs_root *root1;
9789         u64 objectid;
9790         u32 level_size;
9791         u8 level;
9792
9793         dev_cache = RB_ROOT;
9794         cache_tree_init(&chunk_cache);
9795         block_group_tree_init(&block_group_cache);
9796         device_extent_tree_init(&dev_extent_cache);
9797
9798         cache_tree_init(&extent_cache);
9799         cache_tree_init(&seen);
9800         cache_tree_init(&pending);
9801         cache_tree_init(&nodes);
9802         cache_tree_init(&reada);
9803         cache_tree_init(&corrupt_blocks);
9804         extent_io_tree_init(&excluded_extents);
9805         INIT_LIST_HEAD(&dropping_trees);
9806         INIT_LIST_HEAD(&normal_trees);
9807
9808         if (repair) {
9809                 root->fs_info->excluded_extents = &excluded_extents;
9810                 root->fs_info->fsck_extent_cache = &extent_cache;
9811                 root->fs_info->free_extent_hook = free_extent_hook;
9812                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9813         }
9814
9815         bits_nr = 1024;
9816         bits = malloc(bits_nr * sizeof(struct block_info));
9817         if (!bits) {
9818                 perror("malloc");
9819                 exit(1);
9820         }
9821
9822         if (ctx.progress_enabled) {
9823                 ctx.tp = TASK_EXTENTS;
9824                 task_start(ctx.info);
9825         }
9826
9827 again:
9828         root1 = root->fs_info->tree_root;
9829         level = btrfs_header_level(root1->node);
9830         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9831                                     root1->node->start, 0, level, 0,
9832                                     root1->nodesize, NULL);
9833         if (ret < 0)
9834                 goto out;
9835         root1 = root->fs_info->chunk_root;
9836         level = btrfs_header_level(root1->node);
9837         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9838                                     root1->node->start, 0, level, 0,
9839                                     root1->nodesize, NULL);
9840         if (ret < 0)
9841                 goto out;
9842         btrfs_init_path(&path);
9843         key.offset = 0;
9844         key.objectid = 0;
9845         key.type = BTRFS_ROOT_ITEM_KEY;
9846         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9847                                         &key, &path, 0, 0);
9848         if (ret < 0)
9849                 goto out;
9850         while(1) {
9851                 leaf = path.nodes[0];
9852                 slot = path.slots[0];
9853                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9854                         ret = btrfs_next_leaf(root, &path);
9855                         if (ret != 0)
9856                                 break;
9857                         leaf = path.nodes[0];
9858                         slot = path.slots[0];
9859                 }
9860                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9861                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9862                         unsigned long offset;
9863                         u64 last_snapshot;
9864
9865                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9866                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9867                         last_snapshot = btrfs_root_last_snapshot(&ri);
9868                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9869                                 level = btrfs_root_level(&ri);
9870                                 level_size = root->nodesize;
9871                                 ret = add_root_item_to_list(&normal_trees,
9872                                                 found_key.objectid,
9873                                                 btrfs_root_bytenr(&ri),
9874                                                 last_snapshot, level,
9875                                                 0, level_size, NULL);
9876                                 if (ret < 0)
9877                                         goto out;
9878                         } else {
9879                                 level = btrfs_root_level(&ri);
9880                                 level_size = root->nodesize;
9881                                 objectid = found_key.objectid;
9882                                 btrfs_disk_key_to_cpu(&found_key,
9883                                                       &ri.drop_progress);
9884                                 ret = add_root_item_to_list(&dropping_trees,
9885                                                 objectid,
9886                                                 btrfs_root_bytenr(&ri),
9887                                                 last_snapshot, level,
9888                                                 ri.drop_level,
9889                                                 level_size, &found_key);
9890                                 if (ret < 0)
9891                                         goto out;
9892                         }
9893                 }
9894                 path.slots[0]++;
9895         }
9896         btrfs_release_path(&path);
9897
9898         /*
9899          * check_block can return -EAGAIN if it fixes something, please keep
9900          * this in mind when dealing with return values from these functions, if
9901          * we get -EAGAIN we want to fall through and restart the loop.
9902          */
9903         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9904                                   &seen, &reada, &nodes, &extent_cache,
9905                                   &chunk_cache, &dev_cache, &block_group_cache,
9906                                   &dev_extent_cache);
9907         if (ret < 0) {
9908                 if (ret == -EAGAIN)
9909                         goto loop;
9910                 goto out;
9911         }
9912         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9913                                   &pending, &seen, &reada, &nodes,
9914                                   &extent_cache, &chunk_cache, &dev_cache,
9915                                   &block_group_cache, &dev_extent_cache);
9916         if (ret < 0) {
9917                 if (ret == -EAGAIN)
9918                         goto loop;
9919                 goto out;
9920         }
9921
9922         ret = check_chunks(&chunk_cache, &block_group_cache,
9923                            &dev_extent_cache, NULL, NULL, NULL, 0);
9924         if (ret) {
9925                 if (ret == -EAGAIN)
9926                         goto loop;
9927                 err = ret;
9928         }
9929
9930         ret = check_extent_refs(root, &extent_cache);
9931         if (ret < 0) {
9932                 if (ret == -EAGAIN)
9933                         goto loop;
9934                 goto out;
9935         }
9936
9937         ret = check_devices(&dev_cache, &dev_extent_cache);
9938         if (ret && err)
9939                 ret = err;
9940
9941 out:
9942         task_stop(ctx.info);
9943         if (repair) {
9944                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9945                 extent_io_tree_cleanup(&excluded_extents);
9946                 root->fs_info->fsck_extent_cache = NULL;
9947                 root->fs_info->free_extent_hook = NULL;
9948                 root->fs_info->corrupt_blocks = NULL;
9949                 root->fs_info->excluded_extents = NULL;
9950         }
9951         free(bits);
9952         free_chunk_cache_tree(&chunk_cache);
9953         free_device_cache_tree(&dev_cache);
9954         free_block_group_tree(&block_group_cache);
9955         free_device_extent_tree(&dev_extent_cache);
9956         free_extent_cache_tree(&seen);
9957         free_extent_cache_tree(&pending);
9958         free_extent_cache_tree(&reada);
9959         free_extent_cache_tree(&nodes);
9960         return ret;
9961 loop:
9962         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9963         free_extent_cache_tree(&seen);
9964         free_extent_cache_tree(&pending);
9965         free_extent_cache_tree(&reada);
9966         free_extent_cache_tree(&nodes);
9967         free_chunk_cache_tree(&chunk_cache);
9968         free_block_group_tree(&block_group_cache);
9969         free_device_cache_tree(&dev_cache);
9970         free_device_extent_tree(&dev_extent_cache);
9971         free_extent_record_cache(root->fs_info, &extent_cache);
9972         free_root_item_list(&normal_trees);
9973         free_root_item_list(&dropping_trees);
9974         extent_io_tree_cleanup(&excluded_extents);
9975         goto again;
9976 }
9977
9978 /*
9979  * Check backrefs of a tree block given by @bytenr or @eb.
9980  *
9981  * @root:       the root containing the @bytenr or @eb
9982  * @eb:         tree block extent buffer, can be NULL
9983  * @bytenr:     bytenr of the tree block to search
9984  * @level:      tree level of the tree block
9985  * @owner:      owner of the tree block
9986  *
9987  * Return >0 for any error found and output error message
9988  * Return 0 for no error found
9989  */
9990 static int check_tree_block_ref(struct btrfs_root *root,
9991                                 struct extent_buffer *eb, u64 bytenr,
9992                                 int level, u64 owner)
9993 {
9994         struct btrfs_key key;
9995         struct btrfs_root *extent_root = root->fs_info->extent_root;
9996         struct btrfs_path path;
9997         struct btrfs_extent_item *ei;
9998         struct btrfs_extent_inline_ref *iref;
9999         struct extent_buffer *leaf;
10000         unsigned long end;
10001         unsigned long ptr;
10002         int slot;
10003         int skinny_level;
10004         int type;
10005         u32 nodesize = root->nodesize;
10006         u32 item_size;
10007         u64 offset;
10008         int found_ref = 0;
10009         int err = 0;
10010         int ret;
10011
10012         btrfs_init_path(&path);
10013         key.objectid = bytenr;
10014         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10015                 key.type = BTRFS_METADATA_ITEM_KEY;
10016         else
10017                 key.type = BTRFS_EXTENT_ITEM_KEY;
10018         key.offset = (u64)-1;
10019
10020         /* Search for the backref in extent tree */
10021         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10022         if (ret < 0) {
10023                 err |= BACKREF_MISSING;
10024                 goto out;
10025         }
10026         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10027         if (ret) {
10028                 err |= BACKREF_MISSING;
10029                 goto out;
10030         }
10031
10032         leaf = path.nodes[0];
10033         slot = path.slots[0];
10034         btrfs_item_key_to_cpu(leaf, &key, slot);
10035
10036         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10037
10038         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10039                 skinny_level = (int)key.offset;
10040                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10041         } else {
10042                 struct btrfs_tree_block_info *info;
10043
10044                 info = (struct btrfs_tree_block_info *)(ei + 1);
10045                 skinny_level = btrfs_tree_block_level(leaf, info);
10046                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10047         }
10048
10049         if (eb) {
10050                 u64 header_gen;
10051                 u64 extent_gen;
10052
10053                 if (!(btrfs_extent_flags(leaf, ei) &
10054                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10055                         error(
10056                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10057                                 key.objectid, nodesize,
10058                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10059                         err = BACKREF_MISMATCH;
10060                 }
10061                 header_gen = btrfs_header_generation(eb);
10062                 extent_gen = btrfs_extent_generation(leaf, ei);
10063                 if (header_gen != extent_gen) {
10064                         error(
10065         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10066                                 key.objectid, nodesize, header_gen,
10067                                 extent_gen);
10068                         err = BACKREF_MISMATCH;
10069                 }
10070                 if (level != skinny_level) {
10071                         error(
10072                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10073                                 key.objectid, nodesize, level, skinny_level);
10074                         err = BACKREF_MISMATCH;
10075                 }
10076                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10077                         error(
10078                         "extent[%llu %u] is referred by other roots than %llu",
10079                                 key.objectid, nodesize, root->objectid);
10080                         err = BACKREF_MISMATCH;
10081                 }
10082         }
10083
10084         /*
10085          * Iterate the extent/metadata item to find the exact backref
10086          */
10087         item_size = btrfs_item_size_nr(leaf, slot);
10088         ptr = (unsigned long)iref;
10089         end = (unsigned long)ei + item_size;
10090         while (ptr < end) {
10091                 iref = (struct btrfs_extent_inline_ref *)ptr;
10092                 type = btrfs_extent_inline_ref_type(leaf, iref);
10093                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10094
10095                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10096                         (offset == root->objectid || offset == owner)) {
10097                         found_ref = 1;
10098                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10099                         /* Check if the backref points to valid referencer */
10100                         found_ref = !check_tree_block_ref(root, NULL, offset,
10101                                                           level + 1, owner);
10102                 }
10103
10104                 if (found_ref)
10105                         break;
10106                 ptr += btrfs_extent_inline_ref_size(type);
10107         }
10108
10109         /*
10110          * Inlined extent item doesn't have what we need, check
10111          * TREE_BLOCK_REF_KEY
10112          */
10113         if (!found_ref) {
10114                 btrfs_release_path(&path);
10115                 key.objectid = bytenr;
10116                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10117                 key.offset = root->objectid;
10118
10119                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10120                 if (!ret)
10121                         found_ref = 1;
10122         }
10123         if (!found_ref)
10124                 err |= BACKREF_MISSING;
10125 out:
10126         btrfs_release_path(&path);
10127         if (eb && (err & BACKREF_MISSING))
10128                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10129                         bytenr, nodesize, owner, level);
10130         return err;
10131 }
10132
10133 /*
10134  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10135  *
10136  * Return >0 any error found and output error message
10137  * Return 0 for no error found
10138  */
10139 static int check_extent_data_item(struct btrfs_root *root,
10140                                   struct extent_buffer *eb, int slot)
10141 {
10142         struct btrfs_file_extent_item *fi;
10143         struct btrfs_path path;
10144         struct btrfs_root *extent_root = root->fs_info->extent_root;
10145         struct btrfs_key fi_key;
10146         struct btrfs_key dbref_key;
10147         struct extent_buffer *leaf;
10148         struct btrfs_extent_item *ei;
10149         struct btrfs_extent_inline_ref *iref;
10150         struct btrfs_extent_data_ref *dref;
10151         u64 owner;
10152         u64 file_extent_gen;
10153         u64 disk_bytenr;
10154         u64 disk_num_bytes;
10155         u64 extent_num_bytes;
10156         u64 extent_flags;
10157         u64 extent_gen;
10158         u32 item_size;
10159         unsigned long end;
10160         unsigned long ptr;
10161         int type;
10162         u64 ref_root;
10163         int found_dbackref = 0;
10164         int err = 0;
10165         int ret;
10166
10167         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10168         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10169         file_extent_gen = btrfs_file_extent_generation(eb, fi);
10170
10171         /* Nothing to check for hole and inline data extents */
10172         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10173             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10174                 return 0;
10175
10176         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10177         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10178         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10179
10180         /* Check unaligned disk_num_bytes and num_bytes */
10181         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10182                 error(
10183 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10184                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10185                         root->sectorsize);
10186                 err |= BYTES_UNALIGNED;
10187         } else {
10188                 data_bytes_allocated += disk_num_bytes;
10189         }
10190         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10191                 error(
10192 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10193                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10194                         root->sectorsize);
10195                 err |= BYTES_UNALIGNED;
10196         } else {
10197                 data_bytes_referenced += extent_num_bytes;
10198         }
10199         owner = btrfs_header_owner(eb);
10200
10201         /* Check the extent item of the file extent in extent tree */
10202         btrfs_init_path(&path);
10203         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10204         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10205         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10206
10207         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10208         if (ret) {
10209                 err |= BACKREF_MISSING;
10210                 goto error;
10211         }
10212
10213         leaf = path.nodes[0];
10214         slot = path.slots[0];
10215         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10216
10217         extent_flags = btrfs_extent_flags(leaf, ei);
10218         extent_gen = btrfs_extent_generation(leaf, ei);
10219
10220         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10221                 error(
10222                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10223                     disk_bytenr, disk_num_bytes,
10224                     BTRFS_EXTENT_FLAG_DATA);
10225                 err |= BACKREF_MISMATCH;
10226         }
10227
10228         if (file_extent_gen < extent_gen) {
10229                 error(
10230 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
10231                         disk_bytenr, disk_num_bytes, file_extent_gen,
10232                         extent_gen);
10233                 err |= BACKREF_MISMATCH;
10234         }
10235
10236         /* Check data backref inside that extent item */
10237         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10238         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10239         ptr = (unsigned long)iref;
10240         end = (unsigned long)ei + item_size;
10241         while (ptr < end) {
10242                 iref = (struct btrfs_extent_inline_ref *)ptr;
10243                 type = btrfs_extent_inline_ref_type(leaf, iref);
10244                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10245
10246                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10247                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10248                         if (ref_root == owner || ref_root == root->objectid)
10249                                 found_dbackref = 1;
10250                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10251                         found_dbackref = !check_tree_block_ref(root, NULL,
10252                                 btrfs_extent_inline_ref_offset(leaf, iref),
10253                                 0, owner);
10254                 }
10255
10256                 if (found_dbackref)
10257                         break;
10258                 ptr += btrfs_extent_inline_ref_size(type);
10259         }
10260
10261         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10262         if (!found_dbackref) {
10263                 btrfs_release_path(&path);
10264
10265                 btrfs_init_path(&path);
10266                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10267                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10268                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10269                                 fi_key.objectid, fi_key.offset);
10270
10271                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10272                                         &dbref_key, &path, 0, 0);
10273                 if (!ret)
10274                         found_dbackref = 1;
10275         }
10276
10277         if (!found_dbackref)
10278                 err |= BACKREF_MISSING;
10279 error:
10280         btrfs_release_path(&path);
10281         if (err & BACKREF_MISSING) {
10282                 error("data extent[%llu %llu] backref lost",
10283                       disk_bytenr, disk_num_bytes);
10284         }
10285         return err;
10286 }
10287
10288 /*
10289  * Get real tree block level for the case like shared block
10290  * Return >= 0 as tree level
10291  * Return <0 for error
10292  */
10293 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10294 {
10295         struct extent_buffer *eb;
10296         struct btrfs_path path;
10297         struct btrfs_key key;
10298         struct btrfs_extent_item *ei;
10299         u64 flags;
10300         u64 transid;
10301         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10302         u8 backref_level;
10303         u8 header_level;
10304         int ret;
10305
10306         /* Search extent tree for extent generation and level */
10307         key.objectid = bytenr;
10308         key.type = BTRFS_METADATA_ITEM_KEY;
10309         key.offset = (u64)-1;
10310
10311         btrfs_init_path(&path);
10312         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10313         if (ret < 0)
10314                 goto release_out;
10315         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10316         if (ret < 0)
10317                 goto release_out;
10318         if (ret > 0) {
10319                 ret = -ENOENT;
10320                 goto release_out;
10321         }
10322
10323         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10324         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10325                             struct btrfs_extent_item);
10326         flags = btrfs_extent_flags(path.nodes[0], ei);
10327         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10328                 ret = -ENOENT;
10329                 goto release_out;
10330         }
10331
10332         /* Get transid for later read_tree_block() check */
10333         transid = btrfs_extent_generation(path.nodes[0], ei);
10334
10335         /* Get backref level as one source */
10336         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10337                 backref_level = key.offset;
10338         } else {
10339                 struct btrfs_tree_block_info *info;
10340
10341                 info = (struct btrfs_tree_block_info *)(ei + 1);
10342                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10343         }
10344         btrfs_release_path(&path);
10345
10346         /* Get level from tree block as an alternative source */
10347         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10348         if (!extent_buffer_uptodate(eb)) {
10349                 free_extent_buffer(eb);
10350                 return -EIO;
10351         }
10352         header_level = btrfs_header_level(eb);
10353         free_extent_buffer(eb);
10354
10355         if (header_level != backref_level)
10356                 return -EIO;
10357         return header_level;
10358
10359 release_out:
10360         btrfs_release_path(&path);
10361         return ret;
10362 }
10363
10364 /*
10365  * Check if a tree block backref is valid (points to a valid tree block)
10366  * if level == -1, level will be resolved
10367  * Return >0 for any error found and print error message
10368  */
10369 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10370                                     u64 bytenr, int level)
10371 {
10372         struct btrfs_root *root;
10373         struct btrfs_key key;
10374         struct btrfs_path path;
10375         struct extent_buffer *eb;
10376         struct extent_buffer *node;
10377         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10378         int err = 0;
10379         int ret;
10380
10381         /* Query level for level == -1 special case */
10382         if (level == -1)
10383                 level = query_tree_block_level(fs_info, bytenr);
10384         if (level < 0) {
10385                 err |= REFERENCER_MISSING;
10386                 goto out;
10387         }
10388
10389         key.objectid = root_id;
10390         key.type = BTRFS_ROOT_ITEM_KEY;
10391         key.offset = (u64)-1;
10392
10393         root = btrfs_read_fs_root(fs_info, &key);
10394         if (IS_ERR(root)) {
10395                 err |= REFERENCER_MISSING;
10396                 goto out;
10397         }
10398
10399         /* Read out the tree block to get item/node key */
10400         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10401         if (!extent_buffer_uptodate(eb)) {
10402                 err |= REFERENCER_MISSING;
10403                 free_extent_buffer(eb);
10404                 goto out;
10405         }
10406
10407         /* Empty tree, no need to check key */
10408         if (!btrfs_header_nritems(eb) && !level) {
10409                 free_extent_buffer(eb);
10410                 goto out;
10411         }
10412
10413         if (level)
10414                 btrfs_node_key_to_cpu(eb, &key, 0);
10415         else
10416                 btrfs_item_key_to_cpu(eb, &key, 0);
10417
10418         free_extent_buffer(eb);
10419
10420         btrfs_init_path(&path);
10421         path.lowest_level = level;
10422         /* Search with the first key, to ensure we can reach it */
10423         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10424         if (ret < 0) {
10425                 err |= REFERENCER_MISSING;
10426                 goto release_out;
10427         }
10428
10429         node = path.nodes[level];
10430         if (btrfs_header_bytenr(node) != bytenr) {
10431                 error(
10432         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10433                         bytenr, nodesize, bytenr,
10434                         btrfs_header_bytenr(node));
10435                 err |= REFERENCER_MISMATCH;
10436         }
10437         if (btrfs_header_level(node) != level) {
10438                 error(
10439         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10440                         bytenr, nodesize, level,
10441                         btrfs_header_level(node));
10442                 err |= REFERENCER_MISMATCH;
10443         }
10444
10445 release_out:
10446         btrfs_release_path(&path);
10447 out:
10448         if (err & REFERENCER_MISSING) {
10449                 if (level < 0)
10450                         error("extent [%llu %d] lost referencer (owner: %llu)",
10451                                 bytenr, nodesize, root_id);
10452                 else
10453                         error(
10454                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10455                                 bytenr, nodesize, root_id, level);
10456         }
10457
10458         return err;
10459 }
10460
10461 /*
10462  * Check referencer for shared block backref
10463  * If level == -1, this function will resolve the level.
10464  */
10465 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10466                                      u64 parent, u64 bytenr, int level)
10467 {
10468         struct extent_buffer *eb;
10469         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10470         u32 nr;
10471         int found_parent = 0;
10472         int i;
10473
10474         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10475         if (!extent_buffer_uptodate(eb))
10476                 goto out;
10477
10478         if (level == -1)
10479                 level = query_tree_block_level(fs_info, bytenr);
10480         if (level < 0)
10481                 goto out;
10482
10483         if (level + 1 != btrfs_header_level(eb))
10484                 goto out;
10485
10486         nr = btrfs_header_nritems(eb);
10487         for (i = 0; i < nr; i++) {
10488                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10489                         found_parent = 1;
10490                         break;
10491                 }
10492         }
10493 out:
10494         free_extent_buffer(eb);
10495         if (!found_parent) {
10496                 error(
10497         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10498                         bytenr, nodesize, parent, level);
10499                 return REFERENCER_MISSING;
10500         }
10501         return 0;
10502 }
10503
10504 /*
10505  * Check referencer for normal (inlined) data ref
10506  * If len == 0, it will be resolved by searching in extent tree
10507  */
10508 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10509                                      u64 root_id, u64 objectid, u64 offset,
10510                                      u64 bytenr, u64 len, u32 count)
10511 {
10512         struct btrfs_root *root;
10513         struct btrfs_root *extent_root = fs_info->extent_root;
10514         struct btrfs_key key;
10515         struct btrfs_path path;
10516         struct extent_buffer *leaf;
10517         struct btrfs_file_extent_item *fi;
10518         u32 found_count = 0;
10519         int slot;
10520         int ret = 0;
10521
10522         if (!len) {
10523                 key.objectid = bytenr;
10524                 key.type = BTRFS_EXTENT_ITEM_KEY;
10525                 key.offset = (u64)-1;
10526
10527                 btrfs_init_path(&path);
10528                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10529                 if (ret < 0)
10530                         goto out;
10531                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10532                 if (ret)
10533                         goto out;
10534                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10535                 if (key.objectid != bytenr ||
10536                     key.type != BTRFS_EXTENT_ITEM_KEY)
10537                         goto out;
10538                 len = key.offset;
10539                 btrfs_release_path(&path);
10540         }
10541         key.objectid = root_id;
10542         key.type = BTRFS_ROOT_ITEM_KEY;
10543         key.offset = (u64)-1;
10544         btrfs_init_path(&path);
10545
10546         root = btrfs_read_fs_root(fs_info, &key);
10547         if (IS_ERR(root))
10548                 goto out;
10549
10550         key.objectid = objectid;
10551         key.type = BTRFS_EXTENT_DATA_KEY;
10552         /*
10553          * It can be nasty as data backref offset is
10554          * file offset - file extent offset, which is smaller or
10555          * equal to original backref offset.  The only special case is
10556          * overflow.  So we need to special check and do further search.
10557          */
10558         key.offset = offset & (1ULL << 63) ? 0 : offset;
10559
10560         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10561         if (ret < 0)
10562                 goto out;
10563
10564         /*
10565          * Search afterwards to get correct one
10566          * NOTE: As we must do a comprehensive check on the data backref to
10567          * make sure the dref count also matches, we must iterate all file
10568          * extents for that inode.
10569          */
10570         while (1) {
10571                 leaf = path.nodes[0];
10572                 slot = path.slots[0];
10573
10574                 btrfs_item_key_to_cpu(leaf, &key, slot);
10575                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10576                         break;
10577                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10578                 /*
10579                  * Except normal disk bytenr and disk num bytes, we still
10580                  * need to do extra check on dbackref offset as
10581                  * dbackref offset = file_offset - file_extent_offset
10582                  */
10583                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10584                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10585                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10586                     offset)
10587                         found_count++;
10588
10589                 ret = btrfs_next_item(root, &path);
10590                 if (ret)
10591                         break;
10592         }
10593 out:
10594         btrfs_release_path(&path);
10595         if (found_count != count) {
10596                 error(
10597 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10598                         bytenr, len, root_id, objectid, offset, count, found_count);
10599                 return REFERENCER_MISSING;
10600         }
10601         return 0;
10602 }
10603
10604 /*
10605  * Check if the referencer of a shared data backref exists
10606  */
10607 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10608                                      u64 parent, u64 bytenr)
10609 {
10610         struct extent_buffer *eb;
10611         struct btrfs_key key;
10612         struct btrfs_file_extent_item *fi;
10613         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10614         u32 nr;
10615         int found_parent = 0;
10616         int i;
10617
10618         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10619         if (!extent_buffer_uptodate(eb))
10620                 goto out;
10621
10622         nr = btrfs_header_nritems(eb);
10623         for (i = 0; i < nr; i++) {
10624                 btrfs_item_key_to_cpu(eb, &key, i);
10625                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10626                         continue;
10627
10628                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10629                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10630                         continue;
10631
10632                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10633                         found_parent = 1;
10634                         break;
10635                 }
10636         }
10637
10638 out:
10639         free_extent_buffer(eb);
10640         if (!found_parent) {
10641                 error("shared extent %llu referencer lost (parent: %llu)",
10642                         bytenr, parent);
10643                 return REFERENCER_MISSING;
10644         }
10645         return 0;
10646 }
10647
10648 /*
10649  * This function will check a given extent item, including its backref and
10650  * itself (like crossing stripe boundary and type)
10651  *
10652  * Since we don't use extent_record anymore, introduce new error bit
10653  */
10654 static int check_extent_item(struct btrfs_fs_info *fs_info,
10655                              struct extent_buffer *eb, int slot)
10656 {
10657         struct btrfs_extent_item *ei;
10658         struct btrfs_extent_inline_ref *iref;
10659         struct btrfs_extent_data_ref *dref;
10660         unsigned long end;
10661         unsigned long ptr;
10662         int type;
10663         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10664         u32 item_size = btrfs_item_size_nr(eb, slot);
10665         u64 flags;
10666         u64 offset;
10667         int metadata = 0;
10668         int level;
10669         struct btrfs_key key;
10670         int ret;
10671         int err = 0;
10672
10673         btrfs_item_key_to_cpu(eb, &key, slot);
10674         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10675                 bytes_used += key.offset;
10676         else
10677                 bytes_used += nodesize;
10678
10679         if (item_size < sizeof(*ei)) {
10680                 /*
10681                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10682                  * old thing when on disk format is still un-determined.
10683                  * No need to care about it anymore
10684                  */
10685                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10686                 return -ENOTTY;
10687         }
10688
10689         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10690         flags = btrfs_extent_flags(eb, ei);
10691
10692         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10693                 metadata = 1;
10694         if (metadata && check_crossing_stripes(global_info, key.objectid,
10695                                                eb->len)) {
10696                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10697                       key.objectid, key.objectid + nodesize);
10698                 err |= CROSSING_STRIPE_BOUNDARY;
10699         }
10700
10701         ptr = (unsigned long)(ei + 1);
10702
10703         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10704                 /* Old EXTENT_ITEM metadata */
10705                 struct btrfs_tree_block_info *info;
10706
10707                 info = (struct btrfs_tree_block_info *)ptr;
10708                 level = btrfs_tree_block_level(eb, info);
10709                 ptr += sizeof(struct btrfs_tree_block_info);
10710         } else {
10711                 /* New METADATA_ITEM */
10712                 level = key.offset;
10713         }
10714         end = (unsigned long)ei + item_size;
10715
10716         if (ptr >= end) {
10717                 err |= ITEM_SIZE_MISMATCH;
10718                 goto out;
10719         }
10720
10721         /* Now check every backref in this extent item */
10722 next:
10723         iref = (struct btrfs_extent_inline_ref *)ptr;
10724         type = btrfs_extent_inline_ref_type(eb, iref);
10725         offset = btrfs_extent_inline_ref_offset(eb, iref);
10726         switch (type) {
10727         case BTRFS_TREE_BLOCK_REF_KEY:
10728                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10729                                                level);
10730                 err |= ret;
10731                 break;
10732         case BTRFS_SHARED_BLOCK_REF_KEY:
10733                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10734                                                  level);
10735                 err |= ret;
10736                 break;
10737         case BTRFS_EXTENT_DATA_REF_KEY:
10738                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10739                 ret = check_extent_data_backref(fs_info,
10740                                 btrfs_extent_data_ref_root(eb, dref),
10741                                 btrfs_extent_data_ref_objectid(eb, dref),
10742                                 btrfs_extent_data_ref_offset(eb, dref),
10743                                 key.objectid, key.offset,
10744                                 btrfs_extent_data_ref_count(eb, dref));
10745                 err |= ret;
10746                 break;
10747         case BTRFS_SHARED_DATA_REF_KEY:
10748                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10749                 err |= ret;
10750                 break;
10751         default:
10752                 error("extent[%llu %d %llu] has unknown ref type: %d",
10753                         key.objectid, key.type, key.offset, type);
10754                 err |= UNKNOWN_TYPE;
10755                 goto out;
10756         }
10757
10758         ptr += btrfs_extent_inline_ref_size(type);
10759         if (ptr < end)
10760                 goto next;
10761
10762 out:
10763         return err;
10764 }
10765
10766 /*
10767  * Check if a dev extent item is referred correctly by its chunk
10768  */
10769 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10770                                  struct extent_buffer *eb, int slot)
10771 {
10772         struct btrfs_root *chunk_root = fs_info->chunk_root;
10773         struct btrfs_dev_extent *ptr;
10774         struct btrfs_path path;
10775         struct btrfs_key chunk_key;
10776         struct btrfs_key devext_key;
10777         struct btrfs_chunk *chunk;
10778         struct extent_buffer *l;
10779         int num_stripes;
10780         u64 length;
10781         int i;
10782         int found_chunk = 0;
10783         int ret;
10784
10785         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10786         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10787         length = btrfs_dev_extent_length(eb, ptr);
10788
10789         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10790         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10791         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10792
10793         btrfs_init_path(&path);
10794         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10795         if (ret)
10796                 goto out;
10797
10798         l = path.nodes[0];
10799         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10800         if (btrfs_chunk_length(l, chunk) != length)
10801                 goto out;
10802
10803         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10804         for (i = 0; i < num_stripes; i++) {
10805                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10806                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10807
10808                 if (devid == devext_key.objectid &&
10809                     offset == devext_key.offset) {
10810                         found_chunk = 1;
10811                         break;
10812                 }
10813         }
10814 out:
10815         btrfs_release_path(&path);
10816         if (!found_chunk) {
10817                 error(
10818                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10819                         devext_key.objectid, devext_key.offset, length);
10820                 return REFERENCER_MISSING;
10821         }
10822         return 0;
10823 }
10824
10825 /*
10826  * Check if the used space is correct with the dev item
10827  */
10828 static int check_dev_item(struct btrfs_fs_info *fs_info,
10829                           struct extent_buffer *eb, int slot)
10830 {
10831         struct btrfs_root *dev_root = fs_info->dev_root;
10832         struct btrfs_dev_item *dev_item;
10833         struct btrfs_path path;
10834         struct btrfs_key key;
10835         struct btrfs_dev_extent *ptr;
10836         u64 dev_id;
10837         u64 used;
10838         u64 total = 0;
10839         int ret;
10840
10841         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10842         dev_id = btrfs_device_id(eb, dev_item);
10843         used = btrfs_device_bytes_used(eb, dev_item);
10844
10845         key.objectid = dev_id;
10846         key.type = BTRFS_DEV_EXTENT_KEY;
10847         key.offset = 0;
10848
10849         btrfs_init_path(&path);
10850         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10851         if (ret < 0) {
10852                 btrfs_item_key_to_cpu(eb, &key, slot);
10853                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10854                         key.objectid, key.type, key.offset);
10855                 btrfs_release_path(&path);
10856                 return REFERENCER_MISSING;
10857         }
10858
10859         /* Iterate dev_extents to calculate the used space of a device */
10860         while (1) {
10861                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10862
10863                 if (key.objectid > dev_id)
10864                         break;
10865                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10866                         goto next;
10867
10868                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10869                                      struct btrfs_dev_extent);
10870                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10871 next:
10872                 ret = btrfs_next_item(dev_root, &path);
10873                 if (ret)
10874                         break;
10875         }
10876         btrfs_release_path(&path);
10877
10878         if (used != total) {
10879                 btrfs_item_key_to_cpu(eb, &key, slot);
10880                 error(
10881 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10882                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10883                         BTRFS_DEV_EXTENT_KEY, dev_id);
10884                 return ACCOUNTING_MISMATCH;
10885         }
10886         return 0;
10887 }
10888
10889 /*
10890  * Check a block group item with its referener (chunk) and its used space
10891  * with extent/metadata item
10892  */
10893 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10894                                   struct extent_buffer *eb, int slot)
10895 {
10896         struct btrfs_root *extent_root = fs_info->extent_root;
10897         struct btrfs_root *chunk_root = fs_info->chunk_root;
10898         struct btrfs_block_group_item *bi;
10899         struct btrfs_block_group_item bg_item;
10900         struct btrfs_path path;
10901         struct btrfs_key bg_key;
10902         struct btrfs_key chunk_key;
10903         struct btrfs_key extent_key;
10904         struct btrfs_chunk *chunk;
10905         struct extent_buffer *leaf;
10906         struct btrfs_extent_item *ei;
10907         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10908         u64 flags;
10909         u64 bg_flags;
10910         u64 used;
10911         u64 total = 0;
10912         int ret;
10913         int err = 0;
10914
10915         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10916         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10917         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10918         used = btrfs_block_group_used(&bg_item);
10919         bg_flags = btrfs_block_group_flags(&bg_item);
10920
10921         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10922         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10923         chunk_key.offset = bg_key.objectid;
10924
10925         btrfs_init_path(&path);
10926         /* Search for the referencer chunk */
10927         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10928         if (ret) {
10929                 error(
10930                 "block group[%llu %llu] did not find the related chunk item",
10931                         bg_key.objectid, bg_key.offset);
10932                 err |= REFERENCER_MISSING;
10933         } else {
10934                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10935                                         struct btrfs_chunk);
10936                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10937                                                 bg_key.offset) {
10938                         error(
10939         "block group[%llu %llu] related chunk item length does not match",
10940                                 bg_key.objectid, bg_key.offset);
10941                         err |= REFERENCER_MISMATCH;
10942                 }
10943         }
10944         btrfs_release_path(&path);
10945
10946         /* Search from the block group bytenr */
10947         extent_key.objectid = bg_key.objectid;
10948         extent_key.type = 0;
10949         extent_key.offset = 0;
10950
10951         btrfs_init_path(&path);
10952         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10953         if (ret < 0)
10954                 goto out;
10955
10956         /* Iterate extent tree to account used space */
10957         while (1) {
10958                 leaf = path.nodes[0];
10959                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10960                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10961                         break;
10962
10963                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10964                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10965                         goto next;
10966                 if (extent_key.objectid < bg_key.objectid)
10967                         goto next;
10968
10969                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10970                         total += nodesize;
10971                 else
10972                         total += extent_key.offset;
10973
10974                 ei = btrfs_item_ptr(leaf, path.slots[0],
10975                                     struct btrfs_extent_item);
10976                 flags = btrfs_extent_flags(leaf, ei);
10977                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10978                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10979                                 error(
10980                         "bad extent[%llu, %llu) type mismatch with chunk",
10981                                         extent_key.objectid,
10982                                         extent_key.objectid + extent_key.offset);
10983                                 err |= CHUNK_TYPE_MISMATCH;
10984                         }
10985                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10986                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10987                                     BTRFS_BLOCK_GROUP_METADATA))) {
10988                                 error(
10989                         "bad extent[%llu, %llu) type mismatch with chunk",
10990                                         extent_key.objectid,
10991                                         extent_key.objectid + nodesize);
10992                                 err |= CHUNK_TYPE_MISMATCH;
10993                         }
10994                 }
10995 next:
10996                 ret = btrfs_next_item(extent_root, &path);
10997                 if (ret)
10998                         break;
10999         }
11000
11001 out:
11002         btrfs_release_path(&path);
11003
11004         if (total != used) {
11005                 error(
11006                 "block group[%llu %llu] used %llu but extent items used %llu",
11007                         bg_key.objectid, bg_key.offset, used, total);
11008                 err |= ACCOUNTING_MISMATCH;
11009         }
11010         return err;
11011 }
11012
11013 /*
11014  * Check a chunk item.
11015  * Including checking all referred dev_extents and block group
11016  */
11017 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11018                             struct extent_buffer *eb, int slot)
11019 {
11020         struct btrfs_root *extent_root = fs_info->extent_root;
11021         struct btrfs_root *dev_root = fs_info->dev_root;
11022         struct btrfs_path path;
11023         struct btrfs_key chunk_key;
11024         struct btrfs_key bg_key;
11025         struct btrfs_key devext_key;
11026         struct btrfs_chunk *chunk;
11027         struct extent_buffer *leaf;
11028         struct btrfs_block_group_item *bi;
11029         struct btrfs_block_group_item bg_item;
11030         struct btrfs_dev_extent *ptr;
11031         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11032         u64 length;
11033         u64 chunk_end;
11034         u64 type;
11035         u64 profile;
11036         int num_stripes;
11037         u64 offset;
11038         u64 objectid;
11039         int i;
11040         int ret;
11041         int err = 0;
11042
11043         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11044         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11045         length = btrfs_chunk_length(eb, chunk);
11046         chunk_end = chunk_key.offset + length;
11047         if (!IS_ALIGNED(length, sectorsize)) {
11048                 error("chunk[%llu %llu) not aligned to %u",
11049                         chunk_key.offset, chunk_end, sectorsize);
11050                 err |= BYTES_UNALIGNED;
11051                 goto out;
11052         }
11053
11054         type = btrfs_chunk_type(eb, chunk);
11055         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11056         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11057                 error("chunk[%llu %llu) has no chunk type",
11058                         chunk_key.offset, chunk_end);
11059                 err |= UNKNOWN_TYPE;
11060         }
11061         if (profile && (profile & (profile - 1))) {
11062                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11063                         chunk_key.offset, chunk_end, profile);
11064                 err |= UNKNOWN_TYPE;
11065         }
11066
11067         bg_key.objectid = chunk_key.offset;
11068         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11069         bg_key.offset = length;
11070
11071         btrfs_init_path(&path);
11072         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11073         if (ret) {
11074                 error(
11075                 "chunk[%llu %llu) did not find the related block group item",
11076                         chunk_key.offset, chunk_end);
11077                 err |= REFERENCER_MISSING;
11078         } else{
11079                 leaf = path.nodes[0];
11080                 bi = btrfs_item_ptr(leaf, path.slots[0],
11081                                     struct btrfs_block_group_item);
11082                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11083                                    sizeof(bg_item));
11084                 if (btrfs_block_group_flags(&bg_item) != type) {
11085                         error(
11086 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11087                                 chunk_key.offset, chunk_end, type,
11088                                 btrfs_block_group_flags(&bg_item));
11089                         err |= REFERENCER_MISSING;
11090                 }
11091         }
11092
11093         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11094         for (i = 0; i < num_stripes; i++) {
11095                 btrfs_release_path(&path);
11096                 btrfs_init_path(&path);
11097                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11098                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11099                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11100
11101                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11102                                         0, 0);
11103                 if (ret)
11104                         goto not_match_dev;
11105
11106                 leaf = path.nodes[0];
11107                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11108                                      struct btrfs_dev_extent);
11109                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11110                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11111                 if (objectid != chunk_key.objectid ||
11112                     offset != chunk_key.offset ||
11113                     btrfs_dev_extent_length(leaf, ptr) != length)
11114                         goto not_match_dev;
11115                 continue;
11116 not_match_dev:
11117                 err |= BACKREF_MISSING;
11118                 error(
11119                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11120                         chunk_key.objectid, chunk_end, i);
11121                 continue;
11122         }
11123         btrfs_release_path(&path);
11124 out:
11125         return err;
11126 }
11127
11128 /*
11129  * Main entry function to check known items and update related accounting info
11130  */
11131 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11132 {
11133         struct btrfs_fs_info *fs_info = root->fs_info;
11134         struct btrfs_key key;
11135         int slot = 0;
11136         int type;
11137         struct btrfs_extent_data_ref *dref;
11138         int ret;
11139         int err = 0;
11140
11141 next:
11142         btrfs_item_key_to_cpu(eb, &key, slot);
11143         type = key.type;
11144
11145         switch (type) {
11146         case BTRFS_EXTENT_DATA_KEY:
11147                 ret = check_extent_data_item(root, eb, slot);
11148                 err |= ret;
11149                 break;
11150         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11151                 ret = check_block_group_item(fs_info, eb, slot);
11152                 err |= ret;
11153                 break;
11154         case BTRFS_DEV_ITEM_KEY:
11155                 ret = check_dev_item(fs_info, eb, slot);
11156                 err |= ret;
11157                 break;
11158         case BTRFS_CHUNK_ITEM_KEY:
11159                 ret = check_chunk_item(fs_info, eb, slot);
11160                 err |= ret;
11161                 break;
11162         case BTRFS_DEV_EXTENT_KEY:
11163                 ret = check_dev_extent_item(fs_info, eb, slot);
11164                 err |= ret;
11165                 break;
11166         case BTRFS_EXTENT_ITEM_KEY:
11167         case BTRFS_METADATA_ITEM_KEY:
11168                 ret = check_extent_item(fs_info, eb, slot);
11169                 err |= ret;
11170                 break;
11171         case BTRFS_EXTENT_CSUM_KEY:
11172                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11173                 break;
11174         case BTRFS_TREE_BLOCK_REF_KEY:
11175                 ret = check_tree_block_backref(fs_info, key.offset,
11176                                                key.objectid, -1);
11177                 err |= ret;
11178                 break;
11179         case BTRFS_EXTENT_DATA_REF_KEY:
11180                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11181                 ret = check_extent_data_backref(fs_info,
11182                                 btrfs_extent_data_ref_root(eb, dref),
11183                                 btrfs_extent_data_ref_objectid(eb, dref),
11184                                 btrfs_extent_data_ref_offset(eb, dref),
11185                                 key.objectid, 0,
11186                                 btrfs_extent_data_ref_count(eb, dref));
11187                 err |= ret;
11188                 break;
11189         case BTRFS_SHARED_BLOCK_REF_KEY:
11190                 ret = check_shared_block_backref(fs_info, key.offset,
11191                                                  key.objectid, -1);
11192                 err |= ret;
11193                 break;
11194         case BTRFS_SHARED_DATA_REF_KEY:
11195                 ret = check_shared_data_backref(fs_info, key.offset,
11196                                                 key.objectid);
11197                 err |= ret;
11198                 break;
11199         default:
11200                 break;
11201         }
11202
11203         if (++slot < btrfs_header_nritems(eb))
11204                 goto next;
11205
11206         return err;
11207 }
11208
11209 /*
11210  * Helper function for later fs/subvol tree check.  To determine if a tree
11211  * block should be checked.
11212  * This function will ensure only the direct referencer with lowest rootid to
11213  * check a fs/subvolume tree block.
11214  *
11215  * Backref check at extent tree would detect errors like missing subvolume
11216  * tree, so we can do aggressive check to reduce duplicated checks.
11217  */
11218 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11219 {
11220         struct btrfs_root *extent_root = root->fs_info->extent_root;
11221         struct btrfs_key key;
11222         struct btrfs_path path;
11223         struct extent_buffer *leaf;
11224         int slot;
11225         struct btrfs_extent_item *ei;
11226         unsigned long ptr;
11227         unsigned long end;
11228         int type;
11229         u32 item_size;
11230         u64 offset;
11231         struct btrfs_extent_inline_ref *iref;
11232         int ret;
11233
11234         btrfs_init_path(&path);
11235         key.objectid = btrfs_header_bytenr(eb);
11236         key.type = BTRFS_METADATA_ITEM_KEY;
11237         key.offset = (u64)-1;
11238
11239         /*
11240          * Any failure in backref resolving means we can't determine
11241          * whom the tree block belongs to.
11242          * So in that case, we need to check that tree block
11243          */
11244         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11245         if (ret < 0)
11246                 goto need_check;
11247
11248         ret = btrfs_previous_extent_item(extent_root, &path,
11249                                          btrfs_header_bytenr(eb));
11250         if (ret)
11251                 goto need_check;
11252
11253         leaf = path.nodes[0];
11254         slot = path.slots[0];
11255         btrfs_item_key_to_cpu(leaf, &key, slot);
11256         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11257
11258         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11259                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11260         } else {
11261                 struct btrfs_tree_block_info *info;
11262
11263                 info = (struct btrfs_tree_block_info *)(ei + 1);
11264                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11265         }
11266
11267         item_size = btrfs_item_size_nr(leaf, slot);
11268         ptr = (unsigned long)iref;
11269         end = (unsigned long)ei + item_size;
11270         while (ptr < end) {
11271                 iref = (struct btrfs_extent_inline_ref *)ptr;
11272                 type = btrfs_extent_inline_ref_type(leaf, iref);
11273                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11274
11275                 /*
11276                  * We only check the tree block if current root is
11277                  * the lowest referencer of it.
11278                  */
11279                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11280                     offset < root->objectid) {
11281                         btrfs_release_path(&path);
11282                         return 0;
11283                 }
11284
11285                 ptr += btrfs_extent_inline_ref_size(type);
11286         }
11287         /*
11288          * Normally we should also check keyed tree block ref, but that may be
11289          * very time consuming.  Inlined ref should already make us skip a lot
11290          * of refs now.  So skip search keyed tree block ref.
11291          */
11292
11293 need_check:
11294         btrfs_release_path(&path);
11295         return 1;
11296 }
11297
11298 /*
11299  * Traversal function for tree block. We will do:
11300  * 1) Skip shared fs/subvolume tree blocks
11301  * 2) Update related bytes accounting
11302  * 3) Pre-order traversal
11303  */
11304 static int traverse_tree_block(struct btrfs_root *root,
11305                                 struct extent_buffer *node)
11306 {
11307         struct extent_buffer *eb;
11308         struct btrfs_key key;
11309         struct btrfs_key drop_key;
11310         int level;
11311         u64 nr;
11312         int i;
11313         int err = 0;
11314         int ret;
11315
11316         /*
11317          * Skip shared fs/subvolume tree block, in that case they will
11318          * be checked by referencer with lowest rootid
11319          */
11320         if (is_fstree(root->objectid) && !should_check(root, node))
11321                 return 0;
11322
11323         /* Update bytes accounting */
11324         total_btree_bytes += node->len;
11325         if (fs_root_objectid(btrfs_header_owner(node)))
11326                 total_fs_tree_bytes += node->len;
11327         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11328                 total_extent_tree_bytes += node->len;
11329         if (!found_old_backref &&
11330             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11331             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11332             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11333                 found_old_backref = 1;
11334
11335         /* pre-order tranversal, check itself first */
11336         level = btrfs_header_level(node);
11337         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11338                                    btrfs_header_level(node),
11339                                    btrfs_header_owner(node));
11340         err |= ret;
11341         if (err)
11342                 error(
11343         "check %s failed root %llu bytenr %llu level %d, force continue check",
11344                         level ? "node":"leaf", root->objectid,
11345                         btrfs_header_bytenr(node), btrfs_header_level(node));
11346
11347         if (!level) {
11348                 btree_space_waste += btrfs_leaf_free_space(root, node);
11349                 ret = check_leaf_items(root, node);
11350                 err |= ret;
11351                 return err;
11352         }
11353
11354         nr = btrfs_header_nritems(node);
11355         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11356         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11357                 sizeof(struct btrfs_key_ptr);
11358
11359         /* Then check all its children */
11360         for (i = 0; i < nr; i++) {
11361                 u64 blocknr = btrfs_node_blockptr(node, i);
11362
11363                 btrfs_node_key_to_cpu(node, &key, i);
11364                 if (level == root->root_item.drop_level &&
11365                     is_dropped_key(&key, &drop_key))
11366                         continue;
11367
11368                 /*
11369                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11370                  * to call the function itself.
11371                  */
11372                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11373                 if (extent_buffer_uptodate(eb)) {
11374                         ret = traverse_tree_block(root, eb);
11375                         err |= ret;
11376                 }
11377                 free_extent_buffer(eb);
11378         }
11379
11380         return err;
11381 }
11382
11383 /*
11384  * Low memory usage version check_chunks_and_extents.
11385  */
11386 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11387 {
11388         struct btrfs_path path;
11389         struct btrfs_key key;
11390         struct btrfs_root *root1;
11391         struct btrfs_root *cur_root;
11392         int err = 0;
11393         int ret;
11394
11395         root1 = root->fs_info->chunk_root;
11396         ret = traverse_tree_block(root1, root1->node);
11397         err |= ret;
11398
11399         root1 = root->fs_info->tree_root;
11400         ret = traverse_tree_block(root1, root1->node);
11401         err |= ret;
11402
11403         btrfs_init_path(&path);
11404         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11405         key.offset = 0;
11406         key.type = BTRFS_ROOT_ITEM_KEY;
11407
11408         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11409         if (ret) {
11410                 error("cannot find extent treet in tree_root");
11411                 goto out;
11412         }
11413
11414         while (1) {
11415                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11416                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11417                         goto next;
11418                 key.offset = (u64)-1;
11419
11420                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11421                 if (IS_ERR(cur_root) || !cur_root) {
11422                         error("failed to read tree: %lld", key.objectid);
11423                         goto next;
11424                 }
11425
11426                 ret = traverse_tree_block(cur_root, cur_root->node);
11427                 err |= ret;
11428
11429 next:
11430                 ret = btrfs_next_item(root1, &path);
11431                 if (ret)
11432                         goto out;
11433         }
11434
11435 out:
11436         btrfs_release_path(&path);
11437         return err;
11438 }
11439
11440 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11441                            struct btrfs_root *root, int overwrite)
11442 {
11443         struct extent_buffer *c;
11444         struct extent_buffer *old = root->node;
11445         int level;
11446         int ret;
11447         struct btrfs_disk_key disk_key = {0,0,0};
11448
11449         level = 0;
11450
11451         if (overwrite) {
11452                 c = old;
11453                 extent_buffer_get(c);
11454                 goto init;
11455         }
11456         c = btrfs_alloc_free_block(trans, root,
11457                                    root->nodesize,
11458                                    root->root_key.objectid,
11459                                    &disk_key, level, 0, 0);
11460         if (IS_ERR(c)) {
11461                 c = old;
11462                 extent_buffer_get(c);
11463                 overwrite = 1;
11464         }
11465 init:
11466         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11467         btrfs_set_header_level(c, level);
11468         btrfs_set_header_bytenr(c, c->start);
11469         btrfs_set_header_generation(c, trans->transid);
11470         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11471         btrfs_set_header_owner(c, root->root_key.objectid);
11472
11473         write_extent_buffer(c, root->fs_info->fsid,
11474                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11475
11476         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11477                             btrfs_header_chunk_tree_uuid(c),
11478                             BTRFS_UUID_SIZE);
11479
11480         btrfs_mark_buffer_dirty(c);
11481         /*
11482          * this case can happen in the following case:
11483          *
11484          * 1.overwrite previous root.
11485          *
11486          * 2.reinit reloc data root, this is because we skip pin
11487          * down reloc data tree before which means we can allocate
11488          * same block bytenr here.
11489          */
11490         if (old->start == c->start) {
11491                 btrfs_set_root_generation(&root->root_item,
11492                                           trans->transid);
11493                 root->root_item.level = btrfs_header_level(root->node);
11494                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11495                                         &root->root_key, &root->root_item);
11496                 if (ret) {
11497                         free_extent_buffer(c);
11498                         return ret;
11499                 }
11500         }
11501         free_extent_buffer(old);
11502         root->node = c;
11503         add_root_to_dirty_list(root);
11504         return 0;
11505 }
11506
11507 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11508                                 struct extent_buffer *eb, int tree_root)
11509 {
11510         struct extent_buffer *tmp;
11511         struct btrfs_root_item *ri;
11512         struct btrfs_key key;
11513         u64 bytenr;
11514         u32 nodesize;
11515         int level = btrfs_header_level(eb);
11516         int nritems;
11517         int ret;
11518         int i;
11519
11520         /*
11521          * If we have pinned this block before, don't pin it again.
11522          * This can not only avoid forever loop with broken filesystem
11523          * but also give us some speedups.
11524          */
11525         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11526                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11527                 return 0;
11528
11529         btrfs_pin_extent(fs_info, eb->start, eb->len);
11530
11531         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11532         nritems = btrfs_header_nritems(eb);
11533         for (i = 0; i < nritems; i++) {
11534                 if (level == 0) {
11535                         btrfs_item_key_to_cpu(eb, &key, i);
11536                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11537                                 continue;
11538                         /* Skip the extent root and reloc roots */
11539                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11540                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11541                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11542                                 continue;
11543                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11544                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11545
11546                         /*
11547                          * If at any point we start needing the real root we
11548                          * will have to build a stump root for the root we are
11549                          * in, but for now this doesn't actually use the root so
11550                          * just pass in extent_root.
11551                          */
11552                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11553                                               nodesize, 0);
11554                         if (!extent_buffer_uptodate(tmp)) {
11555                                 fprintf(stderr, "Error reading root block\n");
11556                                 return -EIO;
11557                         }
11558                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11559                         free_extent_buffer(tmp);
11560                         if (ret)
11561                                 return ret;
11562                 } else {
11563                         bytenr = btrfs_node_blockptr(eb, i);
11564
11565                         /* If we aren't the tree root don't read the block */
11566                         if (level == 1 && !tree_root) {
11567                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11568                                 continue;
11569                         }
11570
11571                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11572                                               nodesize, 0);
11573                         if (!extent_buffer_uptodate(tmp)) {
11574                                 fprintf(stderr, "Error reading tree block\n");
11575                                 return -EIO;
11576                         }
11577                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11578                         free_extent_buffer(tmp);
11579                         if (ret)
11580                                 return ret;
11581                 }
11582         }
11583
11584         return 0;
11585 }
11586
11587 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11588 {
11589         int ret;
11590
11591         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11592         if (ret)
11593                 return ret;
11594
11595         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11596 }
11597
11598 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11599 {
11600         struct btrfs_block_group_cache *cache;
11601         struct btrfs_path path;
11602         struct extent_buffer *leaf;
11603         struct btrfs_chunk *chunk;
11604         struct btrfs_key key;
11605         int ret;
11606         u64 start;
11607
11608         btrfs_init_path(&path);
11609         key.objectid = 0;
11610         key.type = BTRFS_CHUNK_ITEM_KEY;
11611         key.offset = 0;
11612         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11613         if (ret < 0) {
11614                 btrfs_release_path(&path);
11615                 return ret;
11616         }
11617
11618         /*
11619          * We do this in case the block groups were screwed up and had alloc
11620          * bits that aren't actually set on the chunks.  This happens with
11621          * restored images every time and could happen in real life I guess.
11622          */
11623         fs_info->avail_data_alloc_bits = 0;
11624         fs_info->avail_metadata_alloc_bits = 0;
11625         fs_info->avail_system_alloc_bits = 0;
11626
11627         /* First we need to create the in-memory block groups */
11628         while (1) {
11629                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11630                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11631                         if (ret < 0) {
11632                                 btrfs_release_path(&path);
11633                                 return ret;
11634                         }
11635                         if (ret) {
11636                                 ret = 0;
11637                                 break;
11638                         }
11639                 }
11640                 leaf = path.nodes[0];
11641                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11642                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11643                         path.slots[0]++;
11644                         continue;
11645                 }
11646
11647                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11648                 btrfs_add_block_group(fs_info, 0,
11649                                       btrfs_chunk_type(leaf, chunk),
11650                                       key.objectid, key.offset,
11651                                       btrfs_chunk_length(leaf, chunk));
11652                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11653                                  key.offset + btrfs_chunk_length(leaf, chunk),
11654                                  GFP_NOFS);
11655                 path.slots[0]++;
11656         }
11657         start = 0;
11658         while (1) {
11659                 cache = btrfs_lookup_first_block_group(fs_info, start);
11660                 if (!cache)
11661                         break;
11662                 cache->cached = 1;
11663                 start = cache->key.objectid + cache->key.offset;
11664         }
11665
11666         btrfs_release_path(&path);
11667         return 0;
11668 }
11669
11670 static int reset_balance(struct btrfs_trans_handle *trans,
11671                          struct btrfs_fs_info *fs_info)
11672 {
11673         struct btrfs_root *root = fs_info->tree_root;
11674         struct btrfs_path path;
11675         struct extent_buffer *leaf;
11676         struct btrfs_key key;
11677         int del_slot, del_nr = 0;
11678         int ret;
11679         int found = 0;
11680
11681         btrfs_init_path(&path);
11682         key.objectid = BTRFS_BALANCE_OBJECTID;
11683         key.type = BTRFS_BALANCE_ITEM_KEY;
11684         key.offset = 0;
11685         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11686         if (ret) {
11687                 if (ret > 0)
11688                         ret = 0;
11689                 if (!ret)
11690                         goto reinit_data_reloc;
11691                 else
11692                         goto out;
11693         }
11694
11695         ret = btrfs_del_item(trans, root, &path);
11696         if (ret)
11697                 goto out;
11698         btrfs_release_path(&path);
11699
11700         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11701         key.type = BTRFS_ROOT_ITEM_KEY;
11702         key.offset = 0;
11703         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11704         if (ret < 0)
11705                 goto out;
11706         while (1) {
11707                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11708                         if (!found)
11709                                 break;
11710
11711                         if (del_nr) {
11712                                 ret = btrfs_del_items(trans, root, &path,
11713                                                       del_slot, del_nr);
11714                                 del_nr = 0;
11715                                 if (ret)
11716                                         goto out;
11717                         }
11718                         key.offset++;
11719                         btrfs_release_path(&path);
11720
11721                         found = 0;
11722                         ret = btrfs_search_slot(trans, root, &key, &path,
11723                                                 -1, 1);
11724                         if (ret < 0)
11725                                 goto out;
11726                         continue;
11727                 }
11728                 found = 1;
11729                 leaf = path.nodes[0];
11730                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11731                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11732                         break;
11733                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11734                         path.slots[0]++;
11735                         continue;
11736                 }
11737                 if (!del_nr) {
11738                         del_slot = path.slots[0];
11739                         del_nr = 1;
11740                 } else {
11741                         del_nr++;
11742                 }
11743                 path.slots[0]++;
11744         }
11745
11746         if (del_nr) {
11747                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11748                 if (ret)
11749                         goto out;
11750         }
11751         btrfs_release_path(&path);
11752
11753 reinit_data_reloc:
11754         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11755         key.type = BTRFS_ROOT_ITEM_KEY;
11756         key.offset = (u64)-1;
11757         root = btrfs_read_fs_root(fs_info, &key);
11758         if (IS_ERR(root)) {
11759                 fprintf(stderr, "Error reading data reloc tree\n");
11760                 ret = PTR_ERR(root);
11761                 goto out;
11762         }
11763         record_root_in_trans(trans, root);
11764         ret = btrfs_fsck_reinit_root(trans, root, 0);
11765         if (ret)
11766                 goto out;
11767         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11768 out:
11769         btrfs_release_path(&path);
11770         return ret;
11771 }
11772
11773 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11774                               struct btrfs_fs_info *fs_info)
11775 {
11776         u64 start = 0;
11777         int ret;
11778
11779         /*
11780          * The only reason we don't do this is because right now we're just
11781          * walking the trees we find and pinning down their bytes, we don't look
11782          * at any of the leaves.  In order to do mixed groups we'd have to check
11783          * the leaves of any fs roots and pin down the bytes for any file
11784          * extents we find.  Not hard but why do it if we don't have to?
11785          */
11786         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11787                 fprintf(stderr, "We don't support re-initing the extent tree "
11788                         "for mixed block groups yet, please notify a btrfs "
11789                         "developer you want to do this so they can add this "
11790                         "functionality.\n");
11791                 return -EINVAL;
11792         }
11793
11794         /*
11795          * first we need to walk all of the trees except the extent tree and pin
11796          * down the bytes that are in use so we don't overwrite any existing
11797          * metadata.
11798          */
11799         ret = pin_metadata_blocks(fs_info);
11800         if (ret) {
11801                 fprintf(stderr, "error pinning down used bytes\n");
11802                 return ret;
11803         }
11804
11805         /*
11806          * Need to drop all the block groups since we're going to recreate all
11807          * of them again.
11808          */
11809         btrfs_free_block_groups(fs_info);
11810         ret = reset_block_groups(fs_info);
11811         if (ret) {
11812                 fprintf(stderr, "error resetting the block groups\n");
11813                 return ret;
11814         }
11815
11816         /* Ok we can allocate now, reinit the extent root */
11817         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11818         if (ret) {
11819                 fprintf(stderr, "extent root initialization failed\n");
11820                 /*
11821                  * When the transaction code is updated we should end the
11822                  * transaction, but for now progs only knows about commit so
11823                  * just return an error.
11824                  */
11825                 return ret;
11826         }
11827
11828         /*
11829          * Now we have all the in-memory block groups setup so we can make
11830          * allocations properly, and the metadata we care about is safe since we
11831          * pinned all of it above.
11832          */
11833         while (1) {
11834                 struct btrfs_block_group_cache *cache;
11835
11836                 cache = btrfs_lookup_first_block_group(fs_info, start);
11837                 if (!cache)
11838                         break;
11839                 start = cache->key.objectid + cache->key.offset;
11840                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11841                                         &cache->key, &cache->item,
11842                                         sizeof(cache->item));
11843                 if (ret) {
11844                         fprintf(stderr, "Error adding block group\n");
11845                         return ret;
11846                 }
11847                 btrfs_extent_post_op(trans, fs_info->extent_root);
11848         }
11849
11850         ret = reset_balance(trans, fs_info);
11851         if (ret)
11852                 fprintf(stderr, "error resetting the pending balance\n");
11853
11854         return ret;
11855 }
11856
11857 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11858 {
11859         struct btrfs_path path;
11860         struct btrfs_trans_handle *trans;
11861         struct btrfs_key key;
11862         int ret;
11863
11864         printf("Recowing metadata block %llu\n", eb->start);
11865         key.objectid = btrfs_header_owner(eb);
11866         key.type = BTRFS_ROOT_ITEM_KEY;
11867         key.offset = (u64)-1;
11868
11869         root = btrfs_read_fs_root(root->fs_info, &key);
11870         if (IS_ERR(root)) {
11871                 fprintf(stderr, "Couldn't find owner root %llu\n",
11872                         key.objectid);
11873                 return PTR_ERR(root);
11874         }
11875
11876         trans = btrfs_start_transaction(root, 1);
11877         if (IS_ERR(trans))
11878                 return PTR_ERR(trans);
11879
11880         btrfs_init_path(&path);
11881         path.lowest_level = btrfs_header_level(eb);
11882         if (path.lowest_level)
11883                 btrfs_node_key_to_cpu(eb, &key, 0);
11884         else
11885                 btrfs_item_key_to_cpu(eb, &key, 0);
11886
11887         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11888         btrfs_commit_transaction(trans, root);
11889         btrfs_release_path(&path);
11890         return ret;
11891 }
11892
11893 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11894 {
11895         struct btrfs_path path;
11896         struct btrfs_trans_handle *trans;
11897         struct btrfs_key key;
11898         int ret;
11899
11900         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11901                bad->key.type, bad->key.offset);
11902         key.objectid = bad->root_id;
11903         key.type = BTRFS_ROOT_ITEM_KEY;
11904         key.offset = (u64)-1;
11905
11906         root = btrfs_read_fs_root(root->fs_info, &key);
11907         if (IS_ERR(root)) {
11908                 fprintf(stderr, "Couldn't find owner root %llu\n",
11909                         key.objectid);
11910                 return PTR_ERR(root);
11911         }
11912
11913         trans = btrfs_start_transaction(root, 1);
11914         if (IS_ERR(trans))
11915                 return PTR_ERR(trans);
11916
11917         btrfs_init_path(&path);
11918         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11919         if (ret) {
11920                 if (ret > 0)
11921                         ret = 0;
11922                 goto out;
11923         }
11924         ret = btrfs_del_item(trans, root, &path);
11925 out:
11926         btrfs_commit_transaction(trans, root);
11927         btrfs_release_path(&path);
11928         return ret;
11929 }
11930
11931 static int zero_log_tree(struct btrfs_root *root)
11932 {
11933         struct btrfs_trans_handle *trans;
11934         int ret;
11935
11936         trans = btrfs_start_transaction(root, 1);
11937         if (IS_ERR(trans)) {
11938                 ret = PTR_ERR(trans);
11939                 return ret;
11940         }
11941         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11942         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11943         ret = btrfs_commit_transaction(trans, root);
11944         return ret;
11945 }
11946
11947 static int populate_csum(struct btrfs_trans_handle *trans,
11948                          struct btrfs_root *csum_root, char *buf, u64 start,
11949                          u64 len)
11950 {
11951         u64 offset = 0;
11952         u64 sectorsize;
11953         int ret = 0;
11954
11955         while (offset < len) {
11956                 sectorsize = csum_root->sectorsize;
11957                 ret = read_extent_data(csum_root, buf, start + offset,
11958                                        &sectorsize, 0);
11959                 if (ret)
11960                         break;
11961                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11962                                             start + offset, buf, sectorsize);
11963                 if (ret)
11964                         break;
11965                 offset += sectorsize;
11966         }
11967         return ret;
11968 }
11969
11970 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11971                                       struct btrfs_root *csum_root,
11972                                       struct btrfs_root *cur_root)
11973 {
11974         struct btrfs_path path;
11975         struct btrfs_key key;
11976         struct extent_buffer *node;
11977         struct btrfs_file_extent_item *fi;
11978         char *buf = NULL;
11979         u64 start = 0;
11980         u64 len = 0;
11981         int slot = 0;
11982         int ret = 0;
11983
11984         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11985         if (!buf)
11986                 return -ENOMEM;
11987
11988         btrfs_init_path(&path);
11989         key.objectid = 0;
11990         key.offset = 0;
11991         key.type = 0;
11992         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11993         if (ret < 0)
11994                 goto out;
11995         /* Iterate all regular file extents and fill its csum */
11996         while (1) {
11997                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11998
11999                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12000                         goto next;
12001                 node = path.nodes[0];
12002                 slot = path.slots[0];
12003                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12004                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12005                         goto next;
12006                 start = btrfs_file_extent_disk_bytenr(node, fi);
12007                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12008
12009                 ret = populate_csum(trans, csum_root, buf, start, len);
12010                 if (ret == -EEXIST)
12011                         ret = 0;
12012                 if (ret < 0)
12013                         goto out;
12014 next:
12015                 /*
12016                  * TODO: if next leaf is corrupted, jump to nearest next valid
12017                  * leaf.
12018                  */
12019                 ret = btrfs_next_item(cur_root, &path);
12020                 if (ret < 0)
12021                         goto out;
12022                 if (ret > 0) {
12023                         ret = 0;
12024                         goto out;
12025                 }
12026         }
12027
12028 out:
12029         btrfs_release_path(&path);
12030         free(buf);
12031         return ret;
12032 }
12033
12034 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12035                                   struct btrfs_root *csum_root)
12036 {
12037         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12038         struct btrfs_path path;
12039         struct btrfs_root *tree_root = fs_info->tree_root;
12040         struct btrfs_root *cur_root;
12041         struct extent_buffer *node;
12042         struct btrfs_key key;
12043         int slot = 0;
12044         int ret = 0;
12045
12046         btrfs_init_path(&path);
12047         key.objectid = BTRFS_FS_TREE_OBJECTID;
12048         key.offset = 0;
12049         key.type = BTRFS_ROOT_ITEM_KEY;
12050         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12051         if (ret < 0)
12052                 goto out;
12053         if (ret > 0) {
12054                 ret = -ENOENT;
12055                 goto out;
12056         }
12057
12058         while (1) {
12059                 node = path.nodes[0];
12060                 slot = path.slots[0];
12061                 btrfs_item_key_to_cpu(node, &key, slot);
12062                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12063                         goto out;
12064                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12065                         goto next;
12066                 if (!is_fstree(key.objectid))
12067                         goto next;
12068                 key.offset = (u64)-1;
12069
12070                 cur_root = btrfs_read_fs_root(fs_info, &key);
12071                 if (IS_ERR(cur_root) || !cur_root) {
12072                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12073                                 key.objectid);
12074                         goto out;
12075                 }
12076                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12077                                 cur_root);
12078                 if (ret < 0)
12079                         goto out;
12080 next:
12081                 ret = btrfs_next_item(tree_root, &path);
12082                 if (ret > 0) {
12083                         ret = 0;
12084                         goto out;
12085                 }
12086                 if (ret < 0)
12087                         goto out;
12088         }
12089
12090 out:
12091         btrfs_release_path(&path);
12092         return ret;
12093 }
12094
12095 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12096                                       struct btrfs_root *csum_root)
12097 {
12098         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12099         struct btrfs_path path;
12100         struct btrfs_extent_item *ei;
12101         struct extent_buffer *leaf;
12102         char *buf;
12103         struct btrfs_key key;
12104         int ret;
12105
12106         btrfs_init_path(&path);
12107         key.objectid = 0;
12108         key.type = BTRFS_EXTENT_ITEM_KEY;
12109         key.offset = 0;
12110         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12111         if (ret < 0) {
12112                 btrfs_release_path(&path);
12113                 return ret;
12114         }
12115
12116         buf = malloc(csum_root->sectorsize);
12117         if (!buf) {
12118                 btrfs_release_path(&path);
12119                 return -ENOMEM;
12120         }
12121
12122         while (1) {
12123                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12124                         ret = btrfs_next_leaf(extent_root, &path);
12125                         if (ret < 0)
12126                                 break;
12127                         if (ret) {
12128                                 ret = 0;
12129                                 break;
12130                         }
12131                 }
12132                 leaf = path.nodes[0];
12133
12134                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12135                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12136                         path.slots[0]++;
12137                         continue;
12138                 }
12139
12140                 ei = btrfs_item_ptr(leaf, path.slots[0],
12141                                     struct btrfs_extent_item);
12142                 if (!(btrfs_extent_flags(leaf, ei) &
12143                       BTRFS_EXTENT_FLAG_DATA)) {
12144                         path.slots[0]++;
12145                         continue;
12146                 }
12147
12148                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12149                                     key.offset);
12150                 if (ret)
12151                         break;
12152                 path.slots[0]++;
12153         }
12154
12155         btrfs_release_path(&path);
12156         free(buf);
12157         return ret;
12158 }
12159
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case the
 * extent tree cannot be used as the source. When @search_fs_tree is non-zero
 * the fs/subvolume trees are scanned instead; otherwise the extent tree is.
 *
 * Returns whatever the selected scanner returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12176
12177 static void free_roots_info_cache(void)
12178 {
12179         if (!roots_info_cache)
12180                 return;
12181
12182         while (!cache_tree_empty(roots_info_cache)) {
12183                 struct cache_extent *entry;
12184                 struct root_item_info *rii;
12185
12186                 entry = first_cache_extent(roots_info_cache);
12187                 if (!entry)
12188                         break;
12189                 remove_cache_extent(roots_info_cache, entry);
12190                 rii = container_of(entry, struct root_item_info, cache_extent);
12191                 free(rii);
12192         }
12193
12194         free(roots_info_cache);
12195         roots_info_cache = NULL;
12196 }
12197
12198 static int build_roots_info_cache(struct btrfs_fs_info *info)
12199 {
12200         int ret = 0;
12201         struct btrfs_key key;
12202         struct extent_buffer *leaf;
12203         struct btrfs_path path;
12204
12205         if (!roots_info_cache) {
12206                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12207                 if (!roots_info_cache)
12208                         return -ENOMEM;
12209                 cache_tree_init(roots_info_cache);
12210         }
12211
12212         btrfs_init_path(&path);
12213         key.objectid = 0;
12214         key.type = BTRFS_EXTENT_ITEM_KEY;
12215         key.offset = 0;
12216         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12217         if (ret < 0)
12218                 goto out;
12219         leaf = path.nodes[0];
12220
12221         while (1) {
12222                 struct btrfs_key found_key;
12223                 struct btrfs_extent_item *ei;
12224                 struct btrfs_extent_inline_ref *iref;
12225                 int slot = path.slots[0];
12226                 int type;
12227                 u64 flags;
12228                 u64 root_id;
12229                 u8 level;
12230                 struct cache_extent *entry;
12231                 struct root_item_info *rii;
12232
12233                 if (slot >= btrfs_header_nritems(leaf)) {
12234                         ret = btrfs_next_leaf(info->extent_root, &path);
12235                         if (ret < 0) {
12236                                 break;
12237                         } else if (ret) {
12238                                 ret = 0;
12239                                 break;
12240                         }
12241                         leaf = path.nodes[0];
12242                         slot = path.slots[0];
12243                 }
12244
12245                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12246
12247                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12248                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12249                         goto next;
12250
12251                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12252                 flags = btrfs_extent_flags(leaf, ei);
12253
12254                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12255                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12256                         goto next;
12257
12258                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12259                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12260                         level = found_key.offset;
12261                 } else {
12262                         struct btrfs_tree_block_info *binfo;
12263
12264                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12265                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12266                         level = btrfs_tree_block_level(leaf, binfo);
12267                 }
12268
12269                 /*
12270                  * For a root extent, it must be of the following type and the
12271                  * first (and only one) iref in the item.
12272                  */
12273                 type = btrfs_extent_inline_ref_type(leaf, iref);
12274                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12275                         goto next;
12276
12277                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12278                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12279                 if (!entry) {
12280                         rii = malloc(sizeof(struct root_item_info));
12281                         if (!rii) {
12282                                 ret = -ENOMEM;
12283                                 goto out;
12284                         }
12285                         rii->cache_extent.start = root_id;
12286                         rii->cache_extent.size = 1;
12287                         rii->level = (u8)-1;
12288                         entry = &rii->cache_extent;
12289                         ret = insert_cache_extent(roots_info_cache, entry);
12290                         ASSERT(ret == 0);
12291                 } else {
12292                         rii = container_of(entry, struct root_item_info,
12293                                            cache_extent);
12294                 }
12295
12296                 ASSERT(rii->cache_extent.start == root_id);
12297                 ASSERT(rii->cache_extent.size == 1);
12298
12299                 if (level > rii->level || rii->level == (u8)-1) {
12300                         rii->level = level;
12301                         rii->bytenr = found_key.objectid;
12302                         rii->gen = btrfs_extent_generation(leaf, ei);
12303                         rii->node_count = 1;
12304                 } else if (level == rii->level) {
12305                         rii->node_count++;
12306                 }
12307 next:
12308                 path.slots[0]++;
12309         }
12310
12311 out:
12312         btrfs_release_path(&path);
12313
12314         return ret;
12315 }
12316
12317 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12318                                   struct btrfs_path *path,
12319                                   const struct btrfs_key *root_key,
12320                                   const int read_only_mode)
12321 {
12322         const u64 root_id = root_key->objectid;
12323         struct cache_extent *entry;
12324         struct root_item_info *rii;
12325         struct btrfs_root_item ri;
12326         unsigned long offset;
12327
12328         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12329         if (!entry) {
12330                 fprintf(stderr,
12331                         "Error: could not find extent items for root %llu\n",
12332                         root_key->objectid);
12333                 return -ENOENT;
12334         }
12335
12336         rii = container_of(entry, struct root_item_info, cache_extent);
12337         ASSERT(rii->cache_extent.start == root_id);
12338         ASSERT(rii->cache_extent.size == 1);
12339
12340         if (rii->node_count != 1) {
12341                 fprintf(stderr,
12342                         "Error: could not find btree root extent for root %llu\n",
12343                         root_id);
12344                 return -ENOENT;
12345         }
12346
12347         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12348         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12349
12350         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12351             btrfs_root_level(&ri) != rii->level ||
12352             btrfs_root_generation(&ri) != rii->gen) {
12353
12354                 /*
12355                  * If we're in repair mode but our caller told us to not update
12356                  * the root item, i.e. just check if it needs to be updated, don't
12357                  * print this message, since the caller will call us again shortly
12358                  * for the same root item without read only mode (the caller will
12359                  * open a transaction first).
12360                  */
12361                 if (!(read_only_mode && repair))
12362                         fprintf(stderr,
12363                                 "%sroot item for root %llu,"
12364                                 " current bytenr %llu, current gen %llu, current level %u,"
12365                                 " new bytenr %llu, new gen %llu, new level %u\n",
12366                                 (read_only_mode ? "" : "fixing "),
12367                                 root_id,
12368                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12369                                 btrfs_root_level(&ri),
12370                                 rii->bytenr, rii->gen, rii->level);
12371
12372                 if (btrfs_root_generation(&ri) > rii->gen) {
12373                         fprintf(stderr,
12374                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12375                                 root_id, btrfs_root_generation(&ri), rii->gen);
12376                         return -EINVAL;
12377                 }
12378
12379                 if (!read_only_mode) {
12380                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12381                         btrfs_set_root_level(&ri, rii->level);
12382                         btrfs_set_root_generation(&ri, rii->gen);
12383                         write_extent_buffer(path->nodes[0], &ri,
12384                                             offset, sizeof(ri));
12385                 }
12386
12387                 return 1;
12388         }
12389
12390         return 0;
12391 }
12392
12393 /*
12394  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12395  * caused read-only snapshots to be corrupted if they were created at a moment
12396  * when the source subvolume/snapshot had orphan items. The issue was that the
12397  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12398  * node instead of the post orphan cleanup root node.
12399  * So this function, and its callees, just detects and fixes those cases. Even
12400  * though the regression was for read-only snapshots, this function applies to
12401  * any snapshot/subvolume root.
12402  * This must be run before any other repair code - not doing it so, makes other
12403  * repair code delete or modify backrefs in the extent tree for example, which
12404  * will result in an inconsistent fs after repairing the root items.
12405  */
12406 static int repair_root_items(struct btrfs_fs_info *info)
12407 {
12408         struct btrfs_path path;
12409         struct btrfs_key key;
12410         struct extent_buffer *leaf;
12411         struct btrfs_trans_handle *trans = NULL;
12412         int ret = 0;
12413         int bad_roots = 0;
12414         int need_trans = 0;
12415
12416         btrfs_init_path(&path);
12417
12418         ret = build_roots_info_cache(info);
12419         if (ret)
12420                 goto out;
12421
12422         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12423         key.type = BTRFS_ROOT_ITEM_KEY;
12424         key.offset = 0;
12425
12426 again:
12427         /*
12428          * Avoid opening and committing transactions if a leaf doesn't have
12429          * any root items that need to be fixed, so that we avoid rotating
12430          * backup roots unnecessarily.
12431          */
12432         if (need_trans) {
12433                 trans = btrfs_start_transaction(info->tree_root, 1);
12434                 if (IS_ERR(trans)) {
12435                         ret = PTR_ERR(trans);
12436                         goto out;
12437                 }
12438         }
12439
12440         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12441                                 0, trans ? 1 : 0);
12442         if (ret < 0)
12443                 goto out;
12444         leaf = path.nodes[0];
12445
12446         while (1) {
12447                 struct btrfs_key found_key;
12448
12449                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12450                         int no_more_keys = find_next_key(&path, &key);
12451
12452                         btrfs_release_path(&path);
12453                         if (trans) {
12454                                 ret = btrfs_commit_transaction(trans,
12455                                                                info->tree_root);
12456                                 trans = NULL;
12457                                 if (ret < 0)
12458                                         goto out;
12459                         }
12460                         need_trans = 0;
12461                         if (no_more_keys)
12462                                 break;
12463                         goto again;
12464                 }
12465
12466                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12467
12468                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12469                         goto next;
12470                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12471                         goto next;
12472
12473                 ret = maybe_repair_root_item(info, &path, &found_key,
12474                                              trans ? 0 : 1);
12475                 if (ret < 0)
12476                         goto out;
12477                 if (ret) {
12478                         if (!trans && repair) {
12479                                 need_trans = 1;
12480                                 key = found_key;
12481                                 btrfs_release_path(&path);
12482                                 goto again;
12483                         }
12484                         bad_roots++;
12485                 }
12486 next:
12487                 path.slots[0]++;
12488         }
12489         ret = 0;
12490 out:
12491         free_roots_info_cache();
12492         btrfs_release_path(&path);
12493         if (trans)
12494                 btrfs_commit_transaction(trans, info->tree_root);
12495         if (ret < 0)
12496                 return ret;
12497
12498         return bad_roots;
12499 }
12500
12501 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12502 {
12503         struct btrfs_trans_handle *trans;
12504         struct btrfs_block_group_cache *bg_cache;
12505         u64 current = 0;
12506         int ret = 0;
12507
12508         /* Clear all free space cache inodes and its extent data */
12509         while (1) {
12510                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12511                 if (!bg_cache)
12512                         break;
12513                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12514                 if (ret < 0)
12515                         return ret;
12516                 current = bg_cache->key.objectid + bg_cache->key.offset;
12517         }
12518
12519         /* Don't forget to set cache_generation to -1 */
12520         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12521         if (IS_ERR(trans)) {
12522                 error("failed to update super block cache generation");
12523                 return PTR_ERR(trans);
12524         }
12525         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12526         btrfs_commit_transaction(trans, fs_info->tree_root);
12527
12528         return ret;
12529 }
12530
/*
 * Help text for "btrfs check", as a NULL-terminated list of lines: the
 * synopsis, a short and a long description, then, after an empty line, one
 * entry per option (continuation lines are indented).
 */
const char *const cmd_check_usage[] = {
	/* synopsis and description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
12561
12562 int cmd_check(int argc, char **argv)
12563 {
12564         struct cache_tree root_cache;
12565         struct btrfs_root *root;
12566         struct btrfs_fs_info *info;
12567         u64 bytenr = 0;
12568         u64 subvolid = 0;
12569         u64 tree_root_bytenr = 0;
12570         u64 chunk_root_bytenr = 0;
12571         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12572         int ret;
12573         int err = 0;
12574         u64 num;
12575         int init_csum_tree = 0;
12576         int readonly = 0;
12577         int clear_space_cache = 0;
12578         int qgroup_report = 0;
12579         int qgroups_repaired = 0;
12580         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12581
12582         while(1) {
12583                 int c;
12584                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12585                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12586                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12587                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12588                 static const struct option long_options[] = {
12589                         { "super", required_argument, NULL, 's' },
12590                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12591                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12592                         { "init-csum-tree", no_argument, NULL,
12593                                 GETOPT_VAL_INIT_CSUM },
12594                         { "init-extent-tree", no_argument, NULL,
12595                                 GETOPT_VAL_INIT_EXTENT },
12596                         { "check-data-csum", no_argument, NULL,
12597                                 GETOPT_VAL_CHECK_CSUM },
12598                         { "backup", no_argument, NULL, 'b' },
12599                         { "subvol-extents", required_argument, NULL, 'E' },
12600                         { "qgroup-report", no_argument, NULL, 'Q' },
12601                         { "tree-root", required_argument, NULL, 'r' },
12602                         { "chunk-root", required_argument, NULL,
12603                                 GETOPT_VAL_CHUNK_TREE },
12604                         { "progress", no_argument, NULL, 'p' },
12605                         { "mode", required_argument, NULL,
12606                                 GETOPT_VAL_MODE },
12607                         { "clear-space-cache", required_argument, NULL,
12608                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12609                         { NULL, 0, NULL, 0}
12610                 };
12611
12612                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12613                 if (c < 0)
12614                         break;
12615                 switch(c) {
12616                         case 'a': /* ignored */ break;
12617                         case 'b':
12618                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12619                                 break;
12620                         case 's':
12621                                 num = arg_strtou64(optarg);
12622                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12623                                         error(
12624                                         "super mirror should be less than %d",
12625                                                 BTRFS_SUPER_MIRROR_MAX);
12626                                         exit(1);
12627                                 }
12628                                 bytenr = btrfs_sb_offset(((int)num));
12629                                 printf("using SB copy %llu, bytenr %llu\n", num,
12630                                        (unsigned long long)bytenr);
12631                                 break;
12632                         case 'Q':
12633                                 qgroup_report = 1;
12634                                 break;
12635                         case 'E':
12636                                 subvolid = arg_strtou64(optarg);
12637                                 break;
12638                         case 'r':
12639                                 tree_root_bytenr = arg_strtou64(optarg);
12640                                 break;
12641                         case GETOPT_VAL_CHUNK_TREE:
12642                                 chunk_root_bytenr = arg_strtou64(optarg);
12643                                 break;
12644                         case 'p':
12645                                 ctx.progress_enabled = true;
12646                                 break;
12647                         case '?':
12648                         case 'h':
12649                                 usage(cmd_check_usage);
12650                         case GETOPT_VAL_REPAIR:
12651                                 printf("enabling repair mode\n");
12652                                 repair = 1;
12653                                 ctree_flags |= OPEN_CTREE_WRITES;
12654                                 break;
12655                         case GETOPT_VAL_READONLY:
12656                                 readonly = 1;
12657                                 break;
12658                         case GETOPT_VAL_INIT_CSUM:
12659                                 printf("Creating a new CRC tree\n");
12660                                 init_csum_tree = 1;
12661                                 repair = 1;
12662                                 ctree_flags |= OPEN_CTREE_WRITES;
12663                                 break;
12664                         case GETOPT_VAL_INIT_EXTENT:
12665                                 init_extent_tree = 1;
12666                                 ctree_flags |= (OPEN_CTREE_WRITES |
12667                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12668                                 repair = 1;
12669                                 break;
12670                         case GETOPT_VAL_CHECK_CSUM:
12671                                 check_data_csum = 1;
12672                                 break;
12673                         case GETOPT_VAL_MODE:
12674                                 check_mode = parse_check_mode(optarg);
12675                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12676                                         error("unknown mode: %s", optarg);
12677                                         exit(1);
12678                                 }
12679                                 break;
12680                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12681                                 if (strcmp(optarg, "v1") == 0) {
12682                                         clear_space_cache = 1;
12683                                 } else if (strcmp(optarg, "v2") == 0) {
12684                                         clear_space_cache = 2;
12685                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12686                                 } else {
12687                                         error(
12688                 "invalid argument to --clear-space-cache, must be v1 or v2");
12689                                         exit(1);
12690                                 }
12691                                 ctree_flags |= OPEN_CTREE_WRITES;
12692                                 break;
12693                 }
12694         }
12695
12696         if (check_argc_exact(argc - optind, 1))
12697                 usage(cmd_check_usage);
12698
12699         if (ctx.progress_enabled) {
12700                 ctx.tp = TASK_NOTHING;
12701                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12702         }
12703
12704         /* This check is the only reason for --readonly to exist */
12705         if (readonly && repair) {
12706                 error("repair options are not compatible with --readonly");
12707                 exit(1);
12708         }
12709
12710         /*
12711          * Not supported yet
12712          */
12713         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12714                 error("low memory mode doesn't support repair yet");
12715                 exit(1);
12716         }
12717
12718         radix_tree_init();
12719         cache_tree_init(&root_cache);
12720
12721         if((ret = check_mounted(argv[optind])) < 0) {
12722                 error("could not check mount status: %s", strerror(-ret));
12723                 err |= !!ret;
12724                 goto err_out;
12725         } else if(ret) {
12726                 error("%s is currently mounted, aborting", argv[optind]);
12727                 ret = -EBUSY;
12728                 err |= !!ret;
12729                 goto err_out;
12730         }
12731
12732         /* only allow partial opening under repair mode */
12733         if (repair)
12734                 ctree_flags |= OPEN_CTREE_PARTIAL;
12735
12736         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12737                                   chunk_root_bytenr, ctree_flags);
12738         if (!info) {
12739                 error("cannot open file system");
12740                 ret = -EIO;
12741                 err |= !!ret;
12742                 goto err_out;
12743         }
12744
12745         global_info = info;
12746         root = info->fs_root;
12747         if (clear_space_cache == 1) {
12748                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12749                         error(
12750                 "free space cache v2 detected, use --clear-space-cache v2");
12751                         ret = 1;
12752                         goto close_out;
12753                 }
12754                 printf("Clearing free space cache\n");
12755                 ret = clear_free_space_cache(info);
12756                 if (ret) {
12757                         error("failed to clear free space cache");
12758                         ret = 1;
12759                 } else {
12760                         printf("Free space cache cleared\n");
12761                 }
12762                 goto close_out;
12763         } else if (clear_space_cache == 2) {
12764                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12765                         printf("no free space cache v2 to clear\n");
12766                         ret = 0;
12767                         goto close_out;
12768                 }
12769                 printf("Clear free space cache v2\n");
12770                 ret = btrfs_clear_free_space_tree(info);
12771                 if (ret) {
12772                         error("failed to clear free space cache v2: %d", ret);
12773                         ret = 1;
12774                 } else {
12775                         printf("free space cache v2 cleared\n");
12776                 }
12777                 goto close_out;
12778         }
12779
12780         /*
12781          * repair mode will force us to commit transaction which
12782          * will make us fail to load log tree when mounting.
12783          */
12784         if (repair && btrfs_super_log_root(info->super_copy)) {
12785                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12786                 if (!ret) {
12787                         ret = 1;
12788                         err |= !!ret;
12789                         goto close_out;
12790                 }
12791                 ret = zero_log_tree(root);
12792                 err |= !!ret;
12793                 if (ret) {
12794                         error("failed to zero log tree: %d", ret);
12795                         goto close_out;
12796                 }
12797         }
12798
12799         uuid_unparse(info->super_copy->fsid, uuidbuf);
12800         if (qgroup_report) {
12801                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12802                        uuidbuf);
12803                 ret = qgroup_verify_all(info);
12804                 err |= !!ret;
12805                 if (ret == 0)
12806                         report_qgroups(1);
12807                 goto close_out;
12808         }
12809         if (subvolid) {
12810                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12811                        subvolid, argv[optind], uuidbuf);
12812                 ret = print_extent_state(info, subvolid);
12813                 err |= !!ret;
12814                 goto close_out;
12815         }
12816         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12817
12818         if (!extent_buffer_uptodate(info->tree_root->node) ||
12819             !extent_buffer_uptodate(info->dev_root->node) ||
12820             !extent_buffer_uptodate(info->chunk_root->node)) {
12821                 error("critical roots corrupted, unable to check the filesystem");
12822                 err |= !!ret;
12823                 ret = -EIO;
12824                 goto close_out;
12825         }
12826
12827         if (init_extent_tree || init_csum_tree) {
12828                 struct btrfs_trans_handle *trans;
12829
12830                 trans = btrfs_start_transaction(info->extent_root, 0);
12831                 if (IS_ERR(trans)) {
12832                         error("error starting transaction");
12833                         ret = PTR_ERR(trans);
12834                         err |= !!ret;
12835                         goto close_out;
12836                 }
12837
12838                 if (init_extent_tree) {
12839                         printf("Creating a new extent tree\n");
12840                         ret = reinit_extent_tree(trans, info);
12841                         err |= !!ret;
12842                         if (ret)
12843                                 goto close_out;
12844                 }
12845
12846                 if (init_csum_tree) {
12847                         printf("Reinitialize checksum tree\n");
12848                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12849                         if (ret) {
12850                                 error("checksum tree initialization failed: %d",
12851                                                 ret);
12852                                 ret = -EIO;
12853                                 err |= !!ret;
12854                                 goto close_out;
12855                         }
12856
12857                         ret = fill_csum_tree(trans, info->csum_root,
12858                                              init_extent_tree);
12859                         err |= !!ret;
12860                         if (ret) {
12861                                 error("checksum tree refilling failed: %d", ret);
12862                                 return -EIO;
12863                         }
12864                 }
12865                 /*
12866                  * Ok now we commit and run the normal fsck, which will add
12867                  * extent entries for all of the items it finds.
12868                  */
12869                 ret = btrfs_commit_transaction(trans, info->extent_root);
12870                 err |= !!ret;
12871                 if (ret)
12872                         goto close_out;
12873         }
12874         if (!extent_buffer_uptodate(info->extent_root->node)) {
12875                 error("critical: extent_root, unable to check the filesystem");
12876                 ret = -EIO;
12877                 err |= !!ret;
12878                 goto close_out;
12879         }
12880         if (!extent_buffer_uptodate(info->csum_root->node)) {
12881                 error("critical: csum_root, unable to check the filesystem");
12882                 ret = -EIO;
12883                 err |= !!ret;
12884                 goto close_out;
12885         }
12886
12887         if (!ctx.progress_enabled)
12888                 fprintf(stderr, "checking extents\n");
12889         if (check_mode == CHECK_MODE_LOWMEM)
12890                 ret = check_chunks_and_extents_v2(root);
12891         else
12892                 ret = check_chunks_and_extents(root);
12893         err |= !!ret;
12894         if (ret)
12895                 error(
12896                 "errors found in extent allocation tree or chunk allocation");
12897
12898         ret = repair_root_items(info);
12899         err |= !!ret;
12900         if (ret < 0)
12901                 goto close_out;
12902         if (repair) {
12903                 fprintf(stderr, "Fixed %d roots.\n", ret);
12904                 ret = 0;
12905         } else if (ret > 0) {
12906                 fprintf(stderr,
12907                        "Found %d roots with an outdated root item.\n",
12908                        ret);
12909                 fprintf(stderr,
12910                         "Please run a filesystem check with the option --repair to fix them.\n");
12911                 ret = 1;
12912                 err |= !!ret;
12913                 goto close_out;
12914         }
12915
12916         if (!ctx.progress_enabled) {
12917                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12918                         fprintf(stderr, "checking free space tree\n");
12919                 else
12920                         fprintf(stderr, "checking free space cache\n");
12921         }
12922         ret = check_space_cache(root);
12923         err |= !!ret;
12924         if (ret)
12925                 goto out;
12926
12927         /*
12928          * We used to have to have these hole extents in between our real
12929          * extents so if we don't have this flag set we need to make sure there
12930          * are no gaps in the file extents for inodes, otherwise we can just
12931          * ignore it when this happens.
12932          */
12933         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12934         if (!ctx.progress_enabled)
12935                 fprintf(stderr, "checking fs roots\n");
12936         if (check_mode == CHECK_MODE_LOWMEM)
12937                 ret = check_fs_roots_v2(root->fs_info);
12938         else
12939                 ret = check_fs_roots(root, &root_cache);
12940         err |= !!ret;
12941         if (ret)
12942                 goto out;
12943
12944         fprintf(stderr, "checking csums\n");
12945         ret = check_csums(root);
12946         err |= !!ret;
12947         if (ret)
12948                 goto out;
12949
12950         fprintf(stderr, "checking root refs\n");
12951         /* For low memory mode, check_fs_roots_v2 handles root refs */
12952         if (check_mode != CHECK_MODE_LOWMEM) {
12953                 ret = check_root_refs(root, &root_cache);
12954                 err |= !!ret;
12955                 if (ret)
12956                         goto out;
12957         }
12958
12959         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12960                 struct extent_buffer *eb;
12961
12962                 eb = list_first_entry(&root->fs_info->recow_ebs,
12963                                       struct extent_buffer, recow);
12964                 list_del_init(&eb->recow);
12965                 ret = recow_extent_buffer(root, eb);
12966                 err |= !!ret;
12967                 if (ret)
12968                         break;
12969         }
12970
12971         while (!list_empty(&delete_items)) {
12972                 struct bad_item *bad;
12973
12974                 bad = list_first_entry(&delete_items, struct bad_item, list);
12975                 list_del_init(&bad->list);
12976                 if (repair) {
12977                         ret = delete_bad_item(root, bad);
12978                         err |= !!ret;
12979                 }
12980                 free(bad);
12981         }
12982
12983         if (info->quota_enabled) {
12984                 fprintf(stderr, "checking quota groups\n");
12985                 ret = qgroup_verify_all(info);
12986                 err |= !!ret;
12987                 if (ret)
12988                         goto out;
12989                 report_qgroups(0);
12990                 ret = repair_qgroups(info, &qgroups_repaired);
12991                 err |= !!ret;
12992                 if (err)
12993                         goto out;
12994                 ret = 0;
12995         }
12996
12997         if (!list_empty(&root->fs_info->recow_ebs)) {
12998                 error("transid errors in file system");
12999                 ret = 1;
13000                 err |= !!ret;
13001         }
13002 out:
13003         if (found_old_backref) { /*
13004                  * there was a disk format change when mixed
13005                  * backref was in testing tree. The old format
13006                  * existed about one week.
13007                  */
13008                 printf("\n * Found old mixed backref format. "
13009                        "The old format is not supported! *"
13010                        "\n * Please mount the FS in readonly mode, "
13011                        "backup data and re-format the FS. *\n\n");
13012                 err |= 1;
13013         }
13014         printf("found %llu bytes used err is %d\n",
13015                (unsigned long long)bytes_used, ret);
13016         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13017         printf("total tree bytes: %llu\n",
13018                (unsigned long long)total_btree_bytes);
13019         printf("total fs tree bytes: %llu\n",
13020                (unsigned long long)total_fs_tree_bytes);
13021         printf("total extent tree bytes: %llu\n",
13022                (unsigned long long)total_extent_tree_bytes);
13023         printf("btree space waste bytes: %llu\n",
13024                (unsigned long long)btree_space_waste);
13025         printf("file data blocks allocated: %llu\n referenced %llu\n",
13026                 (unsigned long long)data_bytes_allocated,
13027                 (unsigned long long)data_bytes_referenced);
13028
13029         free_qgroup_counts();
13030         free_root_recs_tree(&root_cache);
13031 close_out:
13032         close_ctree(root);
13033 err_out:
13034         if (ctx.progress_enabled)
13035                 task_deinit(ctx.info);
13036
13037         return err;
13038 }