btrfs-progs: check: lowmem: Fix extent item size false alert
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check that the progress indicator can report.
 * The value is used as an index into the message table of
 * print_status_check(); TASK_NOTHING has no message there and makes
 * that function return without printing.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress task, see print_status_check(). */
struct task_ctx {
        int progress_enabled;
        /* Phase whose message is currently printed */
        enum task_position tp;

        /* Passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the check command: counters, two pending-work lists,
 * option flags and cached pointers. Everything starts out zero/NULL/empty.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress indicator context, see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementation to run; parse_check_mode() maps a name to a value. */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        /* Returned by parse_check_mode() for a name it does not know */
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; stays at the default unless changed elsewhere */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Header shared by the backref records: it is embedded as the 'node' member
 * of struct data_backref and struct tree_backref and links them into a list.
 */
struct extent_backref {
        struct list_head list;
        /* NOTE(review): presumably set for data_backref, clear for tree_backref - confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
/*
 * Backref record built on the common extent_backref header;
 * to_data_backref() recovers it from a pointer to 'node'.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share the same storage.
         * NOTE(review): presumably node.full_backref tells which one is
         * valid - confirm against the users.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
116
/*
 * Error bits, one flag per bit. Bit 0 is not assigned in this list.
 * NOTE(review): no user is visible in this part of the file; presumably
 * these belong to the low memory mode fs tree check - confirm.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* Printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
154
/*
 * Backref record built on the common extent_backref header;
 * to_tree_backref() recovers it from a pointer to 'node'.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share the same storage */
        union {
                u64 parent;
                u64 root;
        };
};
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };

/*
 * Bookkeeping record for one extent. It can be reached through its 'list'
 * member (see to_extent_record()) and carries a cache_extent node.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so it can hold FLAG_UNSET besides 0 and 1 */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
/*
 * Named reference linked on inode_record::backrefs.
 * The name bytes live directly behind the structure: clone_inode_rec()
 * copies sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* NOTE(review): presumably REF_ERR_* bits - confirm */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing storage for the name, sized at allocation time */
        char name[0];
};
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
/*
 * NOTE(review): not referenced in this part of the file; judging by the
 * member names this is a condensed copy of a root item - confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
232
/* Reference error bits; print_ref_error() turns them into text. */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
/*
 * One hole, covering [start, start + len), kept in an rb-tree
 * (inode_record::holes) that is ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
252
/*
 * Everything collected about one inode. Records are reference counted and
 * shared: get_inode_rec() clones a record before modification when 'refs'
 * is above 1, free_inode_rec() releases it when 'refs' drops to zero.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, printed by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* Set to (u64)-1 for a freshly created record */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count, 1 for a new or cloned record */
        u32 refs;
};
280
/* Bits of inode_record::errors; print_inode_error() turns them into text. */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
/*
 * Named reference linked on root_record::backrefs (see to_root_backref()).
 * NOTE(review): the layout mirrors struct inode_backref, so the name is
 * presumably stored right behind the structure as well - confirm.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing storage for the name */
        char name[0];
};
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
/* Per-root record that carries a cache_extent node and a backref list. */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
324
/*
 * Cache tree entry that carries an opaque pointer. get_inode_rec() uses it
 * with cache.start set to the inode number, cache.size set to 1 and 'data'
 * pointing at the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
329
/*
 * Cache tree entry holding a root cache and an inode cache of its own.
 * NOTE(review): presumably one per shared tree block (see
 * walk_control::shared) - confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
337
/* A start/size pair; not referenced in this part of the file. */
struct block_info {
        u64 start;
        u32 size;
};
342
/* State kept while walking a tree: one shared_node slot per tree level. */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* NOTE(review): presumably an index into nodes[] - confirm */
        int active_node;
        int root_level;
};
349
/* A key together with a root id, linkable into a list. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
355
/* A bytenr/bytes pair with a counter and a 'broken' flag, linkable into a list. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
363
/* Summary of a tree root, stored as an entry of a cache tree. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
373
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart in a
 * combined error value - confirm whether that is intended.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (name_len <= BTRFS_NAME_LEN) {
1516                         len = name_len;
1517                         error = 0;
1518                 } else {
1519                         len = BTRFS_NAME_LEN;
1520                         error = REF_ERR_NAME_TOO_LONG;
1521                 }
1522                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1523
1524                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525                         add_inode_backref(inode_cache, location.objectid,
1526                                           key->objectid, key->offset, namebuf,
1527                                           len, filetype, key->type, error);
1528                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529                         add_inode_backref(root_cache, location.objectid,
1530                                           key->objectid, key->offset,
1531                                           namebuf, len, filetype,
1532                                           key->type, error);
1533                 } else {
1534                         fprintf(stderr, "invalid location in dir item %u\n",
1535                                 location.type);
1536                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537                                           key->objectid, key->offset, namebuf,
1538                                           len, filetype, key->type, error);
1539                 }
1540
1541                 len = sizeof(*di) + name_len + data_len;
1542                 di = (struct btrfs_dir_item *)((char *)di + len);
1543                 cur += len;
1544         }
1545         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1547
1548         return 0;
1549 }
1550
1551 static int process_inode_ref(struct extent_buffer *eb,
1552                              int slot, struct btrfs_key *key,
1553                              struct shared_node *active_node)
1554 {
1555         u32 total;
1556         u32 cur = 0;
1557         u32 len;
1558         u32 name_len;
1559         u64 index;
1560         int error;
1561         struct cache_tree *inode_cache;
1562         struct btrfs_inode_ref *ref;
1563         char namebuf[BTRFS_NAME_LEN];
1564
1565         inode_cache = &active_node->inode_cache;
1566
1567         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568         total = btrfs_item_size_nr(eb, slot);
1569         while (cur < total) {
1570                 name_len = btrfs_inode_ref_name_len(eb, ref);
1571                 index = btrfs_inode_ref_index(eb, ref);
1572                 if (name_len <= BTRFS_NAME_LEN) {
1573                         len = name_len;
1574                         error = 0;
1575                 } else {
1576                         len = BTRFS_NAME_LEN;
1577                         error = REF_ERR_NAME_TOO_LONG;
1578                 }
1579                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580                 add_inode_backref(inode_cache, key->objectid, key->offset,
1581                                   index, namebuf, len, 0, key->type, error);
1582
1583                 len = sizeof(*ref) + name_len;
1584                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1585                 cur += len;
1586         }
1587         return 0;
1588 }
1589
1590 static int process_inode_extref(struct extent_buffer *eb,
1591                                 int slot, struct btrfs_key *key,
1592                                 struct shared_node *active_node)
1593 {
1594         u32 total;
1595         u32 cur = 0;
1596         u32 len;
1597         u32 name_len;
1598         u64 index;
1599         u64 parent;
1600         int error;
1601         struct cache_tree *inode_cache;
1602         struct btrfs_inode_extref *extref;
1603         char namebuf[BTRFS_NAME_LEN];
1604
1605         inode_cache = &active_node->inode_cache;
1606
1607         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608         total = btrfs_item_size_nr(eb, slot);
1609         while (cur < total) {
1610                 name_len = btrfs_inode_extref_name_len(eb, extref);
1611                 index = btrfs_inode_extref_index(eb, extref);
1612                 parent = btrfs_inode_extref_parent(eb, extref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf,
1621                                    (unsigned long)(extref + 1), len);
1622                 add_inode_backref(inode_cache, key->objectid, parent,
1623                                   index, namebuf, len, 0, key->type, error);
1624
1625                 len = sizeof(*extref) + name_len;
1626                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1627                 cur += len;
1628         }
1629         return 0;
1630
1631 }
1632
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634                             u64 len, u64 *found)
1635 {
1636         struct btrfs_key key;
1637         struct btrfs_path path;
1638         struct extent_buffer *leaf;
1639         int ret;
1640         size_t size;
1641         *found = 0;
1642         u64 csum_end;
1643         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1644
1645         btrfs_init_path(&path);
1646
1647         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1648         key.offset = start;
1649         key.type = BTRFS_EXTENT_CSUM_KEY;
1650
1651         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1652                                 &key, &path, 0, 0);
1653         if (ret < 0)
1654                 goto out;
1655         if (ret > 0 && path.slots[0] > 0) {
1656                 leaf = path.nodes[0];
1657                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659                     key.type == BTRFS_EXTENT_CSUM_KEY)
1660                         path.slots[0]--;
1661         }
1662
1663         while (len > 0) {
1664                 leaf = path.nodes[0];
1665                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1667                         if (ret > 0)
1668                                 break;
1669                         else if (ret < 0)
1670                                 goto out;
1671                         leaf = path.nodes[0];
1672                 }
1673
1674                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676                     key.type != BTRFS_EXTENT_CSUM_KEY)
1677                         break;
1678
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680                 if (key.offset >= start + len)
1681                         break;
1682
1683                 if (key.offset > start)
1684                         start = key.offset;
1685
1686                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688                 if (csum_end > start) {
1689                         size = min(csum_end - start, len);
1690                         len -= size;
1691                         start += size;
1692                         *found += size;
1693                 }
1694
1695                 path.slots[0]++;
1696         }
1697 out:
1698         btrfs_release_path(&path);
1699         if (ret < 0)
1700                 return ret;
1701         return 0;
1702 }
1703
1704 static int process_file_extent(struct btrfs_root *root,
1705                                 struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         struct inode_record *rec;
1710         struct btrfs_file_extent_item *fi;
1711         u64 num_bytes = 0;
1712         u64 disk_bytenr = 0;
1713         u64 extent_offset = 0;
1714         u64 mask = root->sectorsize - 1;
1715         int extent_type;
1716         int ret;
1717
1718         rec = active_node->current;
1719         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720         rec->found_file_extent = 1;
1721
1722         if (rec->extent_start == (u64)-1) {
1723                 rec->extent_start = key->offset;
1724                 rec->extent_end = key->offset;
1725         }
1726
1727         if (rec->extent_end > key->offset)
1728                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729         else if (rec->extent_end < key->offset) {
1730                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731                                            key->offset - rec->extent_end);
1732                 if (ret < 0)
1733                         return ret;
1734         }
1735
1736         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737         extent_type = btrfs_file_extent_type(eb, fi);
1738
1739         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1741                 if (num_bytes == 0)
1742                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743                 rec->found_size += num_bytes;
1744                 num_bytes = (num_bytes + mask) & ~mask;
1745         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749                 extent_offset = btrfs_file_extent_offset(eb, fi);
1750                 if (num_bytes == 0 || (num_bytes & mask))
1751                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752                 if (num_bytes + extent_offset >
1753                     btrfs_file_extent_ram_bytes(eb, fi))
1754                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756                     (btrfs_file_extent_compression(eb, fi) ||
1757                      btrfs_file_extent_encryption(eb, fi) ||
1758                      btrfs_file_extent_other_encoding(eb, fi)))
1759                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760                 if (disk_bytenr > 0)
1761                         rec->found_size += num_bytes;
1762         } else {
1763                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1764         }
1765         rec->extent_end = key->offset + num_bytes;
1766
1767         /*
1768          * The data reloc tree will copy full extents into its inode and then
1769          * copy the corresponding csums.  Because the extent it copied could be
1770          * a preallocated extent that hasn't been written to yet there may be no
1771          * csums to copy, ergo we won't have csums for our file extent.  This is
1772          * ok so just don't bother checking csums if the inode belongs to the
1773          * data reloc tree.
1774          */
1775         if (disk_bytenr > 0 &&
1776             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1777                 u64 found;
1778                 if (btrfs_file_extent_compression(eb, fi))
1779                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1780                 else
1781                         disk_bytenr += extent_offset;
1782
1783                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1784                 if (ret < 0)
1785                         return ret;
1786                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1787                         if (found > 0)
1788                                 rec->found_csum_item = 1;
1789                         if (found < num_bytes)
1790                                 rec->some_csum_missing = 1;
1791                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1792                         if (found > 0)
1793                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1794                 }
1795         }
1796         return 0;
1797 }
1798
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800                             struct walk_control *wc)
1801 {
1802         struct btrfs_key key;
1803         u32 nritems;
1804         int i;
1805         int ret = 0;
1806         struct cache_tree *inode_cache;
1807         struct shared_node *active_node;
1808
1809         if (wc->root_level == wc->active_node &&
1810             btrfs_root_refs(&root->root_item) == 0)
1811                 return 0;
1812
1813         active_node = wc->nodes[wc->active_node];
1814         inode_cache = &active_node->inode_cache;
1815         nritems = btrfs_header_nritems(eb);
1816         for (i = 0; i < nritems; i++) {
1817                 btrfs_item_key_to_cpu(eb, &key, i);
1818
1819                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1820                         continue;
1821                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1822                         continue;
1823
1824                 if (active_node->current == NULL ||
1825                     active_node->current->ino < key.objectid) {
1826                         if (active_node->current) {
1827                                 active_node->current->checked = 1;
1828                                 maybe_free_inode_rec(inode_cache,
1829                                                      active_node->current);
1830                         }
1831                         active_node->current = get_inode_rec(inode_cache,
1832                                                              key.objectid, 1);
1833                         BUG_ON(IS_ERR(active_node->current));
1834                 }
1835                 switch (key.type) {
1836                 case BTRFS_DIR_ITEM_KEY:
1837                 case BTRFS_DIR_INDEX_KEY:
1838                         ret = process_dir_item(eb, i, &key, active_node);
1839                         break;
1840                 case BTRFS_INODE_REF_KEY:
1841                         ret = process_inode_ref(eb, i, &key, active_node);
1842                         break;
1843                 case BTRFS_INODE_EXTREF_KEY:
1844                         ret = process_inode_extref(eb, i, &key, active_node);
1845                         break;
1846                 case BTRFS_INODE_ITEM_KEY:
1847                         ret = process_inode_item(eb, i, &key, active_node);
1848                         break;
1849                 case BTRFS_EXTENT_DATA_KEY:
1850                         ret = process_file_extent(root, eb, i, &key,
1851                                                   active_node);
1852                         break;
1853                 default:
1854                         break;
1855                 };
1856         }
1857         return ret;
1858 }
1859
/*
 * Per-level cache of the last tree block whose extent info was looked up,
 * so the lookup does not have to be repeated while the walk stays on the
 * same block.  All arrays are indexed by tree level.
 */
struct node_refs {
        /* Start (bytenr) of the tree block cached for this level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* Reference count of that block as returned by the extent lookup */
        u64 refs[BTRFS_MAX_LEVEL];
        /* Non-zero when the current root is the one that checks the block */
        int need_check[BTRFS_MAX_LEVEL];
};
1865
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867                              struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869                             unsigned int ext_ref);
1870
1871 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1872                                struct node_refs *nrefs, int *level, int ext_ref)
1873 {
1874         struct extent_buffer *cur = path->nodes[0];
1875         struct btrfs_key key;
1876         u64 cur_bytenr;
1877         u32 nritems;
1878         u64 first_ino = 0;
1879         int root_level = btrfs_header_level(root->node);
1880         int i;
1881         int ret = 0; /* Final return value */
1882         int err = 0; /* Positive error bitmap */
1883
1884         cur_bytenr = cur->start;
1885
1886         /* skip to first inode item or the first inode number change */
1887         nritems = btrfs_header_nritems(cur);
1888         for (i = 0; i < nritems; i++) {
1889                 btrfs_item_key_to_cpu(cur, &key, i);
1890                 if (i == 0)
1891                         first_ino = key.objectid;
1892                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1893                     (first_ino && first_ino != key.objectid))
1894                         break;
1895         }
1896         if (i == nritems) {
1897                 path->slots[0] = nritems;
1898                 return 0;
1899         }
1900         path->slots[0] = i;
1901
1902 again:
1903         err |= check_inode_item(root, path, ext_ref);
1904
1905         if (err & LAST_ITEM)
1906                 goto out;
1907
1908         /* still have inode items in thie leaf */
1909         if (cur->start == cur_bytenr)
1910                 goto again;
1911
1912         /*
1913          * we have switched to another leaf, above nodes may
1914          * have changed, here walk down the path, if a node
1915          * or leaf is shared, check whether we can skip this
1916          * node or leaf.
1917          */
1918         for (i = root_level; i >= 0; i--) {
1919                 if (path->nodes[i]->start == nrefs->bytenr[i])
1920                         continue;
1921
1922                 ret = update_nodes_refs(root,
1923                                 path->nodes[i]->start,
1924                                 nrefs, i);
1925                 if (ret)
1926                         goto out;
1927
1928                 if (!nrefs->need_check[i]) {
1929                         *level += 1;
1930                         break;
1931                 }
1932         }
1933
1934         for (i = 0; i < *level; i++) {
1935                 free_extent_buffer(path->nodes[i]);
1936                 path->nodes[i] = NULL;
1937         }
1938 out:
1939         err &= ~LAST_ITEM;
1940         /*
1941          * Convert any error bitmap to -EIO, as we should avoid
1942          * mixing positive and negative return value to represent
1943          * error
1944          */
1945         if (err && !ret)
1946                 ret = -EIO;
1947         return ret;
1948 }
1949
1950 static void reada_walk_down(struct btrfs_root *root,
1951                             struct extent_buffer *node, int slot)
1952 {
1953         u64 bytenr;
1954         u64 ptr_gen;
1955         u32 nritems;
1956         u32 blocksize;
1957         int i;
1958         int level;
1959
1960         level = btrfs_header_level(node);
1961         if (level != 1)
1962                 return;
1963
1964         nritems = btrfs_header_nritems(node);
1965         blocksize = root->nodesize;
1966         for (i = slot; i < nritems; i++) {
1967                 bytenr = btrfs_node_blockptr(node, i);
1968                 ptr_gen = btrfs_node_ptr_generation(node, i);
1969                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1970         }
1971 }
1972
1973 /*
1974  * Check the child node/leaf by the following condition:
1975  * 1. the first item key of the node/leaf should be the same with the one
1976  *    in parent.
1977  * 2. block in parent node should match the child node/leaf.
1978  * 3. generation of parent node and child's header should be consistent.
1979  *
1980  * Or the child node/leaf pointed by the key in parent is not valid.
1981  *
1982  * We hope to check leaf owner too, but since subvol may share leaves,
1983  * which makes leaf owner check not so strong, key check should be
1984  * sufficient enough for that case.
1985  */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987                             struct extent_buffer *child)
1988 {
1989         struct btrfs_key parent_key;
1990         struct btrfs_key child_key;
1991         int ret = 0;
1992
1993         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994         if (btrfs_header_level(child) == 0)
1995                 btrfs_item_key_to_cpu(child, &child_key, 0);
1996         else
1997                 btrfs_node_key_to_cpu(child, &child_key, 0);
1998
1999         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2000                 ret = -EINVAL;
2001                 fprintf(stderr,
2002                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003                         parent_key.objectid, parent_key.type, parent_key.offset,
2004                         child_key.objectid, child_key.type, child_key.offset);
2005         }
2006         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2007                 ret = -EINVAL;
2008                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009                         btrfs_node_blockptr(parent, slot),
2010                         btrfs_header_bytenr(child));
2011         }
2012         if (btrfs_node_ptr_generation(parent, slot) !=
2013             btrfs_header_generation(child)) {
2014                 ret = -EINVAL;
2015                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016                         btrfs_header_generation(child),
2017                         btrfs_node_ptr_generation(parent, slot));
2018         }
2019         return ret;
2020 }
2021
2022 /*
2023  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024  * in every fs or file tree check. Here we find its all root ids, and only check
2025  * it in the fs or file tree which has the smallest root id.
2026  */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2028 {
2029         struct rb_node *node;
2030         struct ulist_node *u;
2031
2032         if (roots->nnodes == 1)
2033                 return 1;
2034
2035         node = rb_first(&roots->root);
2036         u = rb_entry(node, struct ulist_node, rb_node);
2037         /*
2038          * current root id is not smallest, we skip it and let it be checked
2039          * in the fs or file tree who hash the smallest root id.
2040          */
2041         if (root->objectid != u->val)
2042                 return 0;
2043
2044         return 1;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, we record its reference count, so later if we still
2049  * process this node or leaf, don't need to compute its reference count again.
2050  */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052                              struct node_refs *nrefs, u64 level)
2053 {
2054         int check, ret;
2055         u64 refs;
2056         struct ulist *roots;
2057
2058         if (nrefs->bytenr[level] != bytenr) {
2059                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060                                        level, 1, &refs, NULL);
2061                 if (ret < 0)
2062                         return ret;
2063
2064                 nrefs->bytenr[level] = bytenr;
2065                 nrefs->refs[level] = refs;
2066                 if (refs > 1) {
2067                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2068                                                    0, &roots);
2069                         if (ret)
2070                                 return -EIO;
2071
2072                         check = need_check(root, roots);
2073                         ulist_free(roots);
2074                         nrefs->need_check[level] = check;
2075                 } else {
2076                         nrefs->need_check[level] = 1;
2077                 }
2078         }
2079
2080         return 0;
2081 }
2082
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084                           struct walk_control *wc, int *level,
2085                           struct node_refs *nrefs)
2086 {
2087         enum btrfs_tree_block_status status;
2088         u64 bytenr;
2089         u64 ptr_gen;
2090         struct extent_buffer *next;
2091         struct extent_buffer *cur;
2092         u32 blocksize;
2093         int ret, err = 0;
2094         u64 refs;
2095
2096         WARN_ON(*level < 0);
2097         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2098
2099         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100                 refs = nrefs->refs[*level];
2101                 ret = 0;
2102         } else {
2103                 ret = btrfs_lookup_extent_info(NULL, root,
2104                                        path->nodes[*level]->start,
2105                                        *level, 1, &refs, NULL);
2106                 if (ret < 0) {
2107                         err = ret;
2108                         goto out;
2109                 }
2110                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111                 nrefs->refs[*level] = refs;
2112         }
2113
2114         if (refs > 1) {
2115                 ret = enter_shared_node(root, path->nodes[*level]->start,
2116                                         refs, wc, *level);
2117                 if (ret > 0) {
2118                         err = ret;
2119                         goto out;
2120                 }
2121         }
2122
2123         while (*level >= 0) {
2124                 WARN_ON(*level < 0);
2125                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126                 cur = path->nodes[*level];
2127
2128                 if (btrfs_header_level(cur) != *level)
2129                         WARN_ON(1);
2130
2131                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2132                         break;
2133                 if (*level == 0) {
2134                         ret = process_one_leaf(root, cur, wc);
2135                         if (ret < 0)
2136                                 err = ret;
2137                         break;
2138                 }
2139                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141                 blocksize = root->nodesize;
2142
2143                 if (bytenr == nrefs->bytenr[*level - 1]) {
2144                         refs = nrefs->refs[*level - 1];
2145                 } else {
2146                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147                                         *level - 1, 1, &refs, NULL);
2148                         if (ret < 0) {
2149                                 refs = 0;
2150                         } else {
2151                                 nrefs->bytenr[*level - 1] = bytenr;
2152                                 nrefs->refs[*level - 1] = refs;
2153                         }
2154                 }
2155
2156                 if (refs > 1) {
2157                         ret = enter_shared_node(root, bytenr, refs,
2158                                                 wc, *level - 1);
2159                         if (ret > 0) {
2160                                 path->slots[*level]++;
2161                                 continue;
2162                         }
2163                 }
2164
2165                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167                         free_extent_buffer(next);
2168                         reada_walk_down(root, cur, path->slots[*level]);
2169                         next = read_tree_block(root, bytenr, blocksize,
2170                                                ptr_gen);
2171                         if (!extent_buffer_uptodate(next)) {
2172                                 struct btrfs_key node_key;
2173
2174                                 btrfs_node_key_to_cpu(path->nodes[*level],
2175                                                       &node_key,
2176                                                       path->slots[*level]);
2177                                 btrfs_add_corrupt_extent_record(root->fs_info,
2178                                                 &node_key,
2179                                                 path->nodes[*level]->start,
2180                                                 root->nodesize, *level);
2181                                 err = -EIO;
2182                                 goto out;
2183                         }
2184                 }
2185
2186                 ret = check_child_node(cur, path->slots[*level], next);
2187                 if (ret) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191
2192                 if (btrfs_is_leaf(next))
2193                         status = btrfs_check_leaf(root, NULL, next);
2194                 else
2195                         status = btrfs_check_node(root, NULL, next);
2196                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2197                         free_extent_buffer(next);
2198                         err = -EIO;
2199                         goto out;
2200                 }
2201
2202                 *level = *level - 1;
2203                 free_extent_buffer(path->nodes[*level]);
2204                 path->nodes[*level] = next;
2205                 path->slots[*level] = 0;
2206         }
2207 out:
2208         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2209         return err;
2210 }
2211
2212 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2213                             unsigned int ext_ref);
2214
2215 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2216                              int *level, struct node_refs *nrefs, int ext_ref)
2217 {
2218         enum btrfs_tree_block_status status;
2219         u64 bytenr;
2220         u64 ptr_gen;
2221         struct extent_buffer *next;
2222         struct extent_buffer *cur;
2223         u32 blocksize;
2224         int ret;
2225
2226         WARN_ON(*level < 0);
2227         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2228
2229         ret = update_nodes_refs(root, path->nodes[*level]->start,
2230                                 nrefs, *level);
2231         if (ret < 0)
2232                 return ret;
2233
2234         while (*level >= 0) {
2235                 WARN_ON(*level < 0);
2236                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2237                 cur = path->nodes[*level];
2238
2239                 if (btrfs_header_level(cur) != *level)
2240                         WARN_ON(1);
2241
2242                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243                         break;
2244                 /* Don't forgot to check leaf/node validation */
2245                 if (*level == 0) {
2246                         ret = btrfs_check_leaf(root, NULL, cur);
2247                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2248                                 ret = -EIO;
2249                                 break;
2250                         }
2251                         ret = process_one_leaf_v2(root, path, nrefs,
2252                                                   level, ext_ref);
2253                         break;
2254                 } else {
2255                         ret = btrfs_check_node(root, NULL, cur);
2256                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257                                 ret = -EIO;
2258                                 break;
2259                         }
2260                 }
2261                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2262                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2263                 blocksize = root->nodesize;
2264
2265                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2266                 if (ret)
2267                         break;
2268                 if (!nrefs->need_check[*level - 1]) {
2269                         path->slots[*level]++;
2270                         continue;
2271                 }
2272
2273                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root, bytenr, blocksize,
2278                                                ptr_gen);
2279                         if (!extent_buffer_uptodate(next)) {
2280                                 struct btrfs_key node_key;
2281
2282                                 btrfs_node_key_to_cpu(path->nodes[*level],
2283                                                       &node_key,
2284                                                       path->slots[*level]);
2285                                 btrfs_add_corrupt_extent_record(root->fs_info,
2286                                                 &node_key,
2287                                                 path->nodes[*level]->start,
2288                                                 root->nodesize, *level);
2289                                 ret = -EIO;
2290                                 break;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret < 0) 
2296                         break;
2297
2298                 if (btrfs_is_leaf(next))
2299                         status = btrfs_check_leaf(root, NULL, next);
2300                 else
2301                         status = btrfs_check_node(root, NULL, next);
2302                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2303                         free_extent_buffer(next);
2304                         ret = -EIO;
2305                         break;
2306                 }
2307
2308                 *level = *level - 1;
2309                 free_extent_buffer(path->nodes[*level]);
2310                 path->nodes[*level] = next;
2311                 path->slots[*level] = 0;
2312         }
2313         return ret;
2314 }
2315
2316 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2317                         struct walk_control *wc, int *level)
2318 {
2319         int i;
2320         struct extent_buffer *leaf;
2321
2322         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2323                 leaf = path->nodes[i];
2324                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2325                         path->slots[i]++;
2326                         *level = i;
2327                         return 0;
2328                 } else {
2329                         free_extent_buffer(path->nodes[*level]);
2330                         path->nodes[*level] = NULL;
2331                         BUG_ON(*level > wc->active_node);
2332                         if (*level == wc->active_node)
2333                                 leave_shared_node(root, wc, *level);
2334                         *level = i + 1;
2335                 }
2336         }
2337         return 1;
2338 }
2339
2340 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2341                            int *level)
2342 {
2343         int i;
2344         struct extent_buffer *leaf;
2345
2346         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2347                 leaf = path->nodes[i];
2348                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2349                         path->slots[i]++;
2350                         *level = i;
2351                         return 0;
2352                 } else {
2353                         free_extent_buffer(path->nodes[*level]);
2354                         path->nodes[*level] = NULL;
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int check_root_dir(struct inode_record *rec)
2362 {
2363         struct inode_backref *backref;
2364         int ret = -1;
2365
2366         if (!rec->found_inode_item || rec->errors)
2367                 goto out;
2368         if (rec->nlink != 1 || rec->found_link != 0)
2369                 goto out;
2370         if (list_empty(&rec->backrefs))
2371                 goto out;
2372         backref = to_inode_backref(rec->backrefs.next);
2373         if (!backref->found_inode_ref)
2374                 goto out;
2375         if (backref->index != 0 || backref->namelen != 2 ||
2376             memcmp(backref->name, "..", 2))
2377                 goto out;
2378         if (backref->found_dir_index || backref->found_dir_item)
2379                 goto out;
2380         ret = 0;
2381 out:
2382         return ret;
2383 }
2384
2385 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2386                               struct btrfs_root *root, struct btrfs_path *path,
2387                               struct inode_record *rec)
2388 {
2389         struct btrfs_inode_item *ei;
2390         struct btrfs_key key;
2391         int ret;
2392
2393         key.objectid = rec->ino;
2394         key.type = BTRFS_INODE_ITEM_KEY;
2395         key.offset = (u64)-1;
2396
2397         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2398         if (ret < 0)
2399                 goto out;
2400         if (ret) {
2401                 if (!path->slots[0]) {
2402                         ret = -ENOENT;
2403                         goto out;
2404                 }
2405                 path->slots[0]--;
2406                 ret = 0;
2407         }
2408         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2409         if (key.objectid != rec->ino) {
2410                 ret = -ENOENT;
2411                 goto out;
2412         }
2413
2414         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2415                             struct btrfs_inode_item);
2416         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2417         btrfs_mark_buffer_dirty(path->nodes[0]);
2418         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2419         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2420                root->root_key.objectid);
2421 out:
2422         btrfs_release_path(path);
2423         return ret;
2424 }
2425
2426 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2427                                     struct btrfs_root *root,
2428                                     struct btrfs_path *path,
2429                                     struct inode_record *rec)
2430 {
2431         int ret;
2432
2433         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2434         btrfs_release_path(path);
2435         if (!ret)
2436                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2437         return ret;
2438 }
2439
2440 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2441                                struct btrfs_root *root,
2442                                struct btrfs_path *path,
2443                                struct inode_record *rec)
2444 {
2445         struct btrfs_inode_item *ei;
2446         struct btrfs_key key;
2447         int ret = 0;
2448
2449         key.objectid = rec->ino;
2450         key.type = BTRFS_INODE_ITEM_KEY;
2451         key.offset = 0;
2452
2453         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2454         if (ret) {
2455                 if (ret > 0)
2456                         ret = -ENOENT;
2457                 goto out;
2458         }
2459
2460         /* Since ret == 0, no need to check anything */
2461         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2462                             struct btrfs_inode_item);
2463         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2464         btrfs_mark_buffer_dirty(path->nodes[0]);
2465         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2466         printf("reset nbytes for ino %llu root %llu\n",
2467                rec->ino, root->root_key.objectid);
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int add_missing_dir_index(struct btrfs_root *root,
2474                                  struct cache_tree *inode_cache,
2475                                  struct inode_record *rec,
2476                                  struct inode_backref *backref)
2477 {
2478         struct btrfs_path path;
2479         struct btrfs_trans_handle *trans;
2480         struct btrfs_dir_item *dir_item;
2481         struct extent_buffer *leaf;
2482         struct btrfs_key key;
2483         struct btrfs_disk_key disk_key;
2484         struct inode_record *dir_rec;
2485         unsigned long name_ptr;
2486         u32 data_size = sizeof(*dir_item) + backref->namelen;
2487         int ret;
2488
2489         trans = btrfs_start_transaction(root, 1);
2490         if (IS_ERR(trans))
2491                 return PTR_ERR(trans);
2492
2493         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2494                 (unsigned long long)rec->ino);
2495
2496         btrfs_init_path(&path);
2497         key.objectid = backref->dir;
2498         key.type = BTRFS_DIR_INDEX_KEY;
2499         key.offset = backref->index;
2500         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2501         BUG_ON(ret);
2502
2503         leaf = path.nodes[0];
2504         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2505
2506         disk_key.objectid = cpu_to_le64(rec->ino);
2507         disk_key.type = BTRFS_INODE_ITEM_KEY;
2508         disk_key.offset = 0;
2509
2510         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2511         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2512         btrfs_set_dir_data_len(leaf, dir_item, 0);
2513         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2514         name_ptr = (unsigned long)(dir_item + 1);
2515         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2516         btrfs_mark_buffer_dirty(leaf);
2517         btrfs_release_path(&path);
2518         btrfs_commit_transaction(trans, root);
2519
2520         backref->found_dir_index = 1;
2521         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2522         BUG_ON(IS_ERR(dir_rec));
2523         if (!dir_rec)
2524                 return 0;
2525         dir_rec->found_size += backref->namelen;
2526         if (dir_rec->found_size == dir_rec->isize &&
2527             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2528                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2529         if (dir_rec->found_size != dir_rec->isize)
2530                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2531
2532         return 0;
2533 }
2534
2535 static int delete_dir_index(struct btrfs_root *root,
2536                             struct inode_backref *backref)
2537 {
2538         struct btrfs_trans_handle *trans;
2539         struct btrfs_dir_item *di;
2540         struct btrfs_path path;
2541         int ret = 0;
2542
2543         trans = btrfs_start_transaction(root, 1);
2544         if (IS_ERR(trans))
2545                 return PTR_ERR(trans);
2546
2547         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2548                 (unsigned long long)backref->dir,
2549                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2550                 (unsigned long long)root->objectid);
2551
2552         btrfs_init_path(&path);
2553         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2554                                     backref->name, backref->namelen,
2555                                     backref->index, -1);
2556         if (IS_ERR(di)) {
2557                 ret = PTR_ERR(di);
2558                 btrfs_release_path(&path);
2559                 btrfs_commit_transaction(trans, root);
2560                 if (ret == -ENOENT)
2561                         return 0;
2562                 return ret;
2563         }
2564
2565         if (!di)
2566                 ret = btrfs_del_item(trans, root, &path);
2567         else
2568                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2569         BUG_ON(ret);
2570         btrfs_release_path(&path);
2571         btrfs_commit_transaction(trans, root);
2572         return ret;
2573 }
2574
2575 static int create_inode_item(struct btrfs_root *root,
2576                              struct inode_record *rec,
2577                              int root_dir)
2578 {
2579         struct btrfs_trans_handle *trans;
2580         struct btrfs_inode_item inode_item;
2581         time_t now = time(NULL);
2582         int ret;
2583
2584         trans = btrfs_start_transaction(root, 1);
2585         if (IS_ERR(trans)) {
2586                 ret = PTR_ERR(trans);
2587                 return ret;
2588         }
2589
2590         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2591                 "be incomplete, please check permissions and content after "
2592                 "the fsck completes.\n", (unsigned long long)root->objectid,
2593                 (unsigned long long)rec->ino);
2594
2595         memset(&inode_item, 0, sizeof(inode_item));
2596         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2597         if (root_dir)
2598                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2599         else
2600                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2601         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2602         if (rec->found_dir_item) {
2603                 if (rec->found_file_extent)
2604                         fprintf(stderr, "root %llu inode %llu has both a dir "
2605                                 "item and extents, unsure if it is a dir or a "
2606                                 "regular file so setting it as a directory\n",
2607                                 (unsigned long long)root->objectid,
2608                                 (unsigned long long)rec->ino);
2609                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2610                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2611         } else if (!rec->found_dir_item) {
2612                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2614         }
2615         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2616         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2617         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2618         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2619         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2622         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2623
2624         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2625         BUG_ON(ret);
2626         btrfs_commit_transaction(trans, root);
2627         return 0;
2628 }
2629
2630 static int repair_inode_backrefs(struct btrfs_root *root,
2631                                  struct inode_record *rec,
2632                                  struct cache_tree *inode_cache,
2633                                  int delete)
2634 {
2635         struct inode_backref *tmp, *backref;
2636         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2637         int ret = 0;
2638         int repaired = 0;
2639
2640         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2641                 if (!delete && rec->ino == root_dirid) {
2642                         if (!rec->found_inode_item) {
2643                                 ret = create_inode_item(root, rec, 1);
2644                                 if (ret)
2645                                         break;
2646                                 repaired++;
2647                         }
2648                 }
2649
2650                 /* Index 0 for root dir's are special, don't mess with it */
2651                 if (rec->ino == root_dirid && backref->index == 0)
2652                         continue;
2653
2654                 if (delete &&
2655                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2656                      (backref->found_dir_index && backref->found_inode_ref &&
2657                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2658                         ret = delete_dir_index(root, backref);
2659                         if (ret)
2660                                 break;
2661                         repaired++;
2662                         list_del(&backref->list);
2663                         free(backref);
2664                 }
2665
2666                 if (!delete && !backref->found_dir_index &&
2667                     backref->found_dir_item && backref->found_inode_ref) {
2668                         ret = add_missing_dir_index(root, inode_cache, rec,
2669                                                     backref);
2670                         if (ret)
2671                                 break;
2672                         repaired++;
2673                         if (backref->found_dir_item &&
2674                             backref->found_dir_index &&
2675                             backref->found_dir_index) {
2676                                 if (!backref->errors &&
2677                                     backref->found_inode_ref) {
2678                                         list_del(&backref->list);
2679                                         free(backref);
2680                                 }
2681                         }
2682                 }
2683
2684                 if (!delete && (!backref->found_dir_index &&
2685                                 !backref->found_dir_item &&
2686                                 backref->found_inode_ref)) {
2687                         struct btrfs_trans_handle *trans;
2688                         struct btrfs_key location;
2689
2690                         ret = check_dir_conflict(root, backref->name,
2691                                                  backref->namelen,
2692                                                  backref->dir,
2693                                                  backref->index);
2694                         if (ret) {
2695                                 /*
2696                                  * let nlink fixing routine to handle it,
2697                                  * which can do it better.
2698                                  */
2699                                 ret = 0;
2700                                 break;
2701                         }
2702                         location.objectid = rec->ino;
2703                         location.type = BTRFS_INODE_ITEM_KEY;
2704                         location.offset = 0;
2705
2706                         trans = btrfs_start_transaction(root, 1);
2707                         if (IS_ERR(trans)) {
2708                                 ret = PTR_ERR(trans);
2709                                 break;
2710                         }
2711                         fprintf(stderr, "adding missing dir index/item pair "
2712                                 "for inode %llu\n",
2713                                 (unsigned long long)rec->ino);
2714                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2715                                                     backref->namelen,
2716                                                     backref->dir, &location,
2717                                                     imode_to_type(rec->imode),
2718                                                     backref->index);
2719                         BUG_ON(ret);
2720                         btrfs_commit_transaction(trans, root);
2721                         repaired++;
2722                 }
2723
2724                 if (!delete && (backref->found_inode_ref &&
2725                                 backref->found_dir_index &&
2726                                 backref->found_dir_item &&
2727                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2728                                 !rec->found_inode_item)) {
2729                         ret = create_inode_item(root, rec, 0);
2730                         if (ret)
2731                                 break;
2732                         repaired++;
2733                 }
2734
2735         }
2736         return ret ? ret : repaired;
2737 }
2738
2739 /*
2740  * To determine the file type for nlink/inode_item repair
2741  *
2742  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2743  * Return -ENOENT if file type is not found.
2744  */
2745 static int find_file_type(struct inode_record *rec, u8 *type)
2746 {
2747         struct inode_backref *backref;
2748
2749         /* For inode item recovered case */
2750         if (rec->found_inode_item) {
2751                 *type = imode_to_type(rec->imode);
2752                 return 0;
2753         }
2754
2755         list_for_each_entry(backref, &rec->backrefs, list) {
2756                 if (backref->found_dir_index || backref->found_dir_item) {
2757                         *type = backref->filetype;
2758                         return 0;
2759                 }
2760         }
2761         return -ENOENT;
2762 }
2763
2764 /*
2765  * To determine the file name for nlink repair
2766  *
2767  * Return 0 if file name is found, set name and namelen.
2768  * Return -ENOENT if file name is not found.
2769  */
2770 static int find_file_name(struct inode_record *rec,
2771                           char *name, int *namelen)
2772 {
2773         struct inode_backref *backref;
2774
2775         list_for_each_entry(backref, &rec->backrefs, list) {
2776                 if (backref->found_dir_index || backref->found_dir_item ||
2777                     backref->found_inode_ref) {
2778                         memcpy(name, backref->name, backref->namelen);
2779                         *namelen = backref->namelen;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /* Reset the nlink of the inode to the correct one */
2787 static int reset_nlink(struct btrfs_trans_handle *trans,
2788                        struct btrfs_root *root,
2789                        struct btrfs_path *path,
2790                        struct inode_record *rec)
2791 {
2792         struct inode_backref *backref;
2793         struct inode_backref *tmp;
2794         struct btrfs_key key;
2795         struct btrfs_inode_item *inode_item;
2796         int ret = 0;
2797
2798         /* We don't believe this either, reset it and iterate backref */
2799         rec->found_link = 0;
2800
2801         /* Remove all backref including the valid ones */
2802         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2803                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2804                                    backref->index, backref->name,
2805                                    backref->namelen, 0);
2806                 if (ret < 0)
2807                         goto out;
2808
2809                 /* remove invalid backref, so it won't be added back */
2810                 if (!(backref->found_dir_index &&
2811                       backref->found_dir_item &&
2812                       backref->found_inode_ref)) {
2813                         list_del(&backref->list);
2814                         free(backref);
2815                 } else {
2816                         rec->found_link++;
2817                 }
2818         }
2819
2820         /* Set nlink to 0 */
2821         key.objectid = rec->ino;
2822         key.type = BTRFS_INODE_ITEM_KEY;
2823         key.offset = 0;
2824         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2825         if (ret < 0)
2826                 goto out;
2827         if (ret > 0) {
2828                 ret = -ENOENT;
2829                 goto out;
2830         }
2831         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                                     struct btrfs_inode_item);
2833         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         btrfs_release_path(path);
2836
2837         /*
2838          * Add back valid inode_ref/dir_item/dir_index,
2839          * add_link() will handle the nlink inc, so new nlink must be correct
2840          */
2841         list_for_each_entry(backref, &rec->backrefs, list) {
2842                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2843                                      backref->name, backref->namelen,
2844                                      backref->filetype, &backref->index, 1);
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848 out:
2849         btrfs_release_path(path);
2850         return ret;
2851 }
2852
2853 static int get_highest_inode(struct btrfs_trans_handle *trans,
2854                                 struct btrfs_root *root,
2855                                 struct btrfs_path *path,
2856                                 u64 *highest_ino)
2857 {
2858         struct btrfs_key key, found_key;
2859         int ret;
2860
2861         btrfs_init_path(path);
2862         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2863         key.offset = -1;
2864         key.type = BTRFS_INODE_ITEM_KEY;
2865         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2866         if (ret == 1) {
2867                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2868                                 path->slots[0] - 1);
2869                 *highest_ino = found_key.objectid;
2870                 ret = 0;
2871         }
2872         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2873                 ret = -EOVERFLOW;
2874         btrfs_release_path(path);
2875         return ret;
2876 }
2877
2878 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2879                                struct btrfs_root *root,
2880                                struct btrfs_path *path,
2881                                struct inode_record *rec)
2882 {
2883         char *dir_name = "lost+found";
2884         char namebuf[BTRFS_NAME_LEN] = {0};
2885         u64 lost_found_ino;
2886         u32 mode = 0700;
2887         u8 type = 0;
2888         int namelen = 0;
2889         int name_recovered = 0;
2890         int type_recovered = 0;
2891         int ret = 0;
2892
2893         /*
2894          * Get file name and type first before these invalid inode ref
2895          * are deleted by remove_all_invalid_backref()
2896          */
2897         name_recovered = !find_file_name(rec, namebuf, &namelen);
2898         type_recovered = !find_file_type(rec, &type);
2899
2900         if (!name_recovered) {
2901                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2902                        rec->ino, rec->ino);
2903                 namelen = count_digits(rec->ino);
2904                 sprintf(namebuf, "%llu", rec->ino);
2905                 name_recovered = 1;
2906         }
2907         if (!type_recovered) {
2908                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2909                        rec->ino);
2910                 type = BTRFS_FT_REG_FILE;
2911                 type_recovered = 1;
2912         }
2913
2914         ret = reset_nlink(trans, root, path, rec);
2915         if (ret < 0) {
2916                 fprintf(stderr,
2917                         "Failed to reset nlink for inode %llu: %s\n",
2918                         rec->ino, strerror(-ret));
2919                 goto out;
2920         }
2921
2922         if (rec->found_link == 0) {
2923                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2924                 if (ret < 0)
2925                         goto out;
2926                 lost_found_ino++;
2927                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2928                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2929                                   mode);
2930                 if (ret < 0) {
2931                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2932                                 dir_name, strerror(-ret));
2933                         goto out;
2934                 }
2935                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2936                                      namebuf, namelen, type, NULL, 1);
2937                 /*
2938                  * Add ".INO" suffix several times to handle case where
2939                  * "FILENAME.INO" is already taken by another file.
2940                  */
2941                 while (ret == -EEXIST) {
2942                         /*
2943                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2944                          */
2945                         if (namelen + count_digits(rec->ino) + 1 >
2946                             BTRFS_NAME_LEN) {
2947                                 ret = -EFBIG;
2948                                 goto out;
2949                         }
2950                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2951                                  ".%llu", rec->ino);
2952                         namelen += count_digits(rec->ino) + 1;
2953                         ret = btrfs_add_link(trans, root, rec->ino,
2954                                              lost_found_ino, namebuf,
2955                                              namelen, type, NULL, 1);
2956                 }
2957                 if (ret < 0) {
2958                         fprintf(stderr,
2959                                 "Failed to link the inode %llu to %s dir: %s\n",
2960                                 rec->ino, dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 /*
2964                  * Just increase the found_link, don't actually add the
2965                  * backref. This will make things easier and this inode
2966                  * record will be freed after the repair is done.
2967                  * So fsck will not report problem about this inode.
2968                  */
2969                 rec->found_link++;
2970                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2971                        namelen, namebuf, dir_name);
2972         }
2973         printf("Fixed the nlink of inode %llu\n", rec->ino);
2974 out:
2975         /*
2976          * Clear the flag anyway, or we will loop forever for the same inode
2977          * as it will not be removed from the bad inode list and the dead loop
2978          * happens.
2979          */
2980         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2981         btrfs_release_path(path);
2982         return ret;
2983 }
2984
2985 /*
2986  * Check if there is any normal(reg or prealloc) file extent for given
2987  * ino.
2988  * This is used to determine the file type when neither its dir_index/item or
2989  * inode_item exists.
2990  *
2991  * This will *NOT* report error, if any error happens, just consider it does
2992  * not have any normal file extent.
2993  */
2994 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2995 {
2996         struct btrfs_path path;
2997         struct btrfs_key key;
2998         struct btrfs_key found_key;
2999         struct btrfs_file_extent_item *fi;
3000         u8 type;
3001         int ret = 0;
3002
3003         btrfs_init_path(&path);
3004         key.objectid = ino;
3005         key.type = BTRFS_EXTENT_DATA_KEY;
3006         key.offset = 0;
3007
3008         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3009         if (ret < 0) {
3010                 ret = 0;
3011                 goto out;
3012         }
3013         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3014                 ret = btrfs_next_leaf(root, &path);
3015                 if (ret) {
3016                         ret = 0;
3017                         goto out;
3018                 }
3019         }
3020         while (1) {
3021                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3022                                       path.slots[0]);
3023                 if (found_key.objectid != ino ||
3024                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3025                         break;
3026                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3027                                     struct btrfs_file_extent_item);
3028                 type = btrfs_file_extent_type(path.nodes[0], fi);
3029                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3030                         ret = 1;
3031                         goto out;
3032                 }
3033         }
3034 out:
3035         btrfs_release_path(&path);
3036         return ret;
3037 }
3038
3039 static u32 btrfs_type_to_imode(u8 type)
3040 {
3041         static u32 imode_by_btrfs_type[] = {
3042                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3043                 [BTRFS_FT_DIR]          = S_IFDIR,
3044                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3045                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3046                 [BTRFS_FT_FIFO]         = S_IFIFO,
3047                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3048                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3049         };
3050
3051         return imode_by_btrfs_type[(type)];
3052 }
3053
3054 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3055                                 struct btrfs_root *root,
3056                                 struct btrfs_path *path,
3057                                 struct inode_record *rec)
3058 {
3059         u8 filetype;
3060         u32 mode = 0700;
3061         int type_recovered = 0;
3062         int ret = 0;
3063
3064         printf("Trying to rebuild inode:%llu\n", rec->ino);
3065
3066         type_recovered = !find_file_type(rec, &filetype);
3067
3068         /*
3069          * Try to determine inode type if type not found.
3070          *
3071          * For found regular file extent, it must be FILE.
3072          * For found dir_item/index, it must be DIR.
3073          *
3074          * For undetermined one, use FILE as fallback.
3075          *
3076          * TODO:
3077          * 1. If found backref(inode_index/item is already handled) to it,
3078          *    it must be DIR.
3079          *    Need new inode-inode ref structure to allow search for that.
3080          */
3081         if (!type_recovered) {
3082                 if (rec->found_file_extent &&
3083                     find_normal_file_extent(root, rec->ino)) {
3084                         type_recovered = 1;
3085                         filetype = BTRFS_FT_REG_FILE;
3086                 } else if (rec->found_dir_item) {
3087                         type_recovered = 1;
3088                         filetype = BTRFS_FT_DIR;
3089                 } else if (!list_empty(&rec->orphan_extents)) {
3090                         type_recovered = 1;
3091                         filetype = BTRFS_FT_REG_FILE;
3092                 } else{
3093                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3094                                rec->ino);
3095                         type_recovered = 1;
3096                         filetype = BTRFS_FT_REG_FILE;
3097                 }
3098         }
3099
3100         ret = btrfs_new_inode(trans, root, rec->ino,
3101                               mode | btrfs_type_to_imode(filetype));
3102         if (ret < 0)
3103                 goto out;
3104
3105         /*
3106          * Here inode rebuild is done, we only rebuild the inode item,
3107          * don't repair the nlink(like move to lost+found).
3108          * That is the job of nlink repair.
3109          *
3110          * We just fill the record and return
3111          */
3112         rec->found_dir_item = 1;
3113         rec->imode = mode | btrfs_type_to_imode(filetype);
3114         rec->nlink = 0;
3115         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3116         /* Ensure the inode_nlinks repair function will be called */
3117         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3118 out:
3119         return ret;
3120 }
3121
3122 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3123                                       struct btrfs_root *root,
3124                                       struct btrfs_path *path,
3125                                       struct inode_record *rec)
3126 {
3127         struct orphan_data_extent *orphan;
3128         struct orphan_data_extent *tmp;
3129         int ret = 0;
3130
3131         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3132                 /*
3133                  * Check for conflicting file extents
3134                  *
3135                  * Here we don't know whether the extents is compressed or not,
3136                  * so we can only assume it not compressed nor data offset,
3137                  * and use its disk_len as extent length.
3138                  */
3139                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3140                                        orphan->offset, orphan->disk_len, 0);
3141                 btrfs_release_path(path);
3142                 if (ret < 0)
3143                         goto out;
3144                 if (!ret) {
3145                         fprintf(stderr,
3146                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3147                                 orphan->disk_bytenr, orphan->disk_len);
3148                         ret = btrfs_free_extent(trans,
3149                                         root->fs_info->extent_root,
3150                                         orphan->disk_bytenr, orphan->disk_len,
3151                                         0, root->objectid, orphan->objectid,
3152                                         orphan->offset);
3153                         if (ret < 0)
3154                                 goto out;
3155                 }
3156                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3157                                 orphan->offset, orphan->disk_bytenr,
3158                                 orphan->disk_len, orphan->disk_len);
3159                 if (ret < 0)
3160                         goto out;
3161
3162                 /* Update file size info */
3163                 rec->found_size += orphan->disk_len;
3164                 if (rec->found_size == rec->nbytes)
3165                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3166
3167                 /* Update the file extent hole info too */
3168                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3169                                            orphan->disk_len);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (RB_EMPTY_ROOT(&rec->holes))
3173                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3174
3175                 list_del(&orphan->list);
3176                 free(orphan);
3177         }
3178         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3179 out:
3180         return ret;
3181 }
3182
3183 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3184                                         struct btrfs_root *root,
3185                                         struct btrfs_path *path,
3186                                         struct inode_record *rec)
3187 {
3188         struct rb_node *node;
3189         struct file_extent_hole *hole;
3190         int found = 0;
3191         int ret = 0;
3192
3193         node = rb_first(&rec->holes);
3194
3195         while (node) {
3196                 found = 1;
3197                 hole = rb_entry(node, struct file_extent_hole, node);
3198                 ret = btrfs_punch_hole(trans, root, rec->ino,
3199                                        hole->start, hole->len);
3200                 if (ret < 0)
3201                         goto out;
3202                 ret = del_file_extent_hole(&rec->holes, hole->start,
3203                                            hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 if (RB_EMPTY_ROOT(&rec->holes))
3207                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3208                 node = rb_first(&rec->holes);
3209         }
3210         /* special case for a file losing all its file extent */
3211         if (!found) {
3212                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3213                                        round_up(rec->isize, root->sectorsize));
3214                 if (ret < 0)
3215                         goto out;
3216         }
3217         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3218                rec->ino, root->objectid);
3219 out:
3220         return ret;
3221 }
3222
3223 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3224 {
3225         struct btrfs_trans_handle *trans;
3226         struct btrfs_path path;
3227         int ret = 0;
3228
3229         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3230                              I_ERR_NO_ORPHAN_ITEM |
3231                              I_ERR_LINK_COUNT_WRONG |
3232                              I_ERR_NO_INODE_ITEM |
3233                              I_ERR_FILE_EXTENT_ORPHAN |
3234                              I_ERR_FILE_EXTENT_DISCOUNT|
3235                              I_ERR_FILE_NBYTES_WRONG)))
3236                 return rec->errors;
3237
3238         /*
3239          * For nlink repair, it may create a dir and add link, so
3240          * 2 for parent(256)'s dir_index and dir_item
3241          * 2 for lost+found dir's inode_item and inode_ref
3242          * 1 for the new inode_ref of the file
3243          * 2 for lost+found dir's dir_index and dir_item for the file
3244          */
3245         trans = btrfs_start_transaction(root, 7);
3246         if (IS_ERR(trans))
3247                 return PTR_ERR(trans);
3248
3249         btrfs_init_path(&path);
3250         if (rec->errors & I_ERR_NO_INODE_ITEM)
3251                 ret = repair_inode_no_item(trans, root, &path, rec);
3252         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3253                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3254         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3255                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3257                 ret = repair_inode_isize(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3259                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3261                 ret = repair_inode_nlinks(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3263                 ret = repair_inode_nbytes(trans, root, &path, rec);
3264         btrfs_commit_transaction(trans, root);
3265         btrfs_release_path(&path);
3266         return ret;
3267 }
3268
3269 static int check_inode_recs(struct btrfs_root *root,
3270                             struct cache_tree *inode_cache)
3271 {
3272         struct cache_extent *cache;
3273         struct ptr_node *node;
3274         struct inode_record *rec;
3275         struct inode_backref *backref;
3276         int stage = 0;
3277         int ret = 0;
3278         int err = 0;
3279         u64 error = 0;
3280         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3281
3282         if (btrfs_root_refs(&root->root_item) == 0) {
3283                 if (!cache_tree_empty(inode_cache))
3284                         fprintf(stderr, "warning line %d\n", __LINE__);
3285                 return 0;
3286         }
3287
3288         /*
3289          * We need to repair backrefs first because we could change some of the
3290          * errors in the inode recs.
3291          *
3292          * We also need to go through and delete invalid backrefs first and then
3293          * add the correct ones second.  We do this because we may get EEXIST
3294          * when adding back the correct index because we hadn't yet deleted the
3295          * invalid index.
3296          *
3297          * For example, if we were missing a dir index then the directories
3298          * isize would be wrong, so if we fixed the isize to what we thought it
3299          * would be and then fixed the backref we'd still have a invalid fs, so
3300          * we need to add back the dir index and then check to see if the isize
3301          * is still wrong.
3302          */
3303         while (stage < 3) {
3304                 stage++;
3305                 if (stage == 3 && !err)
3306                         break;
3307
3308                 cache = search_cache_extent(inode_cache, 0);
3309                 while (repair && cache) {
3310                         node = container_of(cache, struct ptr_node, cache);
3311                         rec = node->data;
3312                         cache = next_cache_extent(cache);
3313
3314                         /* Need to free everything up and rescan */
3315                         if (stage == 3) {
3316                                 remove_cache_extent(inode_cache, &node->cache);
3317                                 free(node);
3318                                 free_inode_rec(rec);
3319                                 continue;
3320                         }
3321
3322                         if (list_empty(&rec->backrefs))
3323                                 continue;
3324
3325                         ret = repair_inode_backrefs(root, rec, inode_cache,
3326                                                     stage == 1);
3327                         if (ret < 0) {
3328                                 err = ret;
3329                                 stage = 2;
3330                                 break;
3331                         } if (ret > 0) {
3332                                 err = -EAGAIN;
3333                         }
3334                 }
3335         }
3336         if (err)
3337                 return err;
3338
3339         rec = get_inode_rec(inode_cache, root_dirid, 0);
3340         BUG_ON(IS_ERR(rec));
3341         if (rec) {
3342                 ret = check_root_dir(rec);
3343                 if (ret) {
3344                         fprintf(stderr, "root %llu root dir %llu error\n",
3345                                 (unsigned long long)root->root_key.objectid,
3346                                 (unsigned long long)root_dirid);
3347                         print_inode_error(root, rec);
3348                         error++;
3349                 }
3350         } else {
3351                 if (repair) {
3352                         struct btrfs_trans_handle *trans;
3353
3354                         trans = btrfs_start_transaction(root, 1);
3355                         if (IS_ERR(trans)) {
3356                                 err = PTR_ERR(trans);
3357                                 return err;
3358                         }
3359
3360                         fprintf(stderr,
3361                                 "root %llu missing its root dir, recreating\n",
3362                                 (unsigned long long)root->objectid);
3363
3364                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3365                         BUG_ON(ret);
3366
3367                         btrfs_commit_transaction(trans, root);
3368                         return -EAGAIN;
3369                 }
3370
3371                 fprintf(stderr, "root %llu root dir %llu not found\n",
3372                         (unsigned long long)root->root_key.objectid,
3373                         (unsigned long long)root_dirid);
3374         }
3375
3376         while (1) {
3377                 cache = search_cache_extent(inode_cache, 0);
3378                 if (!cache)
3379                         break;
3380                 node = container_of(cache, struct ptr_node, cache);
3381                 rec = node->data;
3382                 remove_cache_extent(inode_cache, &node->cache);
3383                 free(node);
3384                 if (rec->ino == root_dirid ||
3385                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3386                         free_inode_rec(rec);
3387                         continue;
3388                 }
3389
3390                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3391                         ret = check_orphan_item(root, rec->ino);
3392                         if (ret == 0)
3393                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3394                         if (can_free_inode_rec(rec)) {
3395                                 free_inode_rec(rec);
3396                                 continue;
3397                         }
3398                 }
3399
3400                 if (!rec->found_inode_item)
3401                         rec->errors |= I_ERR_NO_INODE_ITEM;
3402                 if (rec->found_link != rec->nlink)
3403                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3404                 if (repair) {
3405                         ret = try_repair_inode(root, rec);
3406                         if (ret == 0 && can_free_inode_rec(rec)) {
3407                                 free_inode_rec(rec);
3408                                 continue;
3409                         }
3410                         ret = 0;
3411                 }
3412
3413                 if (!(repair && ret == 0))
3414                         error++;
3415                 print_inode_error(root, rec);
3416                 list_for_each_entry(backref, &rec->backrefs, list) {
3417                         if (!backref->found_dir_item)
3418                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3419                         if (!backref->found_dir_index)
3420                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3421                         if (!backref->found_inode_ref)
3422                                 backref->errors |= REF_ERR_NO_INODE_REF;
3423                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3424                                 " namelen %u name %s filetype %d errors %x",
3425                                 (unsigned long long)backref->dir,
3426                                 (unsigned long long)backref->index,
3427                                 backref->namelen, backref->name,
3428                                 backref->filetype, backref->errors);
3429                         print_ref_error(backref->errors);
3430                 }
3431                 free_inode_rec(rec);
3432         }
3433         return (error > 0) ? -1 : 0;
3434 }
3435
3436 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3437                                         u64 objectid)
3438 {
3439         struct cache_extent *cache;
3440         struct root_record *rec = NULL;
3441         int ret;
3442
3443         cache = lookup_cache_extent(root_cache, objectid, 1);
3444         if (cache) {
3445                 rec = container_of(cache, struct root_record, cache);
3446         } else {
3447                 rec = calloc(1, sizeof(*rec));
3448                 if (!rec)
3449                         return ERR_PTR(-ENOMEM);
3450                 rec->objectid = objectid;
3451                 INIT_LIST_HEAD(&rec->backrefs);
3452                 rec->cache.start = objectid;
3453                 rec->cache.size = 1;
3454
3455                 ret = insert_cache_extent(root_cache, &rec->cache);
3456                 if (ret)
3457                         return ERR_PTR(-EEXIST);
3458         }
3459         return rec;
3460 }
3461
3462 static struct root_backref *get_root_backref(struct root_record *rec,
3463                                              u64 ref_root, u64 dir, u64 index,
3464                                              const char *name, int namelen)
3465 {
3466         struct root_backref *backref;
3467
3468         list_for_each_entry(backref, &rec->backrefs, list) {
3469                 if (backref->ref_root != ref_root || backref->dir != dir ||
3470                     backref->namelen != namelen)
3471                         continue;
3472                 if (memcmp(name, backref->name, namelen))
3473                         continue;
3474                 return backref;
3475         }
3476
3477         backref = calloc(1, sizeof(*backref) + namelen + 1);
3478         if (!backref)
3479                 return NULL;
3480         backref->ref_root = ref_root;
3481         backref->dir = dir;
3482         backref->index = index;
3483         backref->namelen = namelen;
3484         memcpy(backref->name, name, namelen);
3485         backref->name[namelen] = '\0';
3486         list_add_tail(&backref->list, &rec->backrefs);
3487         return backref;
3488 }
3489
3490 static void free_root_record(struct cache_extent *cache)
3491 {
3492         struct root_record *rec;
3493         struct root_backref *backref;
3494
3495         rec = container_of(cache, struct root_record, cache);
3496         while (!list_empty(&rec->backrefs)) {
3497                 backref = to_root_backref(rec->backrefs.next);
3498                 list_del(&backref->list);
3499                 free(backref);
3500         }
3501
3502         free(rec);
3503 }
3504
3505 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3506
3507 static int add_root_backref(struct cache_tree *root_cache,
3508                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3509                             const char *name, int namelen,
3510                             int item_type, int errors)
3511 {
3512         struct root_record *rec;
3513         struct root_backref *backref;
3514
3515         rec = get_root_rec(root_cache, root_id);
3516         BUG_ON(IS_ERR(rec));
3517         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3518         BUG_ON(!backref);
3519
3520         backref->errors |= errors;
3521
3522         if (item_type != BTRFS_DIR_ITEM_KEY) {
3523                 if (backref->found_dir_index || backref->found_back_ref ||
3524                     backref->found_forward_ref) {
3525                         if (backref->index != index)
3526                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3527                 } else {
3528                         backref->index = index;
3529                 }
3530         }
3531
3532         if (item_type == BTRFS_DIR_ITEM_KEY) {
3533                 if (backref->found_forward_ref)
3534                         rec->found_ref++;
3535                 backref->found_dir_item = 1;
3536         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3537                 backref->found_dir_index = 1;
3538         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3539                 if (backref->found_forward_ref)
3540                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3541                 else if (backref->found_dir_item)
3542                         rec->found_ref++;
3543                 backref->found_forward_ref = 1;
3544         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3545                 if (backref->found_back_ref)
3546                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3547                 backref->found_back_ref = 1;
3548         } else {
3549                 BUG_ON(1);
3550         }
3551
3552         if (backref->found_forward_ref && backref->found_dir_item)
3553                 backref->reachable = 1;
3554         return 0;
3555 }
3556
3557 static int merge_root_recs(struct btrfs_root *root,
3558                            struct cache_tree *src_cache,
3559                            struct cache_tree *dst_cache)
3560 {
3561         struct cache_extent *cache;
3562         struct ptr_node *node;
3563         struct inode_record *rec;
3564         struct inode_backref *backref;
3565         int ret = 0;
3566
3567         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3568                 free_inode_recs_tree(src_cache);
3569                 return 0;
3570         }
3571
3572         while (1) {
3573                 cache = search_cache_extent(src_cache, 0);
3574                 if (!cache)
3575                         break;
3576                 node = container_of(cache, struct ptr_node, cache);
3577                 rec = node->data;
3578                 remove_cache_extent(src_cache, &node->cache);
3579                 free(node);
3580
3581                 ret = is_child_root(root, root->objectid, rec->ino);
3582                 if (ret < 0)
3583                         break;
3584                 else if (ret == 0)
3585                         goto skip;
3586
3587                 list_for_each_entry(backref, &rec->backrefs, list) {
3588                         BUG_ON(backref->found_inode_ref);
3589                         if (backref->found_dir_item)
3590                                 add_root_backref(dst_cache, rec->ino,
3591                                         root->root_key.objectid, backref->dir,
3592                                         backref->index, backref->name,
3593                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3594                                         backref->errors);
3595                         if (backref->found_dir_index)
3596                                 add_root_backref(dst_cache, rec->ino,
3597                                         root->root_key.objectid, backref->dir,
3598                                         backref->index, backref->name,
3599                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3600                                         backref->errors);
3601                 }
3602 skip:
3603                 free_inode_rec(rec);
3604         }
3605         if (ret < 0)
3606                 return ret;
3607         return 0;
3608 }
3609
3610 static int check_root_refs(struct btrfs_root *root,
3611                            struct cache_tree *root_cache)
3612 {
3613         struct root_record *rec;
3614         struct root_record *ref_root;
3615         struct root_backref *backref;
3616         struct cache_extent *cache;
3617         int loop = 1;
3618         int ret;
3619         int error;
3620         int errors = 0;
3621
3622         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3623         BUG_ON(IS_ERR(rec));
3624         rec->found_ref = 1;
3625
3626         /* fixme: this can not detect circular references */
3627         while (loop) {
3628                 loop = 0;
3629                 cache = search_cache_extent(root_cache, 0);
3630                 while (1) {
3631                         if (!cache)
3632                                 break;
3633                         rec = container_of(cache, struct root_record, cache);
3634                         cache = next_cache_extent(cache);
3635
3636                         if (rec->found_ref == 0)
3637                                 continue;
3638
3639                         list_for_each_entry(backref, &rec->backrefs, list) {
3640                                 if (!backref->reachable)
3641                                         continue;
3642
3643                                 ref_root = get_root_rec(root_cache,
3644                                                         backref->ref_root);
3645                                 BUG_ON(IS_ERR(ref_root));
3646                                 if (ref_root->found_ref > 0)
3647                                         continue;
3648
3649                                 backref->reachable = 0;
3650                                 rec->found_ref--;
3651                                 if (rec->found_ref == 0)
3652                                         loop = 1;
3653                         }
3654                 }
3655         }
3656
3657         cache = search_cache_extent(root_cache, 0);
3658         while (1) {
3659                 if (!cache)
3660                         break;
3661                 rec = container_of(cache, struct root_record, cache);
3662                 cache = next_cache_extent(cache);
3663
3664                 if (rec->found_ref == 0 &&
3665                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3666                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3667                         ret = check_orphan_item(root->fs_info->tree_root,
3668                                                 rec->objectid);
3669                         if (ret == 0)
3670                                 continue;
3671
3672                         /*
3673                          * If we don't have a root item then we likely just have
3674                          * a dir item in a snapshot for this root but no actual
3675                          * ref key or anything so it's meaningless.
3676                          */
3677                         if (!rec->found_root_item)
3678                                 continue;
3679                         errors++;
3680                         fprintf(stderr, "fs tree %llu not referenced\n",
3681                                 (unsigned long long)rec->objectid);
3682                 }
3683
3684                 error = 0;
3685                 if (rec->found_ref > 0 && !rec->found_root_item)
3686                         error = 1;
3687                 list_for_each_entry(backref, &rec->backrefs, list) {
3688                         if (!backref->found_dir_item)
3689                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3690                         if (!backref->found_dir_index)
3691                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3692                         if (!backref->found_back_ref)
3693                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3694                         if (!backref->found_forward_ref)
3695                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3696                         if (backref->reachable && backref->errors)
3697                                 error = 1;
3698                 }
3699                 if (!error)
3700                         continue;
3701
3702                 errors++;
3703                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3704                         (unsigned long long)rec->objectid, rec->found_ref,
3705                          rec->found_root_item ? "" : "not found");
3706
3707                 list_for_each_entry(backref, &rec->backrefs, list) {
3708                         if (!backref->reachable)
3709                                 continue;
3710                         if (!backref->errors && rec->found_root_item)
3711                                 continue;
3712                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3713                                 " index %llu namelen %u name %s errors %x\n",
3714                                 (unsigned long long)backref->ref_root,
3715                                 (unsigned long long)backref->dir,
3716                                 (unsigned long long)backref->index,
3717                                 backref->namelen, backref->name,
3718                                 backref->errors);
3719                         print_ref_error(backref->errors);
3720                 }
3721         }
3722         return errors > 0 ? 1 : 0;
3723 }
3724
3725 static int process_root_ref(struct extent_buffer *eb, int slot,
3726                             struct btrfs_key *key,
3727                             struct cache_tree *root_cache)
3728 {
3729         u64 dirid;
3730         u64 index;
3731         u32 len;
3732         u32 name_len;
3733         struct btrfs_root_ref *ref;
3734         char namebuf[BTRFS_NAME_LEN];
3735         int error;
3736
3737         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3738
3739         dirid = btrfs_root_ref_dirid(eb, ref);
3740         index = btrfs_root_ref_sequence(eb, ref);
3741         name_len = btrfs_root_ref_name_len(eb, ref);
3742
3743         if (name_len <= BTRFS_NAME_LEN) {
3744                 len = name_len;
3745                 error = 0;
3746         } else {
3747                 len = BTRFS_NAME_LEN;
3748                 error = REF_ERR_NAME_TOO_LONG;
3749         }
3750         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3751
3752         if (key->type == BTRFS_ROOT_REF_KEY) {
3753                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3754                                  index, namebuf, len, key->type, error);
3755         } else {
3756                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3757                                  index, namebuf, len, key->type, error);
3758         }
3759         return 0;
3760 }
3761
3762 static void free_corrupt_block(struct cache_extent *cache)
3763 {
3764         struct btrfs_corrupt_block *corrupt;
3765
3766         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3767         free(corrupt);
3768 }
3769
3770 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3771
3772 /*
3773  * Repair the btree of the given root.
3774  *
3775  * The fix is to remove the node key in corrupt_blocks cache_tree.
3776  * and rebalance the tree.
3777  * After the fix, the btree should be writeable.
3778  */
3779 static int repair_btree(struct btrfs_root *root,
3780                         struct cache_tree *corrupt_blocks)
3781 {
3782         struct btrfs_trans_handle *trans;
3783         struct btrfs_path path;
3784         struct btrfs_corrupt_block *corrupt;
3785         struct cache_extent *cache;
3786         struct btrfs_key key;
3787         u64 offset;
3788         int level;
3789         int ret = 0;
3790
3791         if (cache_tree_empty(corrupt_blocks))
3792                 return 0;
3793
3794         trans = btrfs_start_transaction(root, 1);
3795         if (IS_ERR(trans)) {
3796                 ret = PTR_ERR(trans);
3797                 fprintf(stderr, "Error starting transaction: %s\n",
3798                         strerror(-ret));
3799                 return ret;
3800         }
3801         btrfs_init_path(&path);
3802         cache = first_cache_extent(corrupt_blocks);
3803         while (cache) {
3804                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3805                                        cache);
3806                 level = corrupt->level;
3807                 path.lowest_level = level;
3808                 key.objectid = corrupt->key.objectid;
3809                 key.type = corrupt->key.type;
3810                 key.offset = corrupt->key.offset;
3811
3812                 /*
3813                  * Here we don't want to do any tree balance, since it may
3814                  * cause a balance with corrupted brother leaf/node,
3815                  * so ins_len set to 0 here.
3816                  * Balance will be done after all corrupt node/leaf is deleted.
3817                  */
3818                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3819                 if (ret < 0)
3820                         goto out;
3821                 offset = btrfs_node_blockptr(path.nodes[level],
3822                                              path.slots[level]);
3823
3824                 /* Remove the ptr */
3825                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3826                 if (ret < 0)
3827                         goto out;
3828                 /*
3829                  * Remove the corresponding extent
3830                  * return value is not concerned.
3831                  */
3832                 btrfs_release_path(&path);
3833                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3834                                         0, root->root_key.objectid,
3835                                         level - 1, 0);
3836                 cache = next_cache_extent(cache);
3837         }
3838
3839         /* Balance the btree using btrfs_search_slot() */
3840         cache = first_cache_extent(corrupt_blocks);
3841         while (cache) {
3842                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843                                        cache);
3844                 memcpy(&key, &corrupt->key, sizeof(key));
3845                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3846                 if (ret < 0)
3847                         goto out;
3848                 /* return will always >0 since it won't find the item */
3849                 ret = 0;
3850                 btrfs_release_path(&path);
3851                 cache = next_cache_extent(cache);
3852         }
3853 out:
3854         btrfs_commit_transaction(trans, root);
3855         btrfs_release_path(&path);
3856         return ret;
3857 }
3858
3859 static int check_fs_root(struct btrfs_root *root,
3860                          struct cache_tree *root_cache,
3861                          struct walk_control *wc)
3862 {
3863         int ret = 0;
3864         int err = 0;
3865         int wret;
3866         int level;
3867         struct btrfs_path path;
3868         struct shared_node root_node;
3869         struct root_record *rec;
3870         struct btrfs_root_item *root_item = &root->root_item;
3871         struct cache_tree corrupt_blocks;
3872         struct orphan_data_extent *orphan;
3873         struct orphan_data_extent *tmp;
3874         enum btrfs_tree_block_status status;
3875         struct node_refs nrefs;
3876
3877         /*
3878          * Reuse the corrupt_block cache tree to record corrupted tree block
3879          *
3880          * Unlike the usage in extent tree check, here we do it in a per
3881          * fs/subvol tree base.
3882          */
3883         cache_tree_init(&corrupt_blocks);
3884         root->fs_info->corrupt_blocks = &corrupt_blocks;
3885
3886         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3887                 rec = get_root_rec(root_cache, root->root_key.objectid);
3888                 BUG_ON(IS_ERR(rec));
3889                 if (btrfs_root_refs(root_item) > 0)
3890                         rec->found_root_item = 1;
3891         }
3892
3893         btrfs_init_path(&path);
3894         memset(&root_node, 0, sizeof(root_node));
3895         cache_tree_init(&root_node.root_cache);
3896         cache_tree_init(&root_node.inode_cache);
3897         memset(&nrefs, 0, sizeof(nrefs));
3898
3899         /* Move the orphan extent record to corresponding inode_record */
3900         list_for_each_entry_safe(orphan, tmp,
3901                                  &root->orphan_data_extents, list) {
3902                 struct inode_record *inode;
3903
3904                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3905                                       1);
3906                 BUG_ON(IS_ERR(inode));
3907                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3908                 list_move(&orphan->list, &inode->orphan_extents);
3909         }
3910
3911         level = btrfs_header_level(root->node);
3912         memset(wc->nodes, 0, sizeof(wc->nodes));
3913         wc->nodes[level] = &root_node;
3914         wc->active_node = level;
3915         wc->root_level = level;
3916
3917         /* We may not have checked the root block, lets do that now */
3918         if (btrfs_is_leaf(root->node))
3919                 status = btrfs_check_leaf(root, NULL, root->node);
3920         else
3921                 status = btrfs_check_node(root, NULL, root->node);
3922         if (status != BTRFS_TREE_BLOCK_CLEAN)
3923                 return -EIO;
3924
3925         if (btrfs_root_refs(root_item) > 0 ||
3926             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3927                 path.nodes[level] = root->node;
3928                 extent_buffer_get(root->node);
3929                 path.slots[level] = 0;
3930         } else {
3931                 struct btrfs_key key;
3932                 struct btrfs_disk_key found_key;
3933
3934                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3935                 level = root_item->drop_level;
3936                 path.lowest_level = level;
3937                 if (level > btrfs_header_level(root->node) ||
3938                     level >= BTRFS_MAX_LEVEL) {
3939                         error("ignoring invalid drop level: %u", level);
3940                         goto skip_walking;
3941                 }
3942                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3943                 if (wret < 0)
3944                         goto skip_walking;
3945                 btrfs_node_key(path.nodes[level], &found_key,
3946                                 path.slots[level]);
3947                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3948                                         sizeof(found_key)));
3949         }
3950
3951         while (1) {
3952                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3953                 if (wret < 0)
3954                         ret = wret;
3955                 if (wret != 0)
3956                         break;
3957
3958                 wret = walk_up_tree(root, &path, wc, &level);
3959                 if (wret < 0)
3960                         ret = wret;
3961                 if (wret != 0)
3962                         break;
3963         }
3964 skip_walking:
3965         btrfs_release_path(&path);
3966
3967         if (!cache_tree_empty(&corrupt_blocks)) {
3968                 struct cache_extent *cache;
3969                 struct btrfs_corrupt_block *corrupt;
3970
3971                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3972                        root->root_key.objectid);
3973                 cache = first_cache_extent(&corrupt_blocks);
3974                 while (cache) {
3975                         corrupt = container_of(cache,
3976                                                struct btrfs_corrupt_block,
3977                                                cache);
3978                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3979                                cache->start, corrupt->level,
3980                                corrupt->key.objectid, corrupt->key.type,
3981                                corrupt->key.offset);
3982                         cache = next_cache_extent(cache);
3983                 }
3984                 if (repair) {
3985                         printf("Try to repair the btree for root %llu\n",
3986                                root->root_key.objectid);
3987                         ret = repair_btree(root, &corrupt_blocks);
3988                         if (ret < 0)
3989                                 fprintf(stderr, "Failed to repair btree: %s\n",
3990                                         strerror(-ret));
3991                         if (!ret)
3992                                 printf("Btree for root %llu is fixed\n",
3993                                        root->root_key.objectid);
3994                 }
3995         }
3996
3997         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3998         if (err < 0)
3999                 ret = err;
4000
4001         if (root_node.current) {
4002                 root_node.current->checked = 1;
4003                 maybe_free_inode_rec(&root_node.inode_cache,
4004                                 root_node.current);
4005         }
4006
4007         err = check_inode_recs(root, &root_node.inode_cache);
4008         if (!ret)
4009                 ret = err;
4010
4011         free_corrupt_blocks_tree(&corrupt_blocks);
4012         root->fs_info->corrupt_blocks = NULL;
4013         free_orphan_data_extents(&root->orphan_data_extents);
4014         return ret;
4015 }
4016
4017 static int fs_root_objectid(u64 objectid)
4018 {
4019         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4020             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4021                 return 1;
4022         return is_fstree(objectid);
4023 }
4024
4025 static int check_fs_roots(struct btrfs_root *root,
4026                           struct cache_tree *root_cache)
4027 {
4028         struct btrfs_path path;
4029         struct btrfs_key key;
4030         struct walk_control wc;
4031         struct extent_buffer *leaf, *tree_node;
4032         struct btrfs_root *tmp_root;
4033         struct btrfs_root *tree_root = root->fs_info->tree_root;
4034         int ret;
4035         int err = 0;
4036
4037         if (ctx.progress_enabled) {
4038                 ctx.tp = TASK_FS_ROOTS;
4039                 task_start(ctx.info);
4040         }
4041
4042         /*
4043          * Just in case we made any changes to the extent tree that weren't
4044          * reflected into the free space cache yet.
4045          */
4046         if (repair)
4047                 reset_cached_block_groups(root->fs_info);
4048         memset(&wc, 0, sizeof(wc));
4049         cache_tree_init(&wc.shared);
4050         btrfs_init_path(&path);
4051
4052 again:
4053         key.offset = 0;
4054         key.objectid = 0;
4055         key.type = BTRFS_ROOT_ITEM_KEY;
4056         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4057         if (ret < 0) {
4058                 err = 1;
4059                 goto out;
4060         }
4061         tree_node = tree_root->node;
4062         while (1) {
4063                 if (tree_node != tree_root->node) {
4064                         free_root_recs_tree(root_cache);
4065                         btrfs_release_path(&path);
4066                         goto again;
4067                 }
4068                 leaf = path.nodes[0];
4069                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4070                         ret = btrfs_next_leaf(tree_root, &path);
4071                         if (ret) {
4072                                 if (ret < 0)
4073                                         err = 1;
4074                                 break;
4075                         }
4076                         leaf = path.nodes[0];
4077                 }
4078                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4079                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4080                     fs_root_objectid(key.objectid)) {
4081                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4082                                 tmp_root = btrfs_read_fs_root_no_cache(
4083                                                 root->fs_info, &key);
4084                         } else {
4085                                 key.offset = (u64)-1;
4086                                 tmp_root = btrfs_read_fs_root(
4087                                                 root->fs_info, &key);
4088                         }
4089                         if (IS_ERR(tmp_root)) {
4090                                 err = 1;
4091                                 goto next;
4092                         }
4093                         ret = check_fs_root(tmp_root, root_cache, &wc);
4094                         if (ret == -EAGAIN) {
4095                                 free_root_recs_tree(root_cache);
4096                                 btrfs_release_path(&path);
4097                                 goto again;
4098                         }
4099                         if (ret)
4100                                 err = 1;
4101                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4102                                 btrfs_free_fs_root(tmp_root);
4103                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4104                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4105                         process_root_ref(leaf, path.slots[0], &key,
4106                                          root_cache);
4107                 }
4108 next:
4109                 path.slots[0]++;
4110         }
4111 out:
4112         btrfs_release_path(&path);
4113         if (err)
4114                 free_extent_cache_tree(&wc.shared);
4115         if (!cache_tree_empty(&wc.shared))
4116                 fprintf(stderr, "warning line %d\n", __LINE__);
4117
4118         task_stop(ctx.info);
4119
4120         return err;
4121 }
4122
4123 /*
4124  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4125  * INODE_REF/INODE_EXTREF match.
4126  *
4127  * @root:       the root of the fs/file tree
4128  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4129  * @key:        the key of the DIR_ITEM/DIR_INDEX
4130  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4131  *              distinguish root_dir between normal dir/file
4132  * @name:       the name in the INODE_REF/INODE_EXTREF
4133  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4134  * @mode:       the st_mode of INODE_ITEM
4135  *
4136  * Return 0 if no error occurred.
4137  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4138  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4139  * dir/file.
4140  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4141  * not match for normal dir/file.
4142  */
4143 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4144                          struct btrfs_key *key, u64 index, char *name,
4145                          u32 namelen, u32 mode)
4146 {
4147         struct btrfs_path path;
4148         struct extent_buffer *node;
4149         struct btrfs_dir_item *di;
4150         struct btrfs_key location;
4151         char namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 name_len;
4156         u32 data_len;
4157         u8 filetype;
4158         int slot;
4159         int ret;
4160
4161         btrfs_init_path(&path);
4162         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4163         if (ret < 0) {
4164                 ret = DIR_ITEM_MISSING;
4165                 goto out;
4166         }
4167
4168         /* Process root dir and goto out*/
4169         if (index == 0) {
4170                 if (ret == 0) {
4171                         ret = ROOT_DIR_ERROR;
4172                         error(
4173                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4174                                 root->objectid,
4175                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4176                                         "REF" : "EXTREF",
4177                                 ref_key->objectid, ref_key->offset,
4178                                 key->type == BTRFS_DIR_ITEM_KEY ?
4179                                         "DIR_ITEM" : "DIR_INDEX");
4180                 } else {
4181                         ret = 0;
4182                 }
4183
4184                 goto out;
4185         }
4186
4187         /* Process normal file/dir */
4188         if (ret > 0) {
4189                 ret = DIR_ITEM_MISSING;
4190                 error(
4191                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4192                         root->objectid,
4193                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4194                         ref_key->objectid, ref_key->offset,
4195                         key->type == BTRFS_DIR_ITEM_KEY ?
4196                                 "DIR_ITEM" : "DIR_INDEX",
4197                         key->objectid, key->offset, namelen, name,
4198                         imode_to_type(mode));
4199                 goto out;
4200         }
4201
4202         /* Check whether inode_id/filetype/name match */
4203         node = path.nodes[0];
4204         slot = path.slots[0];
4205         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4206         total = btrfs_item_size_nr(node, slot);
4207         while (cur < total) {
4208                 ret = DIR_ITEM_MISMATCH;
4209                 name_len = btrfs_dir_name_len(node, di);
4210                 data_len = btrfs_dir_data_len(node, di);
4211
4212                 btrfs_dir_item_key_to_cpu(node, di, &location);
4213                 if (location.objectid != ref_key->objectid ||
4214                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4215                     location.offset != 0)
4216                         goto next;
4217
4218                 filetype = btrfs_dir_type(node, di);
4219                 if (imode_to_type(mode) != filetype)
4220                         goto next;
4221
4222                 if (name_len <= BTRFS_NAME_LEN) {
4223                         len = name_len;
4224                 } else {
4225                         len = BTRFS_NAME_LEN;
4226                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4227                         root->objectid,
4228                         key->type == BTRFS_DIR_ITEM_KEY ?
4229                         "DIR_ITEM" : "DIR_INDEX",
4230                         key->objectid, key->offset, name_len);
4231                 }
4232                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4233                 if (len != namelen || strncmp(namebuf, name, len))
4234                         goto next;
4235
4236                 ret = 0;
4237                 goto out;
4238 next:
4239                 len = sizeof(*di) + name_len + data_len;
4240                 di = (struct btrfs_dir_item *)((char *)di + len);
4241                 cur += len;
4242         }
4243         if (ret == DIR_ITEM_MISMATCH)
4244                 error(
4245                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4246                         root->objectid,
4247                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4248                         ref_key->objectid, ref_key->offset,
4249                         key->type == BTRFS_DIR_ITEM_KEY ?
4250                                 "DIR_ITEM" : "DIR_INDEX",
4251                         key->objectid, key->offset, namelen, name,
4252                         imode_to_type(mode));
4253 out:
4254         btrfs_release_path(&path);
4255         return ret;
4256 }
4257
4258 /*
4259  * Traverse the given INODE_REF and call find_dir_item() to find related
4260  * DIR_ITEM/DIR_INDEX.
4261  *
4262  * @root:       the root of the fs/file tree
4263  * @ref_key:    the key of the INODE_REF
4264  * @refs:       the count of INODE_REF
4265  * @mode:       the st_mode of INODE_ITEM
4266  *
4267  * Return 0 if no error occurred.
4268  */
4269 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4270                            struct extent_buffer *node, int slot, u64 *refs,
4271                            int mode)
4272 {
4273         struct btrfs_key key;
4274         struct btrfs_inode_ref *ref;
4275         char namebuf[BTRFS_NAME_LEN] = {0};
4276         u32 total;
4277         u32 cur = 0;
4278         u32 len;
4279         u32 name_len;
4280         u64 index;
4281         int ret, err = 0;
4282
4283         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4284         total = btrfs_item_size_nr(node, slot);
4285
4286 next:
4287         /* Update inode ref count */
4288         (*refs)++;
4289
4290         index = btrfs_inode_ref_index(node, ref);
4291         name_len = btrfs_inode_ref_name_len(node, ref);
4292         if (name_len <= BTRFS_NAME_LEN) {
4293                 len = name_len;
4294         } else {
4295                 len = BTRFS_NAME_LEN;
4296                 warning("root %llu INODE_REF[%llu %llu] name too long",
4297                         root->objectid, ref_key->objectid, ref_key->offset);
4298         }
4299
4300         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4301
4302         /* Check root dir ref name */
4303         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4304                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4305                       root->objectid, ref_key->objectid, ref_key->offset,
4306                       namebuf);
4307                 err |= ROOT_DIR_ERROR;
4308         }
4309
4310         /* Find related DIR_INDEX */
4311         key.objectid = ref_key->offset;
4312         key.type = BTRFS_DIR_INDEX_KEY;
4313         key.offset = index;
4314         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4315         err |= ret;
4316
4317         /* Find related dir_item */
4318         key.objectid = ref_key->offset;
4319         key.type = BTRFS_DIR_ITEM_KEY;
4320         key.offset = btrfs_name_hash(namebuf, len);
4321         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4322         err |= ret;
4323
4324         len = sizeof(*ref) + name_len;
4325         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4326         cur += len;
4327         if (cur < total)
4328                 goto next;
4329
4330         return err;
4331 }
4332
4333 /*
4334  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4335  * DIR_ITEM/DIR_INDEX.
4336  *
4337  * @root:       the root of the fs/file tree
4338  * @ref_key:    the key of the INODE_EXTREF
4339  * @refs:       the count of INODE_EXTREF
4340  * @mode:       the st_mode of INODE_ITEM
4341  *
4342  * Return 0 if no error occurred.
4343  */
4344 static int check_inode_extref(struct btrfs_root *root,
4345                               struct btrfs_key *ref_key,
4346                               struct extent_buffer *node, int slot, u64 *refs,
4347                               int mode)
4348 {
4349         struct btrfs_key key;
4350         struct btrfs_inode_extref *extref;
4351         char namebuf[BTRFS_NAME_LEN] = {0};
4352         u32 total;
4353         u32 cur = 0;
4354         u32 len;
4355         u32 name_len;
4356         u64 index;
4357         u64 parent;
4358         int ret;
4359         int err = 0;
4360
4361         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4362         total = btrfs_item_size_nr(node, slot);
4363
4364 next:
4365         /* update inode ref count */
4366         (*refs)++;
4367         name_len = btrfs_inode_extref_name_len(node, extref);
4368         index = btrfs_inode_extref_index(node, extref);
4369         parent = btrfs_inode_extref_parent(node, extref);
4370         if (name_len <= BTRFS_NAME_LEN) {
4371                 len = name_len;
4372         } else {
4373                 len = BTRFS_NAME_LEN;
4374                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4375                         root->objectid, ref_key->objectid, ref_key->offset);
4376         }
4377         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4378
4379         /* Check root dir ref name */
4380         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4381                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4382                       root->objectid, ref_key->objectid, ref_key->offset,
4383                       namebuf);
4384                 err |= ROOT_DIR_ERROR;
4385         }
4386
4387         /* find related dir_index */
4388         key.objectid = parent;
4389         key.type = BTRFS_DIR_INDEX_KEY;
4390         key.offset = index;
4391         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4392         err |= ret;
4393
4394         /* find related dir_item */
4395         key.objectid = parent;
4396         key.type = BTRFS_DIR_ITEM_KEY;
4397         key.offset = btrfs_name_hash(namebuf, len);
4398         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4399         err |= ret;
4400
4401         len = sizeof(*extref) + name_len;
4402         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4403         cur += len;
4404
4405         if (cur < total)
4406                 goto next;
4407
4408         return err;
4409 }
4410
4411 /*
4412  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4413  * DIR_ITEM/DIR_INDEX match.
4414  *
4415  * @root:       the root of the fs/file tree
4416  * @key:        the key of the INODE_REF/INODE_EXTREF
4417  * @name:       the name in the INODE_REF/INODE_EXTREF
4418  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4419  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4420  * to (u64)-1
4421  * @ext_ref:    the EXTENDED_IREF feature
4422  *
4423  * Return 0 if no error occurred.
4424  * Return >0 for error bitmap
4425  */
4426 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4427                           char *name, int namelen, u64 index,
4428                           unsigned int ext_ref)
4429 {
4430         struct btrfs_path path;
4431         struct btrfs_inode_ref *ref;
4432         struct btrfs_inode_extref *extref;
4433         struct extent_buffer *node;
4434         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4435         u32 total;
4436         u32 cur = 0;
4437         u32 len;
4438         u32 ref_namelen;
4439         u64 ref_index;
4440         u64 parent;
4441         u64 dir_id;
4442         int slot;
4443         int ret;
4444
4445         btrfs_init_path(&path);
4446         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4447         if (ret) {
4448                 ret = INODE_REF_MISSING;
4449                 goto extref;
4450         }
4451
4452         node = path.nodes[0];
4453         slot = path.slots[0];
4454
4455         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4456         total = btrfs_item_size_nr(node, slot);
4457
4458         /* Iterate all entry of INODE_REF */
4459         while (cur < total) {
4460                 ret = INODE_REF_MISSING;
4461
4462                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4463                 ref_index = btrfs_inode_ref_index(node, ref);
4464                 if (index != (u64)-1 && index != ref_index)
4465                         goto next_ref;
4466
4467                 if (ref_namelen <= BTRFS_NAME_LEN) {
4468                         len = ref_namelen;
4469                 } else {
4470                         len = BTRFS_NAME_LEN;
4471                         warning("root %llu INODE %s[%llu %llu] name too long",
4472                                 root->objectid,
4473                                 key->type == BTRFS_INODE_REF_KEY ?
4474                                         "REF" : "EXTREF",
4475                                 key->objectid, key->offset);
4476                 }
4477                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4478                                    len);
4479
4480                 if (len != namelen || strncmp(ref_namebuf, name, len))
4481                         goto next_ref;
4482
4483                 ret = 0;
4484                 goto out;
4485 next_ref:
4486                 len = sizeof(*ref) + ref_namelen;
4487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4488                 cur += len;
4489         }
4490
4491 extref:
4492         /* Skip if not support EXTENDED_IREF feature */
4493         if (!ext_ref)
4494                 goto out;
4495
4496         btrfs_release_path(&path);
4497         btrfs_init_path(&path);
4498
4499         dir_id = key->offset;
4500         key->type = BTRFS_INODE_EXTREF_KEY;
4501         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4502
4503         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4504         if (ret) {
4505                 ret = INODE_REF_MISSING;
4506                 goto out;
4507         }
4508
4509         node = path.nodes[0];
4510         slot = path.slots[0];
4511
4512         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4513         cur = 0;
4514         total = btrfs_item_size_nr(node, slot);
4515
4516         /* Iterate all entry of INODE_EXTREF */
4517         while (cur < total) {
4518                 ret = INODE_REF_MISSING;
4519
4520                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4521                 ref_index = btrfs_inode_extref_index(node, extref);
4522                 parent = btrfs_inode_extref_parent(node, extref);
4523                 if (index != (u64)-1 && index != ref_index)
4524                         goto next_extref;
4525
4526                 if (parent != dir_id)
4527                         goto next_extref;
4528
4529                 if (ref_namelen <= BTRFS_NAME_LEN) {
4530                         len = ref_namelen;
4531                 } else {
4532                         len = BTRFS_NAME_LEN;
4533                         warning("root %llu INODE %s[%llu %llu] name too long",
4534                                 root->objectid,
4535                                 key->type == BTRFS_INODE_REF_KEY ?
4536                                         "REF" : "EXTREF",
4537                                 key->objectid, key->offset);
4538                 }
4539                 read_extent_buffer(node, ref_namebuf,
4540                                    (unsigned long)(extref + 1), len);
4541
4542                 if (len != namelen || strncmp(ref_namebuf, name, len))
4543                         goto next_extref;
4544
4545                 ret = 0;
4546                 goto out;
4547
4548 next_extref:
4549                 len = sizeof(*extref) + ref_namelen;
4550                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4551                 cur += len;
4552
4553         }
4554 out:
4555         btrfs_release_path(&path);
4556         return ret;
4557 }
4558
4559 /*
4560  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4561  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4562  *
4563  * @root:       the root of the fs/file tree
4564  * @key:        the key of the INODE_REF/INODE_EXTREF
4565  * @size:       the st_size of the INODE_ITEM
4566  * @ext_ref:    the EXTENDED_IREF feature
4567  *
4568  * Return 0 if no error occurred.
4569  */
4570 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4571                           struct extent_buffer *node, int slot, u64 *size,
4572                           unsigned int ext_ref)
4573 {
4574         struct btrfs_dir_item *di;
4575         struct btrfs_inode_item *ii;
4576         struct btrfs_path path;
4577         struct btrfs_key location;
4578         char namebuf[BTRFS_NAME_LEN] = {0};
4579         u32 total;
4580         u32 cur = 0;
4581         u32 len;
4582         u32 name_len;
4583         u32 data_len;
4584         u8 filetype;
4585         u32 mode;
4586         u64 index;
4587         int ret;
4588         int err = 0;
4589
4590         /*
4591          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4592          * ignore index check.
4593          */
4594         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4595
4596         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4597         total = btrfs_item_size_nr(node, slot);
4598
4599         while (cur < total) {
4600                 data_len = btrfs_dir_data_len(node, di);
4601                 if (data_len)
4602                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4603                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4604                               "DIR_ITEM" : "DIR_INDEX",
4605                               key->objectid, key->offset, data_len);
4606
4607                 name_len = btrfs_dir_name_len(node, di);
4608                 if (name_len <= BTRFS_NAME_LEN) {
4609                         len = name_len;
4610                 } else {
4611                         len = BTRFS_NAME_LEN;
4612                         warning("root %llu %s[%llu %llu] name too long",
4613                                 root->objectid,
4614                                 key->type == BTRFS_DIR_ITEM_KEY ?
4615                                 "DIR_ITEM" : "DIR_INDEX",
4616                                 key->objectid, key->offset);
4617                 }
4618                 (*size) += name_len;
4619
4620                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4621                 filetype = btrfs_dir_type(node, di);
4622
4623                 btrfs_init_path(&path);
4624                 btrfs_dir_item_key_to_cpu(node, di, &location);
4625
4626                 /* Ignore related ROOT_ITEM check */
4627                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4628                         goto next;
4629
4630                 /* Check relative INODE_ITEM(existence/filetype) */
4631                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4632                 if (ret) {
4633                         err |= INODE_ITEM_MISSING;
4634                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4635                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4636                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4637                               key->offset, location.objectid, name_len,
4638                               namebuf, filetype);
4639                         goto next;
4640                 }
4641
4642                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4643                                     struct btrfs_inode_item);
4644                 mode = btrfs_inode_mode(path.nodes[0], ii);
4645
4646                 if (imode_to_type(mode) != filetype) {
4647                         err |= INODE_ITEM_MISMATCH;
4648                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4651                               key->offset, name_len, namebuf, filetype);
4652                 }
4653
4654                 /* Check relative INODE_REF/INODE_EXTREF */
4655                 location.type = BTRFS_INODE_REF_KEY;
4656                 location.offset = key->objectid;
4657                 ret = find_inode_ref(root, &location, namebuf, len,
4658                                        index, ext_ref);
4659                 err |= ret;
4660                 if (ret & INODE_REF_MISSING)
4661                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4662                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664                               key->offset, name_len, namebuf, filetype);
4665
4666 next:
4667                 btrfs_release_path(&path);
4668                 len = sizeof(*di) + name_len + data_len;
4669                 di = (struct btrfs_dir_item *)((char *)di + len);
4670                 cur += len;
4671
4672                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4673                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4674                               root->objectid, key->objectid, key->offset);
4675                         break;
4676                 }
4677         }
4678
4679         return err;
4680 }
4681
4682 /*
4683  * Check file extent datasum/hole, update the size of the file extents,
4684  * check and update the last offset of the file extent.
4685  *
4686  * @root:       the root of fs/file tree.
4687  * @fkey:       the key of the file extent.
4688  * @nodatasum:  INODE_NODATASUM feature.
4689  * @size:       the sum of all EXTENT_DATA items size for this inode.
4690  * @end:        the offset of the last extent.
4691  *
4692  * Return 0 if no error occurred.
4693  */
4694 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4695                              struct extent_buffer *node, int slot,
4696                              unsigned int nodatasum, u64 *size, u64 *end)
4697 {
4698         struct btrfs_file_extent_item *fi;
4699         u64 disk_bytenr;
4700         u64 disk_num_bytes;
4701         u64 extent_num_bytes;
4702         u64 extent_offset;
4703         u64 csum_found;         /* In byte size, sectorsize aligned */
4704         u64 search_start;       /* Logical range start we search for csum */
4705         u64 search_len;         /* Logical range len we search for csum */
4706         unsigned int extent_type;
4707         unsigned int is_hole;
4708         int compressed = 0;
4709         int ret;
4710         int err = 0;
4711
4712         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4713
4714         extent_type = btrfs_file_extent_type(node, fi);
4715         /* Skip if file extent is inline */
4716         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4717                 struct btrfs_item *e = btrfs_item_nr(slot);
4718                 u32 item_inline_len;
4719
4720                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4721                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4722                 if (extent_num_bytes == 0 ||
4723                     extent_num_bytes != item_inline_len)
4724                         err |= FILE_EXTENT_ERROR;
4725                 *size += extent_num_bytes;
4726                 return err;
4727         }
4728
4729         /* Check extent type */
4730         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4731                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4732                 err |= FILE_EXTENT_ERROR;
4733                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4734                       root->objectid, fkey->objectid, fkey->offset);
4735                 return err;
4736         }
4737
4738         /* Check REG_EXTENT/PREALLOC_EXTENT */
4739         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4740         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4741         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4742         extent_offset = btrfs_file_extent_offset(node, fi);
4743         compressed = btrfs_file_extent_compression(node, fi);
4744         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4745
4746         /*
4747          * Check EXTENT_DATA csum
4748          *
4749          * For plain (uncompressed) extent, we should only check the range
4750          * we're referring to, as it's possible that part of prealloc extent
4751          * has been written, and has csum:
4752          *
4753          * |<--- Original large preallocated extent A ---->|
4754          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4755          *      No csum                         Has csum
4756          *
4757          * For compressed extent, we should check the whole range.
4758          */
4759         if (!compressed) {
4760                 search_start = disk_bytenr + extent_offset;
4761                 search_len = extent_num_bytes;
4762         } else {
4763                 search_start = disk_bytenr;
4764                 search_len = disk_num_bytes;
4765         }
4766         ret = count_csum_range(root, search_start, search_len, &csum_found);
4767         if (csum_found > 0 && nodatasum) {
4768                 err |= ODD_CSUM_ITEM;
4769                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4770                       root->objectid, fkey->objectid, fkey->offset);
4771         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4772                    !is_hole && (ret < 0 || csum_found < search_len)) {
4773                 err |= CSUM_ITEM_MISSING;
4774                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4775                       root->objectid, fkey->objectid, fkey->offset,
4776                       csum_found, search_len);
4777         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4778                 err |= ODD_CSUM_ITEM;
4779                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4780                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4781         }
4782
4783         /* Check EXTENT_DATA hole */
4784         if (no_holes && is_hole) {
4785                 err |= FILE_EXTENT_ERROR;
4786                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4787                       root->objectid, fkey->objectid, fkey->offset);
4788         } else if (!no_holes && *end != fkey->offset) {
4789                 err |= FILE_EXTENT_ERROR;
4790                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4791                       root->objectid, fkey->objectid, fkey->offset);
4792         }
4793
4794         *end += extent_num_bytes;
4795         if (!is_hole)
4796                 *size += extent_num_bytes;
4797
4798         return err;
4799 }
4800
4801 /*
4802  * Check INODE_ITEM and related ITEMs (the same inode number)
4803  * 1. check link count
4804  * 2. check inode ref/extref
4805  * 3. check dir item/index
4806  *
4807  * @ext_ref:    the EXTENDED_IREF feature
4808  *
4809  * Return 0 if no error occurred.
4810  * Return >0 for error or hit the traversal is done(by error bitmap)
4811  */
4812 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4813                             unsigned int ext_ref)
4814 {
4815         struct extent_buffer *node;
4816         struct btrfs_inode_item *ii;
4817         struct btrfs_key key;
4818         u64 inode_id;
4819         u32 mode;
4820         u64 nlink;
4821         u64 nbytes;
4822         u64 isize;
4823         u64 size = 0;
4824         u64 refs = 0;
4825         u64 extent_end = 0;
4826         u64 extent_size = 0;
4827         unsigned int dir;
4828         unsigned int nodatasum;
4829         int slot;
4830         int ret;
4831         int err = 0;
4832
4833         node = path->nodes[0];
4834         slot = path->slots[0];
4835
4836         btrfs_item_key_to_cpu(node, &key, slot);
4837         inode_id = key.objectid;
4838
4839         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4840                 ret = btrfs_next_item(root, path);
4841                 if (ret > 0)
4842                         err |= LAST_ITEM;
4843                 return err;
4844         }
4845
4846         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4847         isize = btrfs_inode_size(node, ii);
4848         nbytes = btrfs_inode_nbytes(node, ii);
4849         mode = btrfs_inode_mode(node, ii);
4850         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4851         nlink = btrfs_inode_nlink(node, ii);
4852         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4853
4854         while (1) {
4855                 ret = btrfs_next_item(root, path);
4856                 if (ret < 0) {
4857                         /* out will fill 'err' rusing current statistics */
4858                         goto out;
4859                 } else if (ret > 0) {
4860                         err |= LAST_ITEM;
4861                         goto out;
4862                 }
4863
4864                 node = path->nodes[0];
4865                 slot = path->slots[0];
4866                 btrfs_item_key_to_cpu(node, &key, slot);
4867                 if (key.objectid != inode_id)
4868                         goto out;
4869
4870                 switch (key.type) {
4871                 case BTRFS_INODE_REF_KEY:
4872                         ret = check_inode_ref(root, &key, node, slot, &refs,
4873                                               mode);
4874                         err |= ret;
4875                         break;
4876                 case BTRFS_INODE_EXTREF_KEY:
4877                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4878                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4879                                         root->objectid, key.objectid,
4880                                         key.offset);
4881                         ret = check_inode_extref(root, &key, node, slot, &refs,
4882                                                  mode);
4883                         err |= ret;
4884                         break;
4885                 case BTRFS_DIR_ITEM_KEY:
4886                 case BTRFS_DIR_INDEX_KEY:
4887                         if (!dir) {
4888                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4889                                         root->objectid, inode_id,
4890                                         imode_to_type(mode), key.objectid,
4891                                         key.offset);
4892                         }
4893                         ret = check_dir_item(root, &key, node, slot, &size,
4894                                              ext_ref);
4895                         err |= ret;
4896                         break;
4897                 case BTRFS_EXTENT_DATA_KEY:
4898                         if (dir) {
4899                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4900                                         root->objectid, inode_id, key.objectid,
4901                                         key.offset);
4902                         }
4903                         ret = check_file_extent(root, &key, node, slot,
4904                                                 nodatasum, &extent_size,
4905                                                 &extent_end);
4906                         err |= ret;
4907                         break;
4908                 case BTRFS_XATTR_ITEM_KEY:
4909                         break;
4910                 default:
4911                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4912                               key.objectid, key.type, key.offset);
4913                 }
4914         }
4915
4916 out:
4917         /* verify INODE_ITEM nlink/isize/nbytes */
4918         if (dir) {
4919                 if (nlink != 1) {
4920                         err |= LINK_COUNT_ERROR;
4921                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4922                               root->objectid, inode_id, nlink);
4923                 }
4924
4925                 /*
4926                  * Just a warning, as dir inode nbytes is just an
4927                  * instructive value.
4928                  */
4929                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4930                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4931                                 root->objectid, inode_id, root->nodesize);
4932                 }
4933
4934                 if (isize != size) {
4935                         err |= ISIZE_ERROR;
4936                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4937                               root->objectid, inode_id, isize, size);
4938                 }
4939         } else {
4940                 if (nlink != refs) {
4941                         err |= LINK_COUNT_ERROR;
4942                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4943                               root->objectid, inode_id, nlink, refs);
4944                 } else if (!nlink) {
4945                         err |= ORPHAN_ITEM;
4946                 }
4947
4948                 if (!nbytes && !no_holes && extent_end < isize) {
4949                         err |= NBYTES_ERROR;
4950                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4951                               root->objectid, inode_id, isize);
4952                 }
4953
4954                 if (nbytes != extent_size) {
4955                         err |= NBYTES_ERROR;
4956                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4957                               root->objectid, inode_id, nbytes, extent_size);
4958                 }
4959         }
4960
4961         return err;
4962 }
4963
4964 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4965 {
4966         struct btrfs_path path;
4967         struct btrfs_key key;
4968         int err = 0;
4969         int ret;
4970
4971         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4972         key.type = BTRFS_INODE_ITEM_KEY;
4973         key.offset = 0;
4974
4975         /* For root being dropped, we don't need to check first inode */
4976         if (btrfs_root_refs(&root->root_item) == 0 &&
4977             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4978             key.objectid)
4979                 return 0;
4980
4981         btrfs_init_path(&path);
4982
4983         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4984         if (ret < 0)
4985                 goto out;
4986         if (ret > 0) {
4987                 ret = 0;
4988                 err |= INODE_ITEM_MISSING;
4989         }
4990
4991         err |= check_inode_item(root, &path, ext_ref);
4992         err &= ~LAST_ITEM;
4993         if (err && !ret)
4994                 ret = -EIO;
4995 out:
4996         btrfs_release_path(&path);
4997         return ret;
4998 }
4999
5000 /*
5001  * Iterate all item on the tree and call check_inode_item() to check.
5002  *
5003  * @root:       the root of the tree to be checked.
5004  * @ext_ref:    the EXTENDED_IREF feature
5005  *
5006  * Return 0 if no error found.
5007  * Return <0 for error.
5008  */
5009 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5010 {
5011         struct btrfs_path path;
5012         struct node_refs nrefs;
5013         struct btrfs_root_item *root_item = &root->root_item;
5014         int ret, wret;
5015         int level;
5016
5017         /*
5018          * We need to manually check the first inode item(256)
5019          * As the following traversal function will only start from
5020          * the first inode item in the leaf, if inode item(256) is missing
5021          * we will just skip it forever.
5022          */
5023         ret = check_fs_first_inode(root, ext_ref);
5024         if (ret < 0)
5025                 return ret;
5026
5027         memset(&nrefs, 0, sizeof(nrefs));
5028         level = btrfs_header_level(root->node);
5029         btrfs_init_path(&path);
5030
5031         if (btrfs_root_refs(root_item) > 0 ||
5032             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5033                 path.nodes[level] = root->node;
5034                 path.slots[level] = 0;
5035                 extent_buffer_get(root->node);
5036         } else {
5037                 struct btrfs_key key;
5038
5039                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5040                 level = root_item->drop_level;
5041                 path.lowest_level = level;
5042                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5043                 if (ret < 0)
5044                         goto out;
5045                 ret = 0;
5046         }
5047
5048         while (1) {
5049                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5050                 if (wret < 0)
5051                         ret = wret;
5052                 if (wret != 0)
5053                         break;
5054
5055                 wret = walk_up_tree_v2(root, &path, &level);
5056                 if (wret < 0)
5057                         ret = wret;
5058                 if (wret != 0)
5059                         break;
5060         }
5061
5062 out:
5063         btrfs_release_path(&path);
5064         return ret;
5065 }
5066
5067 /*
5068  * Find the relative ref for root_ref and root_backref.
5069  *
5070  * @root:       the root of the root tree.
5071  * @ref_key:    the key of the root ref.
5072  *
5073  * Return 0 if no error occurred.
5074  */
5075 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5076                           struct extent_buffer *node, int slot)
5077 {
5078         struct btrfs_path path;
5079         struct btrfs_key key;
5080         struct btrfs_root_ref *ref;
5081         struct btrfs_root_ref *backref;
5082         char ref_name[BTRFS_NAME_LEN] = {0};
5083         char backref_name[BTRFS_NAME_LEN] = {0};
5084         u64 ref_dirid;
5085         u64 ref_seq;
5086         u32 ref_namelen;
5087         u64 backref_dirid;
5088         u64 backref_seq;
5089         u32 backref_namelen;
5090         u32 len;
5091         int ret;
5092         int err = 0;
5093
5094         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5095         ref_dirid = btrfs_root_ref_dirid(node, ref);
5096         ref_seq = btrfs_root_ref_sequence(node, ref);
5097         ref_namelen = btrfs_root_ref_name_len(node, ref);
5098
5099         if (ref_namelen <= BTRFS_NAME_LEN) {
5100                 len = ref_namelen;
5101         } else {
5102                 len = BTRFS_NAME_LEN;
5103                 warning("%s[%llu %llu] ref_name too long",
5104                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5105                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5106                         ref_key->offset);
5107         }
5108         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5109
5110         /* Find relative root_ref */
5111         key.objectid = ref_key->offset;
5112         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5113         key.offset = ref_key->objectid;
5114
5115         btrfs_init_path(&path);
5116         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5117         if (ret) {
5118                 err |= ROOT_REF_MISSING;
5119                 error("%s[%llu %llu] couldn't find relative ref",
5120                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5121                       "ROOT_REF" : "ROOT_BACKREF",
5122                       ref_key->objectid, ref_key->offset);
5123                 goto out;
5124         }
5125
5126         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5127                                  struct btrfs_root_ref);
5128         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5129         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5130         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5131
5132         if (backref_namelen <= BTRFS_NAME_LEN) {
5133                 len = backref_namelen;
5134         } else {
5135                 len = BTRFS_NAME_LEN;
5136                 warning("%s[%llu %llu] ref_name too long",
5137                         key.type == BTRFS_ROOT_REF_KEY ?
5138                         "ROOT_REF" : "ROOT_BACKREF",
5139                         key.objectid, key.offset);
5140         }
5141         read_extent_buffer(path.nodes[0], backref_name,
5142                            (unsigned long)(backref + 1), len);
5143
5144         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5145             ref_namelen != backref_namelen ||
5146             strncmp(ref_name, backref_name, len)) {
5147                 err |= ROOT_REF_MISMATCH;
5148                 error("%s[%llu %llu] mismatch relative ref",
5149                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5150                       "ROOT_REF" : "ROOT_BACKREF",
5151                       ref_key->objectid, ref_key->offset);
5152         }
5153 out:
5154         btrfs_release_path(&path);
5155         return err;
5156 }
5157
5158 /*
5159  * Check all fs/file tree in low_memory mode.
5160  *
5161  * 1. for fs tree root item, call check_fs_root_v2()
5162  * 2. for fs tree root ref/backref, call check_root_ref()
5163  *
5164  * Return 0 if no error occurred.
5165  */
5166 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5167 {
5168         struct btrfs_root *tree_root = fs_info->tree_root;
5169         struct btrfs_root *cur_root = NULL;
5170         struct btrfs_path path;
5171         struct btrfs_key key;
5172         struct extent_buffer *node;
5173         unsigned int ext_ref;
5174         int slot;
5175         int ret;
5176         int err = 0;
5177
5178         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5179
5180         btrfs_init_path(&path);
5181         key.objectid = BTRFS_FS_TREE_OBJECTID;
5182         key.offset = 0;
5183         key.type = BTRFS_ROOT_ITEM_KEY;
5184
5185         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5186         if (ret < 0) {
5187                 err = ret;
5188                 goto out;
5189         } else if (ret > 0) {
5190                 err = -ENOENT;
5191                 goto out;
5192         }
5193
5194         while (1) {
5195                 node = path.nodes[0];
5196                 slot = path.slots[0];
5197                 btrfs_item_key_to_cpu(node, &key, slot);
5198                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5199                         goto out;
5200                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5201                     fs_root_objectid(key.objectid)) {
5202                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5203                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5204                                                                        &key);
5205                         } else {
5206                                 key.offset = (u64)-1;
5207                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5208                         }
5209
5210                         if (IS_ERR(cur_root)) {
5211                                 error("Fail to read fs/subvol tree: %lld",
5212                                       key.objectid);
5213                                 err = -EIO;
5214                                 goto next;
5215                         }
5216
5217                         ret = check_fs_root_v2(cur_root, ext_ref);
5218                         err |= ret;
5219
5220                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5221                                 btrfs_free_fs_root(cur_root);
5222                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5223                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5224                         ret = check_root_ref(tree_root, &key, node, slot);
5225                         err |= ret;
5226                 }
5227 next:
5228                 ret = btrfs_next_item(tree_root, &path);
5229                 if (ret > 0)
5230                         goto out;
5231                 if (ret < 0) {
5232                         err = ret;
5233                         goto out;
5234                 }
5235         }
5236
5237 out:
5238         btrfs_release_path(&path);
5239         return err;
5240 }
5241
5242 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5243 {
5244         struct list_head *cur = rec->backrefs.next;
5245         struct extent_backref *back;
5246         struct tree_backref *tback;
5247         struct data_backref *dback;
5248         u64 found = 0;
5249         int err = 0;
5250
5251         while(cur != &rec->backrefs) {
5252                 back = to_extent_backref(cur);
5253                 cur = cur->next;
5254                 if (!back->found_extent_tree) {
5255                         err = 1;
5256                         if (!print_errs)
5257                                 goto out;
5258                         if (back->is_data) {
5259                                 dback = to_data_backref(back);
5260                                 fprintf(stderr, "Backref %llu %s %llu"
5261                                         " owner %llu offset %llu num_refs %lu"
5262                                         " not found in extent tree\n",
5263                                         (unsigned long long)rec->start,
5264                                         back->full_backref ?
5265                                         "parent" : "root",
5266                                         back->full_backref ?
5267                                         (unsigned long long)dback->parent:
5268                                         (unsigned long long)dback->root,
5269                                         (unsigned long long)dback->owner,
5270                                         (unsigned long long)dback->offset,
5271                                         (unsigned long)dback->num_refs);
5272                         } else {
5273                                 tback = to_tree_backref(back);
5274                                 fprintf(stderr, "Backref %llu parent %llu"
5275                                         " root %llu not found in extent tree\n",
5276                                         (unsigned long long)rec->start,
5277                                         (unsigned long long)tback->parent,
5278                                         (unsigned long long)tback->root);
5279                         }
5280                 }
5281                 if (!back->is_data && !back->found_ref) {
5282                         err = 1;
5283                         if (!print_errs)
5284                                 goto out;
5285                         tback = to_tree_backref(back);
5286                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5287                                 (unsigned long long)rec->start,
5288                                 back->full_backref ? "parent" : "root",
5289                                 back->full_backref ?
5290                                 (unsigned long long)tback->parent :
5291                                 (unsigned long long)tback->root, back);
5292                 }
5293                 if (back->is_data) {
5294                         dback = to_data_backref(back);
5295                         if (dback->found_ref != dback->num_refs) {
5296                                 err = 1;
5297                                 if (!print_errs)
5298                                         goto out;
5299                                 fprintf(stderr, "Incorrect local backref count"
5300                                         " on %llu %s %llu owner %llu"
5301                                         " offset %llu found %u wanted %u back %p\n",
5302                                         (unsigned long long)rec->start,
5303                                         back->full_backref ?
5304                                         "parent" : "root",
5305                                         back->full_backref ?
5306                                         (unsigned long long)dback->parent:
5307                                         (unsigned long long)dback->root,
5308                                         (unsigned long long)dback->owner,
5309                                         (unsigned long long)dback->offset,
5310                                         dback->found_ref, dback->num_refs, back);
5311                         }
5312                         if (dback->disk_bytenr != rec->start) {
5313                                 err = 1;
5314                                 if (!print_errs)
5315                                         goto out;
5316                                 fprintf(stderr, "Backref disk bytenr does not"
5317                                         " match extent record, bytenr=%llu, "
5318                                         "ref bytenr=%llu\n",
5319                                         (unsigned long long)rec->start,
5320                                         (unsigned long long)dback->disk_bytenr);
5321                         }
5322
5323                         if (dback->bytes != rec->nr) {
5324                                 err = 1;
5325                                 if (!print_errs)
5326                                         goto out;
5327                                 fprintf(stderr, "Backref bytes do not match "
5328                                         "extent backref, bytenr=%llu, ref "
5329                                         "bytes=%llu, backref bytes=%llu\n",
5330                                         (unsigned long long)rec->start,
5331                                         (unsigned long long)rec->nr,
5332                                         (unsigned long long)dback->bytes);
5333                         }
5334                 }
5335                 if (!back->is_data) {
5336                         found += 1;
5337                 } else {
5338                         dback = to_data_backref(back);
5339                         found += dback->found_ref;
5340                 }
5341         }
5342         if (found != rec->refs) {
5343                 err = 1;
5344                 if (!print_errs)
5345                         goto out;
5346                 fprintf(stderr, "Incorrect global backref count "
5347                         "on %llu found %llu wanted %llu\n",
5348                         (unsigned long long)rec->start,
5349                         (unsigned long long)found,
5350                         (unsigned long long)rec->refs);
5351         }
5352 out:
5353         return err;
5354 }
5355
5356 static int free_all_extent_backrefs(struct extent_record *rec)
5357 {
5358         struct extent_backref *back;
5359         struct list_head *cur;
5360         while (!list_empty(&rec->backrefs)) {
5361                 cur = rec->backrefs.next;
5362                 back = to_extent_backref(cur);
5363                 list_del(cur);
5364                 free(back);
5365         }
5366         return 0;
5367 }
5368
5369 static void free_extent_record_cache(struct cache_tree *extent_cache)
5370 {
5371         struct cache_extent *cache;
5372         struct extent_record *rec;
5373
5374         while (1) {
5375                 cache = first_cache_extent(extent_cache);
5376                 if (!cache)
5377                         break;
5378                 rec = container_of(cache, struct extent_record, cache);
5379                 remove_cache_extent(extent_cache, cache);
5380                 free_all_extent_backrefs(rec);
5381                 free(rec);
5382         }
5383 }
5384
5385 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5386                                  struct extent_record *rec)
5387 {
5388         if (rec->content_checked && rec->owner_ref_checked &&
5389             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5390             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5391             !rec->bad_full_backref && !rec->crossing_stripes &&
5392             !rec->wrong_chunk_type) {
5393                 remove_cache_extent(extent_cache, &rec->cache);
5394                 free_all_extent_backrefs(rec);
5395                 list_del_init(&rec->list);
5396                 free(rec);
5397         }
5398         return 0;
5399 }
5400
5401 static int check_owner_ref(struct btrfs_root *root,
5402                             struct extent_record *rec,
5403                             struct extent_buffer *buf)
5404 {
5405         struct extent_backref *node;
5406         struct tree_backref *back;
5407         struct btrfs_root *ref_root;
5408         struct btrfs_key key;
5409         struct btrfs_path path;
5410         struct extent_buffer *parent;
5411         int level;
5412         int found = 0;
5413         int ret;
5414
5415         list_for_each_entry(node, &rec->backrefs, list) {
5416                 if (node->is_data)
5417                         continue;
5418                 if (!node->found_ref)
5419                         continue;
5420                 if (node->full_backref)
5421                         continue;
5422                 back = to_tree_backref(node);
5423                 if (btrfs_header_owner(buf) == back->root)
5424                         return 0;
5425         }
5426         BUG_ON(rec->is_root);
5427
5428         /* try to find the block by search corresponding fs tree */
5429         key.objectid = btrfs_header_owner(buf);
5430         key.type = BTRFS_ROOT_ITEM_KEY;
5431         key.offset = (u64)-1;
5432
5433         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5434         if (IS_ERR(ref_root))
5435                 return 1;
5436
5437         level = btrfs_header_level(buf);
5438         if (level == 0)
5439                 btrfs_item_key_to_cpu(buf, &key, 0);
5440         else
5441                 btrfs_node_key_to_cpu(buf, &key, 0);
5442
5443         btrfs_init_path(&path);
5444         path.lowest_level = level + 1;
5445         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5446         if (ret < 0)
5447                 return 0;
5448
5449         parent = path.nodes[level + 1];
5450         if (parent && buf->start == btrfs_node_blockptr(parent,
5451                                                         path.slots[level + 1]))
5452                 found = 1;
5453
5454         btrfs_release_path(&path);
5455         return found ? 0 : 1;
5456 }
5457
5458 static int is_extent_tree_record(struct extent_record *rec)
5459 {
5460         struct list_head *cur = rec->backrefs.next;
5461         struct extent_backref *node;
5462         struct tree_backref *back;
5463         int is_extent = 0;
5464
5465         while(cur != &rec->backrefs) {
5466                 node = to_extent_backref(cur);
5467                 cur = cur->next;
5468                 if (node->is_data)
5469                         return 0;
5470                 back = to_tree_backref(node);
5471                 if (node->full_backref)
5472                         return 0;
5473                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5474                         is_extent = 1;
5475         }
5476         return is_extent;
5477 }
5478
5479
5480 static int record_bad_block_io(struct btrfs_fs_info *info,
5481                                struct cache_tree *extent_cache,
5482                                u64 start, u64 len)
5483 {
5484         struct extent_record *rec;
5485         struct cache_extent *cache;
5486         struct btrfs_key key;
5487
5488         cache = lookup_cache_extent(extent_cache, start, len);
5489         if (!cache)
5490                 return 0;
5491
5492         rec = container_of(cache, struct extent_record, cache);
5493         if (!is_extent_tree_record(rec))
5494                 return 0;
5495
5496         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5497         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5498 }
5499
5500 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5501                        struct extent_buffer *buf, int slot)
5502 {
5503         if (btrfs_header_level(buf)) {
5504                 struct btrfs_key_ptr ptr1, ptr2;
5505
5506                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5507                                    sizeof(struct btrfs_key_ptr));
5508                 read_extent_buffer(buf, &ptr2,
5509                                    btrfs_node_key_ptr_offset(slot + 1),
5510                                    sizeof(struct btrfs_key_ptr));
5511                 write_extent_buffer(buf, &ptr1,
5512                                     btrfs_node_key_ptr_offset(slot + 1),
5513                                     sizeof(struct btrfs_key_ptr));
5514                 write_extent_buffer(buf, &ptr2,
5515                                     btrfs_node_key_ptr_offset(slot),
5516                                     sizeof(struct btrfs_key_ptr));
5517                 if (slot == 0) {
5518                         struct btrfs_disk_key key;
5519                         btrfs_node_key(buf, &key, 0);
5520                         btrfs_fixup_low_keys(root, path, &key,
5521                                              btrfs_header_level(buf) + 1);
5522                 }
5523         } else {
5524                 struct btrfs_item *item1, *item2;
5525                 struct btrfs_key k1, k2;
5526                 char *item1_data, *item2_data;
5527                 u32 item1_offset, item2_offset, item1_size, item2_size;
5528
5529                 item1 = btrfs_item_nr(slot);
5530                 item2 = btrfs_item_nr(slot + 1);
5531                 btrfs_item_key_to_cpu(buf, &k1, slot);
5532                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5533                 item1_offset = btrfs_item_offset(buf, item1);
5534                 item2_offset = btrfs_item_offset(buf, item2);
5535                 item1_size = btrfs_item_size(buf, item1);
5536                 item2_size = btrfs_item_size(buf, item2);
5537
5538                 item1_data = malloc(item1_size);
5539                 if (!item1_data)
5540                         return -ENOMEM;
5541                 item2_data = malloc(item2_size);
5542                 if (!item2_data) {
5543                         free(item1_data);
5544                         return -ENOMEM;
5545                 }
5546
5547                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5548                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5549
5550                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5551                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5552                 free(item1_data);
5553                 free(item2_data);
5554
5555                 btrfs_set_item_offset(buf, item1, item2_offset);
5556                 btrfs_set_item_offset(buf, item2, item1_offset);
5557                 btrfs_set_item_size(buf, item1, item2_size);
5558                 btrfs_set_item_size(buf, item2, item1_size);
5559
5560                 path->slots[0] = slot;
5561                 btrfs_set_item_key_unsafe(root, path, &k2);
5562                 path->slots[0] = slot + 1;
5563                 btrfs_set_item_key_unsafe(root, path, &k1);
5564         }
5565         return 0;
5566 }
5567
5568 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5569 {
5570         struct extent_buffer *buf;
5571         struct btrfs_key k1, k2;
5572         int i;
5573         int level = path->lowest_level;
5574         int ret = -EIO;
5575
5576         buf = path->nodes[level];
5577         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5578                 if (level) {
5579                         btrfs_node_key_to_cpu(buf, &k1, i);
5580                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5581                 } else {
5582                         btrfs_item_key_to_cpu(buf, &k1, i);
5583                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5584                 }
5585                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5586                         continue;
5587                 ret = swap_values(root, path, buf, i);
5588                 if (ret)
5589                         break;
5590                 btrfs_mark_buffer_dirty(buf);
5591                 i = 0;
5592         }
5593         return ret;
5594 }
5595
5596 static int delete_bogus_item(struct btrfs_root *root,
5597                              struct btrfs_path *path,
5598                              struct extent_buffer *buf, int slot)
5599 {
5600         struct btrfs_key key;
5601         int nritems = btrfs_header_nritems(buf);
5602
5603         btrfs_item_key_to_cpu(buf, &key, slot);
5604
5605         /* These are all the keys we can deal with missing. */
5606         if (key.type != BTRFS_DIR_INDEX_KEY &&
5607             key.type != BTRFS_EXTENT_ITEM_KEY &&
5608             key.type != BTRFS_METADATA_ITEM_KEY &&
5609             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5610             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5611                 return -1;
5612
5613         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5614                (unsigned long long)key.objectid, key.type,
5615                (unsigned long long)key.offset, slot, buf->start);
5616         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5617                               btrfs_item_nr_offset(slot + 1),
5618                               sizeof(struct btrfs_item) *
5619                               (nritems - slot - 1));
5620         btrfs_set_header_nritems(buf, nritems - 1);
5621         if (slot == 0) {
5622                 struct btrfs_disk_key disk_key;
5623
5624                 btrfs_item_key(buf, &disk_key, 0);
5625                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5626         }
5627         btrfs_mark_buffer_dirty(buf);
5628         return 0;
5629 }
5630
5631 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5632 {
5633         struct extent_buffer *buf;
5634         int i;
5635         int ret = 0;
5636
5637         /* We should only get this for leaves */
5638         BUG_ON(path->lowest_level);
5639         buf = path->nodes[0];
5640 again:
5641         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5642                 unsigned int shift = 0, offset;
5643
5644                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5645                     BTRFS_LEAF_DATA_SIZE(root)) {
5646                         if (btrfs_item_end_nr(buf, i) >
5647                             BTRFS_LEAF_DATA_SIZE(root)) {
5648                                 ret = delete_bogus_item(root, path, buf, i);
5649                                 if (!ret)
5650                                         goto again;
5651                                 fprintf(stderr, "item is off the end of the "
5652                                         "leaf, can't fix\n");
5653                                 ret = -EIO;
5654                                 break;
5655                         }
5656                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5657                                 btrfs_item_end_nr(buf, i);
5658                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5659                            btrfs_item_offset_nr(buf, i - 1)) {
5660                         if (btrfs_item_end_nr(buf, i) >
5661                             btrfs_item_offset_nr(buf, i - 1)) {
5662                                 ret = delete_bogus_item(root, path, buf, i);
5663                                 if (!ret)
5664                                         goto again;
5665                                 fprintf(stderr, "items overlap, can't fix\n");
5666                                 ret = -EIO;
5667                                 break;
5668                         }
5669                         shift = btrfs_item_offset_nr(buf, i - 1) -
5670                                 btrfs_item_end_nr(buf, i);
5671                 }
5672                 if (!shift)
5673                         continue;
5674
5675                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5676                        i, shift, (unsigned long long)buf->start);
5677                 offset = btrfs_item_offset_nr(buf, i);
5678                 memmove_extent_buffer(buf,
5679                                       btrfs_leaf_data(buf) + offset + shift,
5680                                       btrfs_leaf_data(buf) + offset,
5681                                       btrfs_item_size_nr(buf, i));
5682                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5683                                       offset + shift);
5684                 btrfs_mark_buffer_dirty(buf);
5685         }
5686
5687         /*
5688          * We may have moved things, in which case we want to exit so we don't
5689          * write those changes out.  Once we have proper abort functionality in
5690          * progs this can be changed to something nicer.
5691          */
5692         BUG_ON(ret);
5693         return ret;
5694 }
5695
5696 /*
5697  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5698  * then just return -EIO.
5699  */
5700 static int try_to_fix_bad_block(struct btrfs_root *root,
5701                                 struct extent_buffer *buf,
5702                                 enum btrfs_tree_block_status status)
5703 {
5704         struct btrfs_trans_handle *trans;
5705         struct ulist *roots;
5706         struct ulist_node *node;
5707         struct btrfs_root *search_root;
5708         struct btrfs_path path;
5709         struct ulist_iterator iter;
5710         struct btrfs_key root_key, key;
5711         int ret;
5712
5713         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5714             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5715                 return -EIO;
5716
5717         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5718         if (ret)
5719                 return -EIO;
5720
5721         btrfs_init_path(&path);
5722         ULIST_ITER_INIT(&iter);
5723         while ((node = ulist_next(roots, &iter))) {
5724                 root_key.objectid = node->val;
5725                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5726                 root_key.offset = (u64)-1;
5727
5728                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5729                 if (IS_ERR(root)) {
5730                         ret = -EIO;
5731                         break;
5732                 }
5733
5734
5735                 trans = btrfs_start_transaction(search_root, 0);
5736                 if (IS_ERR(trans)) {
5737                         ret = PTR_ERR(trans);
5738                         break;
5739                 }
5740
5741                 path.lowest_level = btrfs_header_level(buf);
5742                 path.skip_check_block = 1;
5743                 if (path.lowest_level)
5744                         btrfs_node_key_to_cpu(buf, &key, 0);
5745                 else
5746                         btrfs_item_key_to_cpu(buf, &key, 0);
5747                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5748                 if (ret) {
5749                         ret = -EIO;
5750                         btrfs_commit_transaction(trans, search_root);
5751                         break;
5752                 }
5753                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5754                         ret = fix_key_order(search_root, &path);
5755                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5756                         ret = fix_item_offset(search_root, &path);
5757                 if (ret) {
5758                         btrfs_commit_transaction(trans, search_root);
5759                         break;
5760                 }
5761                 btrfs_release_path(&path);
5762                 btrfs_commit_transaction(trans, search_root);
5763         }
5764         ulist_free(roots);
5765         btrfs_release_path(&path);
5766         return ret;
5767 }
5768
5769 static int check_block(struct btrfs_root *root,
5770                        struct cache_tree *extent_cache,
5771                        struct extent_buffer *buf, u64 flags)
5772 {
5773         struct extent_record *rec;
5774         struct cache_extent *cache;
5775         struct btrfs_key key;
5776         enum btrfs_tree_block_status status;
5777         int ret = 0;
5778         int level;
5779
5780         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5781         if (!cache)
5782                 return 1;
5783         rec = container_of(cache, struct extent_record, cache);
5784         rec->generation = btrfs_header_generation(buf);
5785
5786         level = btrfs_header_level(buf);
5787         if (btrfs_header_nritems(buf) > 0) {
5788
5789                 if (level == 0)
5790                         btrfs_item_key_to_cpu(buf, &key, 0);
5791                 else
5792                         btrfs_node_key_to_cpu(buf, &key, 0);
5793
5794                 rec->info_objectid = key.objectid;
5795         }
5796         rec->info_level = level;
5797
5798         if (btrfs_is_leaf(buf))
5799                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5800         else
5801                 status = btrfs_check_node(root, &rec->parent_key, buf);
5802
5803         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5804                 if (repair)
5805                         status = try_to_fix_bad_block(root, buf, status);
5806                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5807                         ret = -EIO;
5808                         fprintf(stderr, "bad block %llu\n",
5809                                 (unsigned long long)buf->start);
5810                 } else {
5811                         /*
5812                          * Signal to callers we need to start the scan over
5813                          * again since we'll have cowed blocks.
5814                          */
5815                         ret = -EAGAIN;
5816                 }
5817         } else {
5818                 rec->content_checked = 1;
5819                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5820                         rec->owner_ref_checked = 1;
5821                 else {
5822                         ret = check_owner_ref(root, rec, buf);
5823                         if (!ret)
5824                                 rec->owner_ref_checked = 1;
5825                 }
5826         }
5827         if (!ret)
5828                 maybe_free_extent_rec(extent_cache, rec);
5829         return ret;
5830 }
5831
5832 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5833                                                 u64 parent, u64 root)
5834 {
5835         struct list_head *cur = rec->backrefs.next;
5836         struct extent_backref *node;
5837         struct tree_backref *back;
5838
5839         while(cur != &rec->backrefs) {
5840                 node = to_extent_backref(cur);
5841                 cur = cur->next;
5842                 if (node->is_data)
5843                         continue;
5844                 back = to_tree_backref(node);
5845                 if (parent > 0) {
5846                         if (!node->full_backref)
5847                                 continue;
5848                         if (parent == back->parent)
5849                                 return back;
5850                 } else {
5851                         if (node->full_backref)
5852                                 continue;
5853                         if (back->root == root)
5854                                 return back;
5855                 }
5856         }
5857         return NULL;
5858 }
5859
5860 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5861                                                 u64 parent, u64 root)
5862 {
5863         struct tree_backref *ref = malloc(sizeof(*ref));
5864
5865         if (!ref)
5866                 return NULL;
5867         memset(&ref->node, 0, sizeof(ref->node));
5868         if (parent > 0) {
5869                 ref->parent = parent;
5870                 ref->node.full_backref = 1;
5871         } else {
5872                 ref->root = root;
5873                 ref->node.full_backref = 0;
5874         }
5875         list_add_tail(&ref->node.list, &rec->backrefs);
5876
5877         return ref;
5878 }
5879
5880 static struct data_backref *find_data_backref(struct extent_record *rec,
5881                                                 u64 parent, u64 root,
5882                                                 u64 owner, u64 offset,
5883                                                 int found_ref,
5884                                                 u64 disk_bytenr, u64 bytes)
5885 {
5886         struct list_head *cur = rec->backrefs.next;
5887         struct extent_backref *node;
5888         struct data_backref *back;
5889
5890         while(cur != &rec->backrefs) {
5891                 node = to_extent_backref(cur);
5892                 cur = cur->next;
5893                 if (!node->is_data)
5894                         continue;
5895                 back = to_data_backref(node);
5896                 if (parent > 0) {
5897                         if (!node->full_backref)
5898                                 continue;
5899                         if (parent == back->parent)
5900                                 return back;
5901                 } else {
5902                         if (node->full_backref)
5903                                 continue;
5904                         if (back->root == root && back->owner == owner &&
5905                             back->offset == offset) {
5906                                 if (found_ref && node->found_ref &&
5907                                     (back->bytes != bytes ||
5908                                     back->disk_bytenr != disk_bytenr))
5909                                         continue;
5910                                 return back;
5911                         }
5912                 }
5913         }
5914         return NULL;
5915 }
5916
5917 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5918                                                 u64 parent, u64 root,
5919                                                 u64 owner, u64 offset,
5920                                                 u64 max_size)
5921 {
5922         struct data_backref *ref = malloc(sizeof(*ref));
5923
5924         if (!ref)
5925                 return NULL;
5926         memset(&ref->node, 0, sizeof(ref->node));
5927         ref->node.is_data = 1;
5928
5929         if (parent > 0) {
5930                 ref->parent = parent;
5931                 ref->owner = 0;
5932                 ref->offset = 0;
5933                 ref->node.full_backref = 1;
5934         } else {
5935                 ref->root = root;
5936                 ref->owner = owner;
5937                 ref->offset = offset;
5938                 ref->node.full_backref = 0;
5939         }
5940         ref->bytes = max_size;
5941         ref->found_ref = 0;
5942         ref->num_refs = 0;
5943         list_add_tail(&ref->node.list, &rec->backrefs);
5944         if (max_size > rec->max_size)
5945                 rec->max_size = max_size;
5946         return ref;
5947 }
5948
5949 /* Check if the type of extent matches with its chunk */
5950 static void check_extent_type(struct extent_record *rec)
5951 {
5952         struct btrfs_block_group_cache *bg_cache;
5953
5954         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5955         if (!bg_cache)
5956                 return;
5957
5958         /* data extent, check chunk directly*/
5959         if (!rec->metadata) {
5960                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5961                         rec->wrong_chunk_type = 1;
5962                 return;
5963         }
5964
5965         /* metadata extent, check the obvious case first */
5966         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5967                                  BTRFS_BLOCK_GROUP_METADATA))) {
5968                 rec->wrong_chunk_type = 1;
5969                 return;
5970         }
5971
5972         /*
5973          * Check SYSTEM extent, as it's also marked as metadata, we can only
5974          * make sure it's a SYSTEM extent by its backref
5975          */
5976         if (!list_empty(&rec->backrefs)) {
5977                 struct extent_backref *node;
5978                 struct tree_backref *tback;
5979                 u64 bg_type;
5980
5981                 node = to_extent_backref(rec->backrefs.next);
5982                 if (node->is_data) {
5983                         /* tree block shouldn't have data backref */
5984                         rec->wrong_chunk_type = 1;
5985                         return;
5986                 }
5987                 tback = container_of(node, struct tree_backref, node);
5988
5989                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5990                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5991                 else
5992                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5993                 if (!(bg_cache->flags & bg_type))
5994                         rec->wrong_chunk_type = 1;
5995         }
5996 }
5997
5998 /*
5999  * Allocate a new extent record, fill default values from @tmpl and insert int
6000  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6001  * the cache, otherwise it fails.
6002  */
6003 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6004                 struct extent_record *tmpl)
6005 {
6006         struct extent_record *rec;
6007         int ret = 0;
6008
6009         rec = malloc(sizeof(*rec));
6010         if (!rec)
6011                 return -ENOMEM;
6012         rec->start = tmpl->start;
6013         rec->max_size = tmpl->max_size;
6014         rec->nr = max(tmpl->nr, tmpl->max_size);
6015         rec->found_rec = tmpl->found_rec;
6016         rec->content_checked = tmpl->content_checked;
6017         rec->owner_ref_checked = tmpl->owner_ref_checked;
6018         rec->num_duplicates = 0;
6019         rec->metadata = tmpl->metadata;
6020         rec->flag_block_full_backref = FLAG_UNSET;
6021         rec->bad_full_backref = 0;
6022         rec->crossing_stripes = 0;
6023         rec->wrong_chunk_type = 0;
6024         rec->is_root = tmpl->is_root;
6025         rec->refs = tmpl->refs;
6026         rec->extent_item_refs = tmpl->extent_item_refs;
6027         rec->parent_generation = tmpl->parent_generation;
6028         INIT_LIST_HEAD(&rec->backrefs);
6029         INIT_LIST_HEAD(&rec->dups);
6030         INIT_LIST_HEAD(&rec->list);
6031         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6032         rec->cache.start = tmpl->start;
6033         rec->cache.size = tmpl->nr;
6034         ret = insert_cache_extent(extent_cache, &rec->cache);
6035         if (ret) {
6036                 free(rec);
6037                 return ret;
6038         }
6039         bytes_used += rec->nr;
6040
6041         if (tmpl->metadata)
6042                 rec->crossing_stripes = check_crossing_stripes(global_info,
6043                                 rec->start, global_info->tree_root->nodesize);
6044         check_extent_type(rec);
6045         return ret;
6046 }
6047
6048 /*
6049  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6050  * some are hints:
6051  * - refs              - if found, increase refs
6052  * - is_root           - if found, set
6053  * - content_checked   - if found, set
6054  * - owner_ref_checked - if found, set
6055  *
6056  * If not found, create a new one, initialize and insert.
6057  */
6058 static int add_extent_rec(struct cache_tree *extent_cache,
6059                 struct extent_record *tmpl)
6060 {
6061         struct extent_record *rec;
6062         struct cache_extent *cache;
6063         int ret = 0;
6064         int dup = 0;
6065
6066         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6067         if (cache) {
6068                 rec = container_of(cache, struct extent_record, cache);
6069                 if (tmpl->refs)
6070                         rec->refs++;
6071                 if (rec->nr == 1)
6072                         rec->nr = max(tmpl->nr, tmpl->max_size);
6073
6074                 /*
6075                  * We need to make sure to reset nr to whatever the extent
6076                  * record says was the real size, this way we can compare it to
6077                  * the backrefs.
6078                  */
6079                 if (tmpl->found_rec) {
6080                         if (tmpl->start != rec->start || rec->found_rec) {
6081                                 struct extent_record *tmp;
6082
6083                                 dup = 1;
6084                                 if (list_empty(&rec->list))
6085                                         list_add_tail(&rec->list,
6086                                                       &duplicate_extents);
6087
6088                                 /*
6089                                  * We have to do this song and dance in case we
6090                                  * find an extent record that falls inside of
6091                                  * our current extent record but does not have
6092                                  * the same objectid.
6093                                  */
6094                                 tmp = malloc(sizeof(*tmp));
6095                                 if (!tmp)
6096                                         return -ENOMEM;
6097                                 tmp->start = tmpl->start;
6098                                 tmp->max_size = tmpl->max_size;
6099                                 tmp->nr = tmpl->nr;
6100                                 tmp->found_rec = 1;
6101                                 tmp->metadata = tmpl->metadata;
6102                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6103                                 INIT_LIST_HEAD(&tmp->list);
6104                                 list_add_tail(&tmp->list, &rec->dups);
6105                                 rec->num_duplicates++;
6106                         } else {
6107                                 rec->nr = tmpl->nr;
6108                                 rec->found_rec = 1;
6109                         }
6110                 }
6111
6112                 if (tmpl->extent_item_refs && !dup) {
6113                         if (rec->extent_item_refs) {
6114                                 fprintf(stderr, "block %llu rec "
6115                                         "extent_item_refs %llu, passed %llu\n",
6116                                         (unsigned long long)tmpl->start,
6117                                         (unsigned long long)
6118                                                         rec->extent_item_refs,
6119                                         (unsigned long long)tmpl->extent_item_refs);
6120                         }
6121                         rec->extent_item_refs = tmpl->extent_item_refs;
6122                 }
6123                 if (tmpl->is_root)
6124                         rec->is_root = 1;
6125                 if (tmpl->content_checked)
6126                         rec->content_checked = 1;
6127                 if (tmpl->owner_ref_checked)
6128                         rec->owner_ref_checked = 1;
6129                 memcpy(&rec->parent_key, &tmpl->parent_key,
6130                                 sizeof(tmpl->parent_key));
6131                 if (tmpl->parent_generation)
6132                         rec->parent_generation = tmpl->parent_generation;
6133                 if (rec->max_size < tmpl->max_size)
6134                         rec->max_size = tmpl->max_size;
6135
6136                 /*
6137                  * A metadata extent can't cross stripe_len boundary, otherwise
6138                  * kernel scrub won't be able to handle it.
6139                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6140                  * it.
6141                  */
6142                 if (tmpl->metadata)
6143                         rec->crossing_stripes = check_crossing_stripes(
6144                                         global_info, rec->start,
6145                                         global_info->tree_root->nodesize);
6146                 check_extent_type(rec);
6147                 maybe_free_extent_rec(extent_cache, rec);
6148                 return ret;
6149         }
6150
6151         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6152
6153         return ret;
6154 }
6155
6156 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6157                             u64 parent, u64 root, int found_ref)
6158 {
6159         struct extent_record *rec;
6160         struct tree_backref *back;
6161         struct cache_extent *cache;
6162         int ret;
6163
6164         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6165         if (!cache) {
6166                 struct extent_record tmpl;
6167
6168                 memset(&tmpl, 0, sizeof(tmpl));
6169                 tmpl.start = bytenr;
6170                 tmpl.nr = 1;
6171                 tmpl.metadata = 1;
6172
6173                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6174                 if (ret)
6175                         return ret;
6176
6177                 /* really a bug in cache_extent implement now */
6178                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6179                 if (!cache)
6180                         return -ENOENT;
6181         }
6182
6183         rec = container_of(cache, struct extent_record, cache);
6184         if (rec->start != bytenr) {
6185                 /*
6186                  * Several cause, from unaligned bytenr to over lapping extents
6187                  */
6188                 return -EEXIST;
6189         }
6190
6191         back = find_tree_backref(rec, parent, root);
6192         if (!back) {
6193                 back = alloc_tree_backref(rec, parent, root);
6194                 if (!back)
6195                         return -ENOMEM;
6196         }
6197
6198         if (found_ref) {
6199                 if (back->node.found_ref) {
6200                         fprintf(stderr, "Extent back ref already exists "
6201                                 "for %llu parent %llu root %llu \n",
6202                                 (unsigned long long)bytenr,
6203                                 (unsigned long long)parent,
6204                                 (unsigned long long)root);
6205                 }
6206                 back->node.found_ref = 1;
6207         } else {
6208                 if (back->node.found_extent_tree) {
6209                         fprintf(stderr, "Extent back ref already exists "
6210                                 "for %llu parent %llu root %llu \n",
6211                                 (unsigned long long)bytenr,
6212                                 (unsigned long long)parent,
6213                                 (unsigned long long)root);
6214                 }
6215                 back->node.found_extent_tree = 1;
6216         }
6217         check_extent_type(rec);
6218         maybe_free_extent_rec(extent_cache, rec);
6219         return 0;
6220 }
6221
6222 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6223                             u64 parent, u64 root, u64 owner, u64 offset,
6224                             u32 num_refs, int found_ref, u64 max_size)
6225 {
6226         struct extent_record *rec;
6227         struct data_backref *back;
6228         struct cache_extent *cache;
6229         int ret;
6230
6231         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6232         if (!cache) {
6233                 struct extent_record tmpl;
6234
6235                 memset(&tmpl, 0, sizeof(tmpl));
6236                 tmpl.start = bytenr;
6237                 tmpl.nr = 1;
6238                 tmpl.max_size = max_size;
6239
6240                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6241                 if (ret)
6242                         return ret;
6243
6244                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6245                 if (!cache)
6246                         abort();
6247         }
6248
6249         rec = container_of(cache, struct extent_record, cache);
6250         if (rec->max_size < max_size)
6251                 rec->max_size = max_size;
6252
6253         /*
6254          * If found_ref is set then max_size is the real size and must match the
6255          * existing refs.  So if we have already found a ref then we need to
6256          * make sure that this ref matches the existing one, otherwise we need
6257          * to add a new backref so we can notice that the backrefs don't match
6258          * and we need to figure out who is telling the truth.  This is to
6259          * account for that awful fsync bug I introduced where we'd end up with
6260          * a btrfs_file_extent_item that would have its length include multiple
6261          * prealloc extents or point inside of a prealloc extent.
6262          */
6263         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6264                                  bytenr, max_size);
6265         if (!back) {
6266                 back = alloc_data_backref(rec, parent, root, owner, offset,
6267                                           max_size);
6268                 BUG_ON(!back);
6269         }
6270
6271         if (found_ref) {
6272                 BUG_ON(num_refs != 1);
6273                 if (back->node.found_ref)
6274                         BUG_ON(back->bytes != max_size);
6275                 back->node.found_ref = 1;
6276                 back->found_ref += 1;
6277                 back->bytes = max_size;
6278                 back->disk_bytenr = bytenr;
6279                 rec->refs += 1;
6280                 rec->content_checked = 1;
6281                 rec->owner_ref_checked = 1;
6282         } else {
6283                 if (back->node.found_extent_tree) {
6284                         fprintf(stderr, "Extent back ref already exists "
6285                                 "for %llu parent %llu root %llu "
6286                                 "owner %llu offset %llu num_refs %lu\n",
6287                                 (unsigned long long)bytenr,
6288                                 (unsigned long long)parent,
6289                                 (unsigned long long)root,
6290                                 (unsigned long long)owner,
6291                                 (unsigned long long)offset,
6292                                 (unsigned long)num_refs);
6293                 }
6294                 back->num_refs = num_refs;
6295                 back->node.found_extent_tree = 1;
6296         }
6297         maybe_free_extent_rec(extent_cache, rec);
6298         return 0;
6299 }
6300
6301 static int add_pending(struct cache_tree *pending,
6302                        struct cache_tree *seen, u64 bytenr, u32 size)
6303 {
6304         int ret;
6305         ret = add_cache_extent(seen, bytenr, size);
6306         if (ret)
6307                 return ret;
6308         add_cache_extent(pending, bytenr, size);
6309         return 0;
6310 }
6311
6312 static int pick_next_pending(struct cache_tree *pending,
6313                         struct cache_tree *reada,
6314                         struct cache_tree *nodes,
6315                         u64 last, struct block_info *bits, int bits_nr,
6316                         int *reada_bits)
6317 {
6318         unsigned long node_start = last;
6319         struct cache_extent *cache;
6320         int ret;
6321
6322         cache = search_cache_extent(reada, 0);
6323         if (cache) {
6324                 bits[0].start = cache->start;
6325                 bits[0].size = cache->size;
6326                 *reada_bits = 1;
6327                 return 1;
6328         }
6329         *reada_bits = 0;
6330         if (node_start > 32768)
6331                 node_start -= 32768;
6332
6333         cache = search_cache_extent(nodes, node_start);
6334         if (!cache)
6335                 cache = search_cache_extent(nodes, 0);
6336
6337         if (!cache) {
6338                  cache = search_cache_extent(pending, 0);
6339                  if (!cache)
6340                          return 0;
6341                  ret = 0;
6342                  do {
6343                          bits[ret].start = cache->start;
6344                          bits[ret].size = cache->size;
6345                          cache = next_cache_extent(cache);
6346                          ret++;
6347                  } while (cache && ret < bits_nr);
6348                  return ret;
6349         }
6350
6351         ret = 0;
6352         do {
6353                 bits[ret].start = cache->start;
6354                 bits[ret].size = cache->size;
6355                 cache = next_cache_extent(cache);
6356                 ret++;
6357         } while (cache && ret < bits_nr);
6358
6359         if (bits_nr - ret > 8) {
6360                 u64 lookup = bits[0].start + bits[0].size;
6361                 struct cache_extent *next;
6362                 next = search_cache_extent(pending, lookup);
6363                 while(next) {
6364                         if (next->start - lookup > 32768)
6365                                 break;
6366                         bits[ret].start = next->start;
6367                         bits[ret].size = next->size;
6368                         lookup = next->start + next->size;
6369                         ret++;
6370                         if (ret == bits_nr)
6371                                 break;
6372                         next = next_cache_extent(next);
6373                         if (!next)
6374                                 break;
6375                 }
6376         }
6377         return ret;
6378 }
6379
6380 static void free_chunk_record(struct cache_extent *cache)
6381 {
6382         struct chunk_record *rec;
6383
6384         rec = container_of(cache, struct chunk_record, cache);
6385         list_del_init(&rec->list);
6386         list_del_init(&rec->dextents);
6387         free(rec);
6388 }
6389
/*
 * Tear down @chunk_cache by passing it to cache_tree_free_extents() with
 * free_chunk_record() as the per-entry release callback.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
6394
6395 static void free_device_record(struct rb_node *node)
6396 {
6397         struct device_record *rec;
6398
6399         rec = container_of(node, struct device_record, node);
6400         free(rec);
6401 }
6402
6403 FREE_RB_BASED_TREE(device_cache, free_device_record);
6404
6405 int insert_block_group_record(struct block_group_tree *tree,
6406                               struct block_group_record *bg_rec)
6407 {
6408         int ret;
6409
6410         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6411         if (ret)
6412                 return ret;
6413
6414         list_add_tail(&bg_rec->list, &tree->block_groups);
6415         return 0;
6416 }
6417
6418 static void free_block_group_record(struct cache_extent *cache)
6419 {
6420         struct block_group_record *rec;
6421
6422         rec = container_of(cache, struct block_group_record, cache);
6423         list_del_init(&rec->list);
6424         free(rec);
6425 }
6426
/*
 * Tear down the cache tree inside @tree by passing it to
 * cache_tree_free_extents() with free_block_group_record() as the per-entry
 * release callback.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
6431
6432 int insert_device_extent_record(struct device_extent_tree *tree,
6433                                 struct device_extent_record *de_rec)
6434 {
6435         int ret;
6436
6437         /*
6438          * Device extent is a bit different from the other extents, because
6439          * the extents which belong to the different devices may have the
6440          * same start and size, so we need use the special extent cache
6441          * search/insert functions.
6442          */
6443         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6444         if (ret)
6445                 return ret;
6446
6447         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6448         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6449         return 0;
6450 }
6451
6452 static void free_device_extent_record(struct cache_extent *cache)
6453 {
6454         struct device_extent_record *rec;
6455
6456         rec = container_of(cache, struct device_extent_record, cache);
6457         if (!list_empty(&rec->chunk_list))
6458                 list_del_init(&rec->chunk_list);
6459         if (!list_empty(&rec->device_list))
6460                 list_del_init(&rec->device_list);
6461         free(rec);
6462 }
6463
/*
 * Tear down the cache tree inside @tree by passing it to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * per-entry release callback.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
6468
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, anything else as a data backref carrying the v0 ref
 * count.  The item key supplies bytenr (objectid) and parent (offset).
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6489
6490 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6491                                             struct btrfs_key *key,
6492                                             int slot)
6493 {
6494         struct btrfs_chunk *ptr;
6495         struct chunk_record *rec;
6496         int num_stripes, i;
6497
6498         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6499         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6500
6501         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6502         if (!rec) {
6503                 fprintf(stderr, "memory allocation failed\n");
6504                 exit(-1);
6505         }
6506
6507         INIT_LIST_HEAD(&rec->list);
6508         INIT_LIST_HEAD(&rec->dextents);
6509         rec->bg_rec = NULL;
6510
6511         rec->cache.start = key->offset;
6512         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6513
6514         rec->generation = btrfs_header_generation(leaf);
6515
6516         rec->objectid = key->objectid;
6517         rec->type = key->type;
6518         rec->offset = key->offset;
6519
6520         rec->length = rec->cache.size;
6521         rec->owner = btrfs_chunk_owner(leaf, ptr);
6522         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6523         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6524         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6525         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6526         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6527         rec->num_stripes = num_stripes;
6528         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6529
6530         for (i = 0; i < rec->num_stripes; ++i) {
6531                 rec->stripes[i].devid =
6532                         btrfs_stripe_devid_nr(leaf, ptr, i);
6533                 rec->stripes[i].offset =
6534                         btrfs_stripe_offset_nr(leaf, ptr, i);
6535                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6536                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6537                                 BTRFS_UUID_SIZE);
6538         }
6539
6540         return rec;
6541 }
6542
6543 static int process_chunk_item(struct cache_tree *chunk_cache,
6544                               struct btrfs_key *key, struct extent_buffer *eb,
6545                               int slot)
6546 {
6547         struct chunk_record *rec;
6548         struct btrfs_chunk *chunk;
6549         int ret = 0;
6550
6551         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6552         /*
6553          * Do extra check for this chunk item,
6554          *
6555          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6556          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6557          * and owner<->key_type check.
6558          */
6559         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6560                                       key->offset);
6561         if (ret < 0) {
6562                 error("chunk(%llu, %llu) is not valid, ignore it",
6563                       key->offset, btrfs_chunk_length(eb, chunk));
6564                 return 0;
6565         }
6566         rec = btrfs_new_chunk_record(eb, key, slot);
6567         ret = insert_cache_extent(chunk_cache, &rec->cache);
6568         if (ret) {
6569                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6570                         rec->offset, rec->length);
6571                 free(rec);
6572         }
6573
6574         return ret;
6575 }
6576
6577 static int process_device_item(struct rb_root *dev_cache,
6578                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6579 {
6580         struct btrfs_dev_item *ptr;
6581         struct device_record *rec;
6582         int ret = 0;
6583
6584         ptr = btrfs_item_ptr(eb,
6585                 slot, struct btrfs_dev_item);
6586
6587         rec = malloc(sizeof(*rec));
6588         if (!rec) {
6589                 fprintf(stderr, "memory allocation failed\n");
6590                 return -ENOMEM;
6591         }
6592
6593         rec->devid = key->offset;
6594         rec->generation = btrfs_header_generation(eb);
6595
6596         rec->objectid = key->objectid;
6597         rec->type = key->type;
6598         rec->offset = key->offset;
6599
6600         rec->devid = btrfs_device_id(eb, ptr);
6601         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6602         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6603
6604         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6605         if (ret) {
6606                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6607                 free(rec);
6608         }
6609
6610         return ret;
6611 }
6612
6613 struct block_group_record *
6614 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6615                              int slot)
6616 {
6617         struct btrfs_block_group_item *ptr;
6618         struct block_group_record *rec;
6619
6620         rec = calloc(1, sizeof(*rec));
6621         if (!rec) {
6622                 fprintf(stderr, "memory allocation failed\n");
6623                 exit(-1);
6624         }
6625
6626         rec->cache.start = key->objectid;
6627         rec->cache.size = key->offset;
6628
6629         rec->generation = btrfs_header_generation(leaf);
6630
6631         rec->objectid = key->objectid;
6632         rec->type = key->type;
6633         rec->offset = key->offset;
6634
6635         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6636         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6637
6638         INIT_LIST_HEAD(&rec->list);
6639
6640         return rec;
6641 }
6642
6643 static int process_block_group_item(struct block_group_tree *block_group_cache,
6644                                     struct btrfs_key *key,
6645                                     struct extent_buffer *eb, int slot)
6646 {
6647         struct block_group_record *rec;
6648         int ret = 0;
6649
6650         rec = btrfs_new_block_group_record(eb, key, slot);
6651         ret = insert_block_group_record(block_group_cache, rec);
6652         if (ret) {
6653                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6654                         rec->objectid, rec->offset);
6655                 free(rec);
6656         }
6657
6658         return ret;
6659 }
6660
6661 struct device_extent_record *
6662 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6663                                struct btrfs_key *key, int slot)
6664 {
6665         struct device_extent_record *rec;
6666         struct btrfs_dev_extent *ptr;
6667
6668         rec = calloc(1, sizeof(*rec));
6669         if (!rec) {
6670                 fprintf(stderr, "memory allocation failed\n");
6671                 exit(-1);
6672         }
6673
6674         rec->cache.objectid = key->objectid;
6675         rec->cache.start = key->offset;
6676
6677         rec->generation = btrfs_header_generation(leaf);
6678
6679         rec->objectid = key->objectid;
6680         rec->type = key->type;
6681         rec->offset = key->offset;
6682
6683         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6684         rec->chunk_objecteid =
6685                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6686         rec->chunk_offset =
6687                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6688         rec->length = btrfs_dev_extent_length(leaf, ptr);
6689         rec->cache.size = rec->length;
6690
6691         INIT_LIST_HEAD(&rec->chunk_list);
6692         INIT_LIST_HEAD(&rec->device_list);
6693
6694         return rec;
6695 }
6696
6697 static int
6698 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6699                            struct btrfs_key *key, struct extent_buffer *eb,
6700                            int slot)
6701 {
6702         struct device_extent_record *rec;
6703         int ret;
6704
6705         rec = btrfs_new_device_extent_record(eb, key, slot);
6706         ret = insert_device_extent_record(dev_extent_cache, rec);
6707         if (ret) {
6708                 fprintf(stderr,
6709                         "Device extent[%llu, %llu, %llu] existed.\n",
6710                         rec->objectid, rec->offset, rec->length);
6711                 free(rec);
6712         }
6713
6714         return ret;
6715 }
6716
6717 static int process_extent_item(struct btrfs_root *root,
6718                                struct cache_tree *extent_cache,
6719                                struct extent_buffer *eb, int slot)
6720 {
6721         struct btrfs_extent_item *ei;
6722         struct btrfs_extent_inline_ref *iref;
6723         struct btrfs_extent_data_ref *dref;
6724         struct btrfs_shared_data_ref *sref;
6725         struct btrfs_key key;
6726         struct extent_record tmpl;
6727         unsigned long end;
6728         unsigned long ptr;
6729         int ret;
6730         int type;
6731         u32 item_size = btrfs_item_size_nr(eb, slot);
6732         u64 refs = 0;
6733         u64 offset;
6734         u64 num_bytes;
6735         int metadata = 0;
6736
6737         btrfs_item_key_to_cpu(eb, &key, slot);
6738
6739         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6740                 metadata = 1;
6741                 num_bytes = root->nodesize;
6742         } else {
6743                 num_bytes = key.offset;
6744         }
6745
6746         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6747                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6748                       key.objectid, root->sectorsize);
6749                 return -EIO;
6750         }
6751         if (item_size < sizeof(*ei)) {
6752 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6753                 struct btrfs_extent_item_v0 *ei0;
6754                 BUG_ON(item_size != sizeof(*ei0));
6755                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6756                 refs = btrfs_extent_refs_v0(eb, ei0);
6757 #else
6758                 BUG();
6759 #endif
6760                 memset(&tmpl, 0, sizeof(tmpl));
6761                 tmpl.start = key.objectid;
6762                 tmpl.nr = num_bytes;
6763                 tmpl.extent_item_refs = refs;
6764                 tmpl.metadata = metadata;
6765                 tmpl.found_rec = 1;
6766                 tmpl.max_size = num_bytes;
6767
6768                 return add_extent_rec(extent_cache, &tmpl);
6769         }
6770
6771         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6772         refs = btrfs_extent_refs(eb, ei);
6773         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6774                 metadata = 1;
6775         else
6776                 metadata = 0;
6777         if (metadata && num_bytes != root->nodesize) {
6778                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6779                       num_bytes, root->nodesize);
6780                 return -EIO;
6781         }
6782         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6783                 error("ignore invalid data extent, length %llu is not aligned to %u",
6784                       num_bytes, root->sectorsize);
6785                 return -EIO;
6786         }
6787
6788         memset(&tmpl, 0, sizeof(tmpl));
6789         tmpl.start = key.objectid;
6790         tmpl.nr = num_bytes;
6791         tmpl.extent_item_refs = refs;
6792         tmpl.metadata = metadata;
6793         tmpl.found_rec = 1;
6794         tmpl.max_size = num_bytes;
6795         add_extent_rec(extent_cache, &tmpl);
6796
6797         ptr = (unsigned long)(ei + 1);
6798         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6799             key.type == BTRFS_EXTENT_ITEM_KEY)
6800                 ptr += sizeof(struct btrfs_tree_block_info);
6801
6802         end = (unsigned long)ei + item_size;
6803         while (ptr < end) {
6804                 iref = (struct btrfs_extent_inline_ref *)ptr;
6805                 type = btrfs_extent_inline_ref_type(eb, iref);
6806                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6807                 switch (type) {
6808                 case BTRFS_TREE_BLOCK_REF_KEY:
6809                         ret = add_tree_backref(extent_cache, key.objectid,
6810                                         0, offset, 0);
6811                         if (ret < 0)
6812                                 error("add_tree_backref failed: %s",
6813                                       strerror(-ret));
6814                         break;
6815                 case BTRFS_SHARED_BLOCK_REF_KEY:
6816                         ret = add_tree_backref(extent_cache, key.objectid,
6817                                         offset, 0, 0);
6818                         if (ret < 0)
6819                                 error("add_tree_backref failed: %s",
6820                                       strerror(-ret));
6821                         break;
6822                 case BTRFS_EXTENT_DATA_REF_KEY:
6823                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6824                         add_data_backref(extent_cache, key.objectid, 0,
6825                                         btrfs_extent_data_ref_root(eb, dref),
6826                                         btrfs_extent_data_ref_objectid(eb,
6827                                                                        dref),
6828                                         btrfs_extent_data_ref_offset(eb, dref),
6829                                         btrfs_extent_data_ref_count(eb, dref),
6830                                         0, num_bytes);
6831                         break;
6832                 case BTRFS_SHARED_DATA_REF_KEY:
6833                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6834                         add_data_backref(extent_cache, key.objectid, offset,
6835                                         0, 0, 0,
6836                                         btrfs_shared_data_ref_count(eb, sref),
6837                                         0, num_bytes);
6838                         break;
6839                 default:
6840                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6841                                 key.objectid, key.type, num_bytes);
6842                         goto out;
6843                 }
6844                 ptr += btrfs_extent_inline_ref_size(type);
6845         }
6846         WARN_ON(ptr > end);
6847 out:
6848         return 0;
6849 }
6850
6851 static int check_cache_range(struct btrfs_root *root,
6852                              struct btrfs_block_group_cache *cache,
6853                              u64 offset, u64 bytes)
6854 {
6855         struct btrfs_free_space *entry;
6856         u64 *logical;
6857         u64 bytenr;
6858         int stripe_len;
6859         int i, nr, ret;
6860
6861         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6862                 bytenr = btrfs_sb_offset(i);
6863                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6864                                        cache->key.objectid, bytenr, 0,
6865                                        &logical, &nr, &stripe_len);
6866                 if (ret)
6867                         return ret;
6868
6869                 while (nr--) {
6870                         if (logical[nr] + stripe_len <= offset)
6871                                 continue;
6872                         if (offset + bytes <= logical[nr])
6873                                 continue;
6874                         if (logical[nr] == offset) {
6875                                 if (stripe_len >= bytes) {
6876                                         free(logical);
6877                                         return 0;
6878                                 }
6879                                 bytes -= stripe_len;
6880                                 offset += stripe_len;
6881                         } else if (logical[nr] < offset) {
6882                                 if (logical[nr] + stripe_len >=
6883                                     offset + bytes) {
6884                                         free(logical);
6885                                         return 0;
6886                                 }
6887                                 bytes = (offset + bytes) -
6888                                         (logical[nr] + stripe_len);
6889                                 offset = logical[nr] + stripe_len;
6890                         } else {
6891                                 /*
6892                                  * Could be tricky, the super may land in the
6893                                  * middle of the area we're checking.  First
6894                                  * check the easiest case, it's at the end.
6895                                  */
6896                                 if (logical[nr] + stripe_len >=
6897                                     bytes + offset) {
6898                                         bytes = logical[nr] - offset;
6899                                         continue;
6900                                 }
6901
6902                                 /* Check the left side */
6903                                 ret = check_cache_range(root, cache,
6904                                                         offset,
6905                                                         logical[nr] - offset);
6906                                 if (ret) {
6907                                         free(logical);
6908                                         return ret;
6909                                 }
6910
6911                                 /* Now we continue with the right side */
6912                                 bytes = (offset + bytes) -
6913                                         (logical[nr] + stripe_len);
6914                                 offset = logical[nr] + stripe_len;
6915                         }
6916                 }
6917
6918                 free(logical);
6919         }
6920
6921         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6922         if (!entry) {
6923                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6924                         offset, offset+bytes);
6925                 return -EINVAL;
6926         }
6927
6928         if (entry->offset != offset) {
6929                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6930                         entry->offset);
6931                 return -EINVAL;
6932         }
6933
6934         if (entry->bytes != bytes) {
6935                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6936                         bytes, entry->bytes, offset);
6937                 return -EINVAL;
6938         }
6939
6940         unlink_free_space(cache->free_space_ctl, entry);
6941         free(entry);
6942         return 0;
6943 }
6944
6945 static int verify_space_cache(struct btrfs_root *root,
6946                               struct btrfs_block_group_cache *cache)
6947 {
6948         struct btrfs_path path;
6949         struct extent_buffer *leaf;
6950         struct btrfs_key key;
6951         u64 last;
6952         int ret = 0;
6953
6954         root = root->fs_info->extent_root;
6955
6956         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6957
6958         btrfs_init_path(&path);
6959         key.objectid = last;
6960         key.offset = 0;
6961         key.type = BTRFS_EXTENT_ITEM_KEY;
6962         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6963         if (ret < 0)
6964                 goto out;
6965         ret = 0;
6966         while (1) {
6967                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6968                         ret = btrfs_next_leaf(root, &path);
6969                         if (ret < 0)
6970                                 goto out;
6971                         if (ret > 0) {
6972                                 ret = 0;
6973                                 break;
6974                         }
6975                 }
6976                 leaf = path.nodes[0];
6977                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6978                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6979                         break;
6980                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6981                     key.type != BTRFS_METADATA_ITEM_KEY) {
6982                         path.slots[0]++;
6983                         continue;
6984                 }
6985
6986                 if (last == key.objectid) {
6987                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6988                                 last = key.objectid + key.offset;
6989                         else
6990                                 last = key.objectid + root->nodesize;
6991                         path.slots[0]++;
6992                         continue;
6993                 }
6994
6995                 ret = check_cache_range(root, cache, last,
6996                                         key.objectid - last);
6997                 if (ret)
6998                         break;
6999                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7000                         last = key.objectid + key.offset;
7001                 else
7002                         last = key.objectid + root->nodesize;
7003                 path.slots[0]++;
7004         }
7005
7006         if (last < cache->key.objectid + cache->key.offset)
7007                 ret = check_cache_range(root, cache, last,
7008                                         cache->key.objectid +
7009                                         cache->key.offset - last);
7010
7011 out:
7012         btrfs_release_path(&path);
7013
7014         if (!ret &&
7015             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7016                 fprintf(stderr, "There are still entries left in the space "
7017                         "cache\n");
7018                 ret = -EINVAL;
7019         }
7020
7021         return ret;
7022 }
7023
7024 static int check_space_cache(struct btrfs_root *root)
7025 {
7026         struct btrfs_block_group_cache *cache;
7027         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7028         int ret;
7029         int error = 0;
7030
7031         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7032             btrfs_super_generation(root->fs_info->super_copy) !=
7033             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7034                 printf("cache and super generation don't match, space cache "
7035                        "will be invalidated\n");
7036                 return 0;
7037         }
7038
7039         if (ctx.progress_enabled) {
7040                 ctx.tp = TASK_FREE_SPACE;
7041                 task_start(ctx.info);
7042         }
7043
7044         while (1) {
7045                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7046                 if (!cache)
7047                         break;
7048
7049                 start = cache->key.objectid + cache->key.offset;
7050                 if (!cache->free_space_ctl) {
7051                         if (btrfs_init_free_space_ctl(cache,
7052                                                       root->sectorsize)) {
7053                                 ret = -ENOMEM;
7054                                 break;
7055                         }
7056                 } else {
7057                         btrfs_remove_free_space_cache(cache);
7058                 }
7059
7060                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7061                         ret = exclude_super_stripes(root, cache);
7062                         if (ret) {
7063                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7064                                         strerror(-ret));
7065                                 error++;
7066                                 continue;
7067                         }
7068                         ret = load_free_space_tree(root->fs_info, cache);
7069                         free_excluded_extents(root, cache);
7070                         if (ret < 0) {
7071                                 fprintf(stderr, "could not load free space tree: %s\n",
7072                                         strerror(-ret));
7073                                 error++;
7074                                 continue;
7075                         }
7076                         error += ret;
7077                 } else {
7078                         ret = load_free_space_cache(root->fs_info, cache);
7079                         if (!ret)
7080                                 continue;
7081                 }
7082
7083                 ret = verify_space_cache(root, cache);
7084                 if (ret) {
7085                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7086                                 cache->key.objectid);
7087                         error++;
7088                 }
7089         }
7090
7091         task_stop(ctx.info);
7092
7093         return error ? -EINVAL : 0;
7094 }
7095
7096 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7097                         u64 num_bytes, unsigned long leaf_offset,
7098                         struct extent_buffer *eb) {
7099
7100         u64 offset = 0;
7101         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7102         char *data;
7103         unsigned long csum_offset;
7104         u32 csum;
7105         u32 csum_expected;
7106         u64 read_len;
7107         u64 data_checked = 0;
7108         u64 tmp;
7109         int ret = 0;
7110         int mirror;
7111         int num_copies;
7112
7113         if (num_bytes % root->sectorsize)
7114                 return -EINVAL;
7115
7116         data = malloc(num_bytes);
7117         if (!data)
7118                 return -ENOMEM;
7119
7120         while (offset < num_bytes) {
7121                 mirror = 0;
7122 again:
7123                 read_len = num_bytes - offset;
7124                 /* read as much space once a time */
7125                 ret = read_extent_data(root, data + offset,
7126                                 bytenr + offset, &read_len, mirror);
7127                 if (ret)
7128                         goto out;
7129                 data_checked = 0;
7130                 /* verify every 4k data's checksum */
7131                 while (data_checked < read_len) {
7132                         csum = ~(u32)0;
7133                         tmp = offset + data_checked;
7134
7135                         csum = btrfs_csum_data((char *)data + tmp,
7136                                                csum, root->sectorsize);
7137                         btrfs_csum_final(csum, (u8 *)&csum);
7138
7139                         csum_offset = leaf_offset +
7140                                  tmp / root->sectorsize * csum_size;
7141                         read_extent_buffer(eb, (char *)&csum_expected,
7142                                            csum_offset, csum_size);
7143                         /* try another mirror */
7144                         if (csum != csum_expected) {
7145                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7146                                                 mirror, bytenr + tmp,
7147                                                 csum, csum_expected);
7148                                 num_copies = btrfs_num_copies(
7149                                                 &root->fs_info->mapping_tree,
7150                                                 bytenr, num_bytes);
7151                                 if (mirror < num_copies - 1) {
7152                                         mirror += 1;
7153                                         goto again;
7154                                 }
7155                         }
7156                         data_checked += root->sectorsize;
7157                 }
7158                 offset += read_len;
7159         }
7160 out:
7161         free(data);
7162         return ret;
7163 }
7164
7165 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7166                                u64 num_bytes)
7167 {
7168         struct btrfs_path path;
7169         struct extent_buffer *leaf;
7170         struct btrfs_key key;
7171         int ret;
7172
7173         btrfs_init_path(&path);
7174         key.objectid = bytenr;
7175         key.type = BTRFS_EXTENT_ITEM_KEY;
7176         key.offset = (u64)-1;
7177
7178 again:
7179         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7180                                 0, 0);
7181         if (ret < 0) {
7182                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7183                 btrfs_release_path(&path);
7184                 return ret;
7185         } else if (ret) {
7186                 if (path.slots[0] > 0) {
7187                         path.slots[0]--;
7188                 } else {
7189                         ret = btrfs_prev_leaf(root, &path);
7190                         if (ret < 0) {
7191                                 goto out;
7192                         } else if (ret > 0) {
7193                                 ret = 0;
7194                                 goto out;
7195                         }
7196                 }
7197         }
7198
7199         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7200
7201         /*
7202          * Block group items come before extent items if they have the same
7203          * bytenr, so walk back one more just in case.  Dear future traveller,
7204          * first congrats on mastering time travel.  Now if it's not too much
7205          * trouble could you go back to 2006 and tell Chris to make the
7206          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7207          * EXTENT_ITEM_KEY please?
7208          */
7209         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7210                 if (path.slots[0] > 0) {
7211                         path.slots[0]--;
7212                 } else {
7213                         ret = btrfs_prev_leaf(root, &path);
7214                         if (ret < 0) {
7215                                 goto out;
7216                         } else if (ret > 0) {
7217                                 ret = 0;
7218                                 goto out;
7219                         }
7220                 }
7221                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7222         }
7223
7224         while (num_bytes) {
7225                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7226                         ret = btrfs_next_leaf(root, &path);
7227                         if (ret < 0) {
7228                                 fprintf(stderr, "Error going to next leaf "
7229                                         "%d\n", ret);
7230                                 btrfs_release_path(&path);
7231                                 return ret;
7232                         } else if (ret) {
7233                                 break;
7234                         }
7235                 }
7236                 leaf = path.nodes[0];
7237                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7238                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7239                         path.slots[0]++;
7240                         continue;
7241                 }
7242                 if (key.objectid + key.offset < bytenr) {
7243                         path.slots[0]++;
7244                         continue;
7245                 }
7246                 if (key.objectid > bytenr + num_bytes)
7247                         break;
7248
7249                 if (key.objectid == bytenr) {
7250                         if (key.offset >= num_bytes) {
7251                                 num_bytes = 0;
7252                                 break;
7253                         }
7254                         num_bytes -= key.offset;
7255                         bytenr += key.offset;
7256                 } else if (key.objectid < bytenr) {
7257                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7258                                 num_bytes = 0;
7259                                 break;
7260                         }
7261                         num_bytes = (bytenr + num_bytes) -
7262                                 (key.objectid + key.offset);
7263                         bytenr = key.objectid + key.offset;
7264                 } else {
7265                         if (key.objectid + key.offset < bytenr + num_bytes) {
7266                                 u64 new_start = key.objectid + key.offset;
7267                                 u64 new_bytes = bytenr + num_bytes - new_start;
7268
7269                                 /*
7270                                  * Weird case, the extent is in the middle of
7271                                  * our range, we'll have to search one side
7272                                  * and then the other.  Not sure if this happens
7273                                  * in real life, but no harm in coding it up
7274                                  * anyway just in case.
7275                                  */
7276                                 btrfs_release_path(&path);
7277                                 ret = check_extent_exists(root, new_start,
7278                                                           new_bytes);
7279                                 if (ret) {
7280                                         fprintf(stderr, "Right section didn't "
7281                                                 "have a record\n");
7282                                         break;
7283                                 }
7284                                 num_bytes = key.objectid - bytenr;
7285                                 goto again;
7286                         }
7287                         num_bytes = key.objectid - bytenr;
7288                 }
7289                 path.slots[0]++;
7290         }
7291         ret = 0;
7292
7293 out:
7294         if (num_bytes && !ret) {
7295                 fprintf(stderr, "There are no extents for csum range "
7296                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7297                 ret = 1;
7298         }
7299
7300         btrfs_release_path(&path);
7301         return ret;
7302 }
7303
7304 static int check_csums(struct btrfs_root *root)
7305 {
7306         struct btrfs_path path;
7307         struct extent_buffer *leaf;
7308         struct btrfs_key key;
7309         u64 offset = 0, num_bytes = 0;
7310         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7311         int errors = 0;
7312         int ret;
7313         u64 data_len;
7314         unsigned long leaf_offset;
7315
7316         root = root->fs_info->csum_root;
7317         if (!extent_buffer_uptodate(root->node)) {
7318                 fprintf(stderr, "No valid csum tree found\n");
7319                 return -ENOENT;
7320         }
7321
7322         btrfs_init_path(&path);
7323         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7324         key.type = BTRFS_EXTENT_CSUM_KEY;
7325         key.offset = 0;
7326         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7327         if (ret < 0) {
7328                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7329                 btrfs_release_path(&path);
7330                 return ret;
7331         }
7332
7333         if (ret > 0 && path.slots[0])
7334                 path.slots[0]--;
7335         ret = 0;
7336
7337         while (1) {
7338                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7339                         ret = btrfs_next_leaf(root, &path);
7340                         if (ret < 0) {
7341                                 fprintf(stderr, "Error going to next leaf "
7342                                         "%d\n", ret);
7343                                 break;
7344                         }
7345                         if (ret)
7346                                 break;
7347                 }
7348                 leaf = path.nodes[0];
7349
7350                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7351                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7352                         path.slots[0]++;
7353                         continue;
7354                 }
7355
7356                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7357                               csum_size) * root->sectorsize;
7358                 if (!check_data_csum)
7359                         goto skip_csum_check;
7360                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7361                 ret = check_extent_csums(root, key.offset, data_len,
7362                                          leaf_offset, leaf);
7363                 if (ret)
7364                         break;
7365 skip_csum_check:
7366                 if (!num_bytes) {
7367                         offset = key.offset;
7368                 } else if (key.offset != offset + num_bytes) {
7369                         ret = check_extent_exists(root, offset, num_bytes);
7370                         if (ret) {
7371                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7372                                         "there is no extent record\n",
7373                                         offset, offset+num_bytes);
7374                                 errors++;
7375                         }
7376                         offset = key.offset;
7377                         num_bytes = 0;
7378                 }
7379                 num_bytes += data_len;
7380                 path.slots[0]++;
7381         }
7382
7383         btrfs_release_path(&path);
7384         return errors;
7385 }
7386
7387 static int is_dropped_key(struct btrfs_key *key,
7388                           struct btrfs_key *drop_key) {
7389         if (key->objectid < drop_key->objectid)
7390                 return 1;
7391         else if (key->objectid == drop_key->objectid) {
7392                 if (key->type < drop_key->type)
7393                         return 1;
7394                 else if (key->type == drop_key->type) {
7395                         if (key->offset < drop_key->offset)
7396                                 return 1;
7397                 }
7398         }
7399         return 0;
7400 }
7401
7402 /*
7403  * Here are the rules for FULL_BACKREF.
7404  *
7405  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7406  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7407  *      FULL_BACKREF set.
7408  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7409  *    if it happened after the relocation occurred since we'll have dropped the
7410  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7411  *    have no real way to know for sure.
7412  *
7413  * We process the blocks one root at a time, and we start from the lowest root
7414  * objectid and go to the highest.  So we can just lookup the owner backref for
7415  * the record and if we don't find it then we know it doesn't exist and we have
7416  * a FULL BACKREF.
7417  *
7418  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7419  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7420  * be set or not and then we can check later once we've gathered all the refs.
7421  */
7422 static int calc_extent_flag(struct cache_tree *extent_cache,
7423                            struct extent_buffer *buf,
7424                            struct root_item_record *ri,
7425                            u64 *flags)
7426 {
7427         struct extent_record *rec;
7428         struct cache_extent *cache;
7429         struct tree_backref *tback;
7430         u64 owner = 0;
7431
7432         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7433         /* we have added this extent before */
7434         if (!cache)
7435                 return -ENOENT;
7436
7437         rec = container_of(cache, struct extent_record, cache);
7438
7439         /*
7440          * Except file/reloc tree, we can not have
7441          * FULL BACKREF MODE
7442          */
7443         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7444                 goto normal;
7445         /*
7446          * root node
7447          */
7448         if (buf->start == ri->bytenr)
7449                 goto normal;
7450
7451         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7452                 goto full_backref;
7453
7454         owner = btrfs_header_owner(buf);
7455         if (owner == ri->objectid)
7456                 goto normal;
7457
7458         tback = find_tree_backref(rec, 0, owner);
7459         if (!tback)
7460                 goto full_backref;
7461 normal:
7462         *flags = 0;
7463         if (rec->flag_block_full_backref != FLAG_UNSET &&
7464             rec->flag_block_full_backref != 0)
7465                 rec->bad_full_backref = 1;
7466         return 0;
7467 full_backref:
7468         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7469         if (rec->flag_block_full_backref != FLAG_UNSET &&
7470             rec->flag_block_full_backref != 1)
7471                 rec->bad_full_backref = 1;
7472         return 0;
7473 }
7474
7475 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7476 {
7477         fprintf(stderr, "Invalid key type(");
7478         print_key_type(stderr, 0, key_type);
7479         fprintf(stderr, ") found in root(");
7480         print_objectid(stderr, rootid, 0);
7481         fprintf(stderr, ")\n");
7482 }
7483
7484 /*
7485  * Check if the key is valid with its extent buffer.
7486  *
7487  * This is a early check in case invalid key exists in a extent buffer
7488  * This is not comprehensive yet, but should prevent wrong key/item passed
7489  * further
7490  */
7491 static int check_type_with_root(u64 rootid, u8 key_type)
7492 {
7493         switch (key_type) {
7494         /* Only valid in chunk tree */
7495         case BTRFS_DEV_ITEM_KEY:
7496         case BTRFS_CHUNK_ITEM_KEY:
7497                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7498                         goto err;
7499                 break;
7500         /* valid in csum and log tree */
7501         case BTRFS_CSUM_TREE_OBJECTID:
7502                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7503                       is_fstree(rootid)))
7504                         goto err;
7505                 break;
7506         case BTRFS_EXTENT_ITEM_KEY:
7507         case BTRFS_METADATA_ITEM_KEY:
7508         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7509                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7510                         goto err;
7511                 break;
7512         case BTRFS_ROOT_ITEM_KEY:
7513                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7514                         goto err;
7515                 break;
7516         case BTRFS_DEV_EXTENT_KEY:
7517                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7518                         goto err;
7519                 break;
7520         }
7521         return 0;
7522 err:
7523         report_mismatch_key_root(key_type, rootid);
7524         return -EINVAL;
7525 }
7526
7527 static int run_next_block(struct btrfs_root *root,
7528                           struct block_info *bits,
7529                           int bits_nr,
7530                           u64 *last,
7531                           struct cache_tree *pending,
7532                           struct cache_tree *seen,
7533                           struct cache_tree *reada,
7534                           struct cache_tree *nodes,
7535                           struct cache_tree *extent_cache,
7536                           struct cache_tree *chunk_cache,
7537                           struct rb_root *dev_cache,
7538                           struct block_group_tree *block_group_cache,
7539                           struct device_extent_tree *dev_extent_cache,
7540                           struct root_item_record *ri)
7541 {
7542         struct extent_buffer *buf;
7543         struct extent_record *rec = NULL;
7544         u64 bytenr;
7545         u32 size;
7546         u64 parent;
7547         u64 owner;
7548         u64 flags;
7549         u64 ptr;
7550         u64 gen = 0;
7551         int ret = 0;
7552         int i;
7553         int nritems;
7554         struct btrfs_key key;
7555         struct cache_extent *cache;
7556         int reada_bits;
7557
7558         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7559                                     bits_nr, &reada_bits);
7560         if (nritems == 0)
7561                 return 1;
7562
7563         if (!reada_bits) {
7564                 for(i = 0; i < nritems; i++) {
7565                         ret = add_cache_extent(reada, bits[i].start,
7566                                                bits[i].size);
7567                         if (ret == -EEXIST)
7568                                 continue;
7569
7570                         /* fixme, get the parent transid */
7571                         readahead_tree_block(root, bits[i].start,
7572                                              bits[i].size, 0);
7573                 }
7574         }
7575         *last = bits[0].start;
7576         bytenr = bits[0].start;
7577         size = bits[0].size;
7578
7579         cache = lookup_cache_extent(pending, bytenr, size);
7580         if (cache) {
7581                 remove_cache_extent(pending, cache);
7582                 free(cache);
7583         }
7584         cache = lookup_cache_extent(reada, bytenr, size);
7585         if (cache) {
7586                 remove_cache_extent(reada, cache);
7587                 free(cache);
7588         }
7589         cache = lookup_cache_extent(nodes, bytenr, size);
7590         if (cache) {
7591                 remove_cache_extent(nodes, cache);
7592                 free(cache);
7593         }
7594         cache = lookup_cache_extent(extent_cache, bytenr, size);
7595         if (cache) {
7596                 rec = container_of(cache, struct extent_record, cache);
7597                 gen = rec->parent_generation;
7598         }
7599
7600         /* fixme, get the real parent transid */
7601         buf = read_tree_block(root, bytenr, size, gen);
7602         if (!extent_buffer_uptodate(buf)) {
7603                 record_bad_block_io(root->fs_info,
7604                                     extent_cache, bytenr, size);
7605                 goto out;
7606         }
7607
7608         nritems = btrfs_header_nritems(buf);
7609
7610         flags = 0;
7611         if (!init_extent_tree) {
7612                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7613                                        btrfs_header_level(buf), 1, NULL,
7614                                        &flags);
7615                 if (ret < 0) {
7616                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7617                         if (ret < 0) {
7618                                 fprintf(stderr, "Couldn't calc extent flags\n");
7619                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7620                         }
7621                 }
7622         } else {
7623                 flags = 0;
7624                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7625                 if (ret < 0) {
7626                         fprintf(stderr, "Couldn't calc extent flags\n");
7627                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7628                 }
7629         }
7630
7631         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7632                 if (ri != NULL &&
7633                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7634                     ri->objectid == btrfs_header_owner(buf)) {
7635                         /*
7636                          * Ok we got to this block from it's original owner and
7637                          * we have FULL_BACKREF set.  Relocation can leave
7638                          * converted blocks over so this is altogether possible,
7639                          * however it's not possible if the generation > the
7640                          * last snapshot, so check for this case.
7641                          */
7642                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7643                             btrfs_header_generation(buf) > ri->last_snapshot) {
7644                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7645                                 rec->bad_full_backref = 1;
7646                         }
7647                 }
7648         } else {
7649                 if (ri != NULL &&
7650                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7651                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7652                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7653                         rec->bad_full_backref = 1;
7654                 }
7655         }
7656
7657         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7658                 rec->flag_block_full_backref = 1;
7659                 parent = bytenr;
7660                 owner = 0;
7661         } else {
7662                 rec->flag_block_full_backref = 0;
7663                 parent = 0;
7664                 owner = btrfs_header_owner(buf);
7665         }
7666
7667         ret = check_block(root, extent_cache, buf, flags);
7668         if (ret)
7669                 goto out;
7670
7671         if (btrfs_is_leaf(buf)) {
7672                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7673                 for (i = 0; i < nritems; i++) {
7674                         struct btrfs_file_extent_item *fi;
7675                         btrfs_item_key_to_cpu(buf, &key, i);
7676                         /*
7677                          * Check key type against the leaf owner.
7678                          * Could filter quite a lot of early error if
7679                          * owner is correct
7680                          */
7681                         if (check_type_with_root(btrfs_header_owner(buf),
7682                                                  key.type)) {
7683                                 fprintf(stderr, "ignoring invalid key\n");
7684                                 continue;
7685                         }
7686                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7687                                 process_extent_item(root, extent_cache, buf,
7688                                                     i);
7689                                 continue;
7690                         }
7691                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7692                                 process_extent_item(root, extent_cache, buf,
7693                                                     i);
7694                                 continue;
7695                         }
7696                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7697                                 total_csum_bytes +=
7698                                         btrfs_item_size_nr(buf, i);
7699                                 continue;
7700                         }
7701                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7702                                 process_chunk_item(chunk_cache, &key, buf, i);
7703                                 continue;
7704                         }
7705                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7706                                 process_device_item(dev_cache, &key, buf, i);
7707                                 continue;
7708                         }
7709                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7710                                 process_block_group_item(block_group_cache,
7711                                         &key, buf, i);
7712                                 continue;
7713                         }
7714                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7715                                 process_device_extent_item(dev_extent_cache,
7716                                         &key, buf, i);
7717                                 continue;
7718
7719                         }
7720                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7721 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7722                                 process_extent_ref_v0(extent_cache, buf, i);
7723 #else
7724                                 BUG();
7725 #endif
7726                                 continue;
7727                         }
7728
7729                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7730                                 ret = add_tree_backref(extent_cache,
7731                                                 key.objectid, 0, key.offset, 0);
7732                                 if (ret < 0)
7733                                         error("add_tree_backref failed: %s",
7734                                               strerror(-ret));
7735                                 continue;
7736                         }
7737                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7738                                 ret = add_tree_backref(extent_cache,
7739                                                 key.objectid, key.offset, 0, 0);
7740                                 if (ret < 0)
7741                                         error("add_tree_backref failed: %s",
7742                                               strerror(-ret));
7743                                 continue;
7744                         }
7745                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7746                                 struct btrfs_extent_data_ref *ref;
7747                                 ref = btrfs_item_ptr(buf, i,
7748                                                 struct btrfs_extent_data_ref);
7749                                 add_data_backref(extent_cache,
7750                                         key.objectid, 0,
7751                                         btrfs_extent_data_ref_root(buf, ref),
7752                                         btrfs_extent_data_ref_objectid(buf,
7753                                                                        ref),
7754                                         btrfs_extent_data_ref_offset(buf, ref),
7755                                         btrfs_extent_data_ref_count(buf, ref),
7756                                         0, root->sectorsize);
7757                                 continue;
7758                         }
7759                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7760                                 struct btrfs_shared_data_ref *ref;
7761                                 ref = btrfs_item_ptr(buf, i,
7762                                                 struct btrfs_shared_data_ref);
7763                                 add_data_backref(extent_cache,
7764                                         key.objectid, key.offset, 0, 0, 0,
7765                                         btrfs_shared_data_ref_count(buf, ref),
7766                                         0, root->sectorsize);
7767                                 continue;
7768                         }
7769                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7770                                 struct bad_item *bad;
7771
7772                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7773                                         continue;
7774                                 if (!owner)
7775                                         continue;
7776                                 bad = malloc(sizeof(struct bad_item));
7777                                 if (!bad)
7778                                         continue;
7779                                 INIT_LIST_HEAD(&bad->list);
7780                                 memcpy(&bad->key, &key,
7781                                        sizeof(struct btrfs_key));
7782                                 bad->root_id = owner;
7783                                 list_add_tail(&bad->list, &delete_items);
7784                                 continue;
7785                         }
7786                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7787                                 continue;
7788                         fi = btrfs_item_ptr(buf, i,
7789                                             struct btrfs_file_extent_item);
7790                         if (btrfs_file_extent_type(buf, fi) ==
7791                             BTRFS_FILE_EXTENT_INLINE)
7792                                 continue;
7793                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7794                                 continue;
7795
7796                         data_bytes_allocated +=
7797                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7798                         if (data_bytes_allocated < root->sectorsize) {
7799                                 abort();
7800                         }
7801                         data_bytes_referenced +=
7802                                 btrfs_file_extent_num_bytes(buf, fi);
7803                         add_data_backref(extent_cache,
7804                                 btrfs_file_extent_disk_bytenr(buf, fi),
7805                                 parent, owner, key.objectid, key.offset -
7806                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7807                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7808                 }
7809         } else {
7810                 int level;
7811                 struct btrfs_key first_key;
7812
7813                 first_key.objectid = 0;
7814
7815                 if (nritems > 0)
7816                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7817                 level = btrfs_header_level(buf);
7818                 for (i = 0; i < nritems; i++) {
7819                         struct extent_record tmpl;
7820
7821                         ptr = btrfs_node_blockptr(buf, i);
7822                         size = root->nodesize;
7823                         btrfs_node_key_to_cpu(buf, &key, i);
7824                         if (ri != NULL) {
7825                                 if ((level == ri->drop_level)
7826                                     && is_dropped_key(&key, &ri->drop_key)) {
7827                                         continue;
7828                                 }
7829                         }
7830
7831                         memset(&tmpl, 0, sizeof(tmpl));
7832                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7833                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7834                         tmpl.start = ptr;
7835                         tmpl.nr = size;
7836                         tmpl.refs = 1;
7837                         tmpl.metadata = 1;
7838                         tmpl.max_size = size;
7839                         ret = add_extent_rec(extent_cache, &tmpl);
7840                         if (ret < 0)
7841                                 goto out;
7842
7843                         ret = add_tree_backref(extent_cache, ptr, parent,
7844                                         owner, 1);
7845                         if (ret < 0) {
7846                                 error("add_tree_backref failed: %s",
7847                                       strerror(-ret));
7848                                 continue;
7849                         }
7850
7851                         if (level > 1) {
7852                                 add_pending(nodes, seen, ptr, size);
7853                         } else {
7854                                 add_pending(pending, seen, ptr, size);
7855                         }
7856                 }
7857                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7858                                       nritems) * sizeof(struct btrfs_key_ptr);
7859         }
7860         total_btree_bytes += buf->len;
7861         if (fs_root_objectid(btrfs_header_owner(buf)))
7862                 total_fs_tree_bytes += buf->len;
7863         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7864                 total_extent_tree_bytes += buf->len;
7865         if (!found_old_backref &&
7866             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7867             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7868             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7869                 found_old_backref = 1;
7870 out:
7871         free_extent_buffer(buf);
7872         return ret;
7873 }
7874
7875 /* Queue root block @buf for scanning and record its extent and backref.
7876  * Returns add_tree_backref()'s result, or add_extent_rec()'s negative error.
7877  */
7878 static int add_root_to_pending(struct extent_buffer *buf,
7879                 struct cache_tree *extent_cache, struct cache_tree *pending,
7880                 struct cache_tree *seen, struct cache_tree *nodes,
7881                 u64 objectid)
7882 {
7883         struct extent_record tmpl;
7884         u64 parent = 0, owner = objectid;
7885         int ret;
7886
7887         add_pending(btrfs_header_level(buf) > 0 ? nodes : pending, seen,
7888                     buf->start, buf->len);
7889
7890         memset(&tmpl, 0, sizeof(tmpl));
7891         tmpl.start = buf->start;
7892         tmpl.nr = buf->len;
7893         tmpl.is_root = 1;
7894         tmpl.refs = 1;
7895         tmpl.metadata = 1;
7896         tmpl.max_size = buf->len;
7897         ret = add_extent_rec(extent_cache, &tmpl);
7898         if (ret < 0)    /* do not add a backref for an untracked extent */
7899                 return ret;
7900
7901         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7902             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) {
7903                 parent = buf->start;    /* backref by parent bytenr instead */
7904                 owner = 0;
7905         }
7906         return add_tree_backref(extent_cache, buf->start, parent, owner, 1);
7907 }
7908
7909 /* As we fix the tree, we might be deleting blocks that we're tracking
7910  * for repair.  This hook makes sure the matching backref counts are
7911  * dropped from the cached extent record as we are fixing them.
7912  */
7913 static int free_extent_hook(struct btrfs_trans_handle *trans,
7914                             struct btrfs_root *root,
7915                             u64 bytenr, u64 num_bytes, u64 parent,
7916                             u64 root_objectid, u64 owner, u64 offset,
7917                             int refs_to_drop)
7918 {
7919         struct extent_record *rec;
7920         struct cache_extent *cache;
7921         int is_data;
7922         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7923
             /*
              * @trans is not used in this function.  Every path returns 0.
              * Owners from BTRFS_FIRST_FREE_OBJECTID upwards take the data path.
              */
7924         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7925         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
             /* No cached extent record was found: nothing to update. */
7926         if (!cache)
7927                 return 0;
7928
7929         rec = container_of(cache, struct extent_record, cache);
7930         if (is_data) {
7931                 struct data_backref *back;
7932                 back = find_data_backref(rec, parent, root_objectid, owner,
7933                                          offset, 1, bytenr, num_bytes);
                     /* No matching backref: leave the record untouched. */
7934                 if (!back)
7935                         goto out;
                     /* Drop @refs_to_drop from the found_ref side ... */
7936                 if (back->node.found_ref) {
7937                         back->found_ref -= refs_to_drop;
7938                         if (rec->refs)
7939                                 rec->refs -= refs_to_drop;
7940                 }
                     /* ... and from the extent tree side (num_refs). */
7941                 if (back->node.found_extent_tree) {
7942                         back->num_refs -= refs_to_drop;
7943                         if (rec->extent_item_refs)
7944                                 rec->extent_item_refs -= refs_to_drop;
7945                 }
                     /* Clear the node flags once the counters reach zero. */
7946                 if (back->found_ref == 0)
7947                         back->node.found_ref = 0;
7948                 if (back->num_refs == 0)
7949                         back->node.found_extent_tree = 0;
7950
                     /*
                      * NOTE(review): this frees the backref while
                      * node.found_ref is still set; `!found_ref` may have been
                      * intended -- confirm.
                      */
7951                 if (!back->node.found_extent_tree && back->node.found_ref) {
7952                         list_del(&back->node.list);
7953                         free(back);
7954                 }
7955         } else {
7956                 struct tree_backref *back;
7957                 back = find_tree_backref(rec, parent, root_objectid);
7958                 if (!back)
7959                         goto out;
                     /* A tree backref counts as one ref on each side. */
7960                 if (back->node.found_ref) {
7961                         if (rec->refs)
7962                                 rec->refs--;
7963                         back->node.found_ref = 0;
7964                 }
7965                 if (back->node.found_extent_tree) {
7966                         if (rec->extent_item_refs)
7967                                 rec->extent_item_refs--;
7968                         back->node.found_extent_tree = 0;
7969                 }
                     /*
                      * NOTE(review): node.found_ref is always 0 at this point
                      * (cleared just above whenever it was set), so this
                      * branch cannot be taken; `!found_ref` may have been
                      * intended -- confirm.
                      */
7970                 if (!back->node.found_extent_tree && back->node.found_ref) {
7971                         list_del(&back->node.list);
7972                         free(back);
7973                 }
7974         }
             /* Skipped (via the out label) when no backref matched. */
7975         maybe_free_extent_rec(extent_cache, rec);
7976 out:
7977         return 0;
7978 }
7979
7980 /*
7981  * Delete every extent-tree item keyed on @bytenr that is an extent/metadata
7982  * item or a keyed backref, updating block group accounting for each deleted
7983  * extent or metadata item.  Returns 0 when done or the first error hit.
7984  */
7985 static int delete_extent_records(struct btrfs_trans_handle *trans,
7986                                  struct btrfs_root *root,
7987                                  struct btrfs_path *path,
7988                                  u64 bytenr)
7989 {
7990         struct btrfs_root *extent_root = root->fs_info->extent_root;
7991         struct btrfs_key key, found_key;
7992         int ret;
7993
7994         /* Start at the largest possible key for @bytenr and walk backwards. */
7995         key.objectid = bytenr;
7996         key.type = (u8)-1;
7997         key.offset = (u64)-1;
7998
7999         while (1) {
8000                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
8001                 if (ret < 0)
8002                         break;
8003                 if (ret > 0) {
8004                         /* No exact match: use the item just before the slot. */
8005                         ret = 0;
8006                         if (path->slots[0] == 0)
8007                                 break;
8008                         path->slots[0]--;
8009                 }
8010
8011                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
8012                                       path->slots[0]);
8013                 if (found_key.objectid != bytenr)
8014                         break;
8015
8016                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8017                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8018                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8019                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8020                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8021                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8022                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8023                         /* Not ours to delete: resume the search below it. */
8024                         btrfs_release_path(path);
8025                         if (found_key.type == 0) {
8026                                 if (found_key.offset == 0)
8027                                         break;
8028                                 key.type = 0;   /* lowest type: step by offset */
8029                                 key.offset = found_key.offset - 1;
8030                         } else {
8031                                 key.type = found_key.type - 1;
8032                                 key.offset = (u64)-1;
8033                         }
8034                         continue;
8035                 }
8036
8037                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8038                         found_key.objectid, found_key.type, found_key.offset);
8039                 ret = btrfs_del_item(trans, extent_root, path);
8040                 if (ret)
8041                         break;
8042                 btrfs_release_path(path);
8043
8044                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8045                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8046                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8047                                 found_key.offset : root->nodesize;
8048
8049                         ret = btrfs_update_block_group(trans, root, bytenr,
8050                                                        bytes, 0, 0);
8051                         if (ret)
8052                                 break;
8053                 }
8054         }
8055
8056         btrfs_release_path(path);
8057         return ret;
8058 }
8059
8060 /*
8061  * For a single backref, this adds the reference to the extent tree.
8062  * When @allocated is zero an extent item for @rec is inserted first (with a
8063  * tree block info and @flags for metadata) and block group accounting is
8064  * updated.  The backref is then added through btrfs_inc_extent_ref().
8065  * Returns 0 on success or the non-zero result of the failing call.
8066  */
8067 static int record_extent(struct btrfs_trans_handle *trans,
8068                          struct btrfs_fs_info *info,
8069                          struct btrfs_path *path,
8070                          struct extent_record *rec,
8071                          struct extent_backref *back,
8072                          int allocated, u64 flags)
8073 {
8074         int ret = 0;
8075         struct btrfs_root *extent_root = info->extent_root;
8076         struct extent_buffer *leaf;
8077         struct btrfs_key ins_key;
8078         struct btrfs_extent_item *ei;
8079         struct data_backref *dback;
8080         struct btrfs_tree_block_info *bi;
8081         u64 parent;
8082
8083         /* Tree block records cover at least one node. */
8084         if (!back->is_data)
8085                 rec->max_size = max_t(u64, rec->max_size,
8086                                     info->extent_root->nodesize);
8087
8088         if (!allocated) {
8089                 u32 item_size = sizeof(*ei);
8090
8091                 /* Metadata items carry a tree block info after the item. */
8092                 if (!back->is_data)
8093                         item_size += sizeof(*bi);
8094
8095                 ins_key.objectid = rec->start;
8096                 ins_key.offset = rec->max_size;
8097                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8098
8099                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8100                                         &ins_key, item_size);
8101                 if (ret)
8102                         goto fail;
8103
8104                 leaf = path->nodes[0];
8105                 ei = btrfs_item_ptr(leaf, path->slots[0],
8106                                     struct btrfs_extent_item);
8107
8108                 /* The item starts with no refs; they are added below. */
8109                 btrfs_set_extent_refs(leaf, ei, 0);
8110                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8111
8112                 if (back->is_data) {
8113                         btrfs_set_extent_flags(leaf, ei,
8114                                                BTRFS_EXTENT_FLAG_DATA);
8115                 } else {
8116                         struct btrfs_disk_key copy_key;
8117
8118                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8119                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8120                                              sizeof(*bi));
8121
8122                         btrfs_set_disk_key_objectid(&copy_key,
8123                                                     rec->info_objectid);
8124                         btrfs_set_disk_key_type(&copy_key, 0);
8125                         btrfs_set_disk_key_offset(&copy_key, 0);
8126
8127                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8128                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8129
8130                         btrfs_set_extent_flags(leaf, ei,
8131                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8132                 }
8133
8134                 btrfs_mark_buffer_dirty(leaf);
8135                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8136                                                rec->max_size, 1, 0);
8137                 if (ret)
8138                         goto fail;
8139                 btrfs_release_path(path);
8140         }
8141
8142         if (back->is_data) {
8143                 int i;
8144
8145                 dback = to_data_backref(back);
8146                 parent = back->full_backref ? dback->parent : 0;
8147
8148                 for (i = 0; i < dback->found_ref; i++) {
8149                         /* if parent != 0, we're doing a full backref
8150                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8151                          * just makes the backref allocator create a data
8152                          * backref
8153                          */
8154                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8155                                                    rec->start, rec->max_size,
8156                                                    parent, dback->root,
8157                                                    parent ?
8158                                                    BTRFS_FIRST_FREE_OBJECTID :
8159                                                    dback->owner,
8160                                                    dback->offset);
8161                         if (ret)
8162                                 break;
8163                 }
8164                 fprintf(stderr, "adding new data backref"
8165                                 " on %llu %s %llu owner %llu"
8166                                 " offset %llu found %d\n",
8167                                 (unsigned long long)rec->start,
8168                                 back->full_backref ? "parent" : "root",
8169                                 back->full_backref ?
8170                                 (unsigned long long)parent :
8171                                 (unsigned long long)dback->root,
8172                                 (unsigned long long)dback->owner,
8173                                 (unsigned long long)dback->offset,
8174                                 dback->found_ref);
8175         } else {
8176                 struct tree_backref *tback;
8177
8178                 tback = to_tree_backref(back);
8179                 parent = back->full_backref ? tback->parent : 0;
8180
8181                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8182                                            rec->start, rec->max_size,
8183                                            parent, tback->root, 0, 0);
8184                 fprintf(stderr, "adding new tree backref on "
8185                         "start %llu len %llu parent %llu root %llu\n",
8186                         (unsigned long long)rec->start,
8187                         (unsigned long long)rec->max_size,
8188                         (unsigned long long)parent,
8189                         (unsigned long long)tback->root);
8190         }
8191 fail:
8192         btrfs_release_path(path);
8193         return ret;
8194 }
8195
8196 /* Return the entry of @entries with exactly @bytenr and @bytes, or NULL. */
8197 static struct extent_entry *find_entry(struct list_head *entries,
8198                                        u64 bytenr, u64 bytes)
8199 {
8200         struct extent_entry *cur;
8201
8202         list_for_each_entry(cur, entries, list)
8203                 if (cur->bytenr == bytenr && cur->bytes == bytes)
8204                         return cur;     /* first exact match wins */
8205
8206         return NULL;
8207 }
8208 /*
      * Pick the most trustworthy ("most right") entry from @entries, judged by
      * entry->count.  Entries whose broken count equals their count are skipped.
      * Returns NULL if no entry qualifies or if equal counts leave no clear
      * winner.
      */
8209 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8210 {
8211         struct extent_entry *entry, *best = NULL, *prev = NULL;
8212
8213         list_for_each_entry(entry, entries, list) {
8214                 /*
8215                  * If there are as many broken entries as entries then we know
8216                  * not to trust this particular entry.
8217                  */
8218                 if (entry->broken == entry->count)
8219                         continue;
8220
8221                 /*
8222                  * Special case, when there are only two entries and 'best' is
8223                  * the first one
8224                  */
8225                 if (!prev) {
8226                         best = entry;
8227                         prev = entry;
8228                         continue;
8229                 }
8230
8231                 /*
8232                  * If our current entry == best then we can't be sure our best
8233                  * is really the best, so we need to keep searching.
8234                  */
8235                 if (best && best->count == entry->count) {
8236                         prev = entry;
8237                         best = NULL;
8238                         continue;
8239                 }
8240
8241                 /* Prev == entry, not good enough, have to keep searching */
8242                 if (!prev->broken && prev->count == entry->count)
8243                         continue;
8244
                     /*
                      * With no current best, take the larger count of prev and
                      * entry (entry on a tie); otherwise replace best only when
                      * entry has a strictly larger count.
                      */
8245                 if (!best)
8246                         best = (prev->count > entry->count) ? prev : entry;
8247                 else if (best->count < entry->count)
8248                         best = entry;
8249                 prev = entry;
8250         }
8251
8252         return best;
8253 }
8254
8255 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8256                       struct data_backref *dback, struct extent_entry *entry)
8257 {
8258         struct btrfs_trans_handle *trans;
8259         struct btrfs_root *root;
8260         struct btrfs_file_extent_item *fi;
8261         struct extent_buffer *leaf;
8262         struct btrfs_key key;
8263         u64 bytenr, bytes;
8264         int ret, err;
8265
8266         key.objectid = dback->root;
8267         key.type = BTRFS_ROOT_ITEM_KEY;
8268         key.offset = (u64)-1;
8269         root = btrfs_read_fs_root(info, &key);
8270         if (IS_ERR(root)) {
8271                 fprintf(stderr, "Couldn't find root for our ref\n");
8272                 return -EINVAL;
8273         }
8274
8275         /*
8276          * The backref points to the original offset of the extent if it was
8277          * split, so we need to search down to the offset we have and then walk
8278          * forward until we find the backref we're looking for.
8279          */
8280         key.objectid = dback->owner;
8281         key.type = BTRFS_EXTENT_DATA_KEY;
8282         key.offset = dback->offset;
8283         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8284         if (ret < 0) {
8285                 fprintf(stderr, "Error looking up ref %d\n", ret);
8286                 return ret;
8287         }
8288
8289         while (1) {
8290                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8291                         ret = btrfs_next_leaf(root, path);
8292                         if (ret) {
8293                                 fprintf(stderr, "Couldn't find our ref, next\n");
8294                                 return -EINVAL;
8295                         }
8296                 }
8297                 leaf = path->nodes[0];
8298                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8299                 if (key.objectid != dback->owner ||
8300                     key.type != BTRFS_EXTENT_DATA_KEY) {
8301                         fprintf(stderr, "Couldn't find our ref, search\n");
8302                         return -EINVAL;
8303                 }
8304                 fi = btrfs_item_ptr(leaf, path->slots[0],
8305                                     struct btrfs_file_extent_item);
8306                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8307                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8308
8309                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8310                         break;
8311                 path->slots[0]++;
8312         }
8313
8314         btrfs_release_path(path);
8315
8316         trans = btrfs_start_transaction(root, 1);
8317         if (IS_ERR(trans))
8318                 return PTR_ERR(trans);
8319
8320         /*
8321          * Ok we have the key of the file extent we want to fix, now we can cow
8322          * down to the thing and fix it.
8323          */
8324         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8325         if (ret < 0) {
8326                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8327                         key.objectid, key.type, key.offset, ret);
8328                 goto out;
8329         }
8330         if (ret > 0) {
8331                 fprintf(stderr, "Well that's odd, we just found this key "
8332                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8333                         key.offset);
8334                 ret = -EINVAL;
8335                 goto out;
8336         }
8337         leaf = path->nodes[0];
8338         fi = btrfs_item_ptr(leaf, path->slots[0],
8339                             struct btrfs_file_extent_item);
8340
8341         if (btrfs_file_extent_compression(leaf, fi) &&
8342             dback->disk_bytenr != entry->bytenr) {
8343                 fprintf(stderr, "Ref doesn't match the record start and is "
8344                         "compressed, please take a btrfs-image of this file "
8345                         "system and send it to a btrfs developer so they can "
8346                         "complete this functionality for bytenr %Lu\n",
8347                         dback->disk_bytenr);
8348                 ret = -EINVAL;
8349                 goto out;
8350         }
8351
8352         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8353                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8354         } else if (dback->disk_bytenr > entry->bytenr) {
8355                 u64 off_diff, offset;
8356
8357                 off_diff = dback->disk_bytenr - entry->bytenr;
8358                 offset = btrfs_file_extent_offset(leaf, fi);
8359                 if (dback->disk_bytenr + offset +
8360                     btrfs_file_extent_num_bytes(leaf, fi) >
8361                     entry->bytenr + entry->bytes) {
8362                         fprintf(stderr, "Ref is past the entry end, please "
8363                                 "take a btrfs-image of this file system and "
8364                                 "send it to a btrfs developer, ref %Lu\n",
8365                                 dback->disk_bytenr);
8366                         ret = -EINVAL;
8367                         goto out;
8368                 }
8369                 offset += off_diff;
8370                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8371                 btrfs_set_file_extent_offset(leaf, fi, offset);
8372         } else if (dback->disk_bytenr < entry->bytenr) {
8373                 u64 offset;
8374
8375                 offset = btrfs_file_extent_offset(leaf, fi);
8376                 if (dback->disk_bytenr + offset < entry->bytenr) {
8377                         fprintf(stderr, "Ref is before the entry start, please"
8378                                 " take a btrfs-image of this file system and "
8379                                 "send it to a btrfs developer, ref %Lu\n",
8380                                 dback->disk_bytenr);
8381                         ret = -EINVAL;
8382                         goto out;
8383                 }
8384
8385                 offset += dback->disk_bytenr;
8386                 offset -= entry->bytenr;
8387                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8388                 btrfs_set_file_extent_offset(leaf, fi, offset);
8389         }
8390
8391         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8392
8393         /*
8394          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8395          * only do this if we aren't using compression, otherwise it's a
8396          * trickier case.
8397          */
8398         if (!btrfs_file_extent_compression(leaf, fi))
8399                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8400         else
8401                 printf("ram bytes may be wrong?\n");
8402         btrfs_mark_buffer_dirty(leaf);
8403 out:
8404         err = btrfs_commit_transaction(trans, root);
8405         btrfs_release_path(path);
8406         return ret ? ret : err;
8407 }
8408
8409 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8410                            struct extent_record *rec)
8411 {
8412         struct extent_backref *back;
8413         struct data_backref *dback;
8414         struct extent_entry *entry, *best = NULL;
8415         LIST_HEAD(entries);
8416         int nr_entries = 0;
8417         int broken_entries = 0;
8418         int ret = 0;
8419         short mismatch = 0;
8420
8421         /*
8422          * Metadata is easy and the backrefs should always agree on bytenr and
8423          * size, if not we've got bigger issues.
8424          */
8425         if (rec->metadata)
8426                 return 0;
8427
8428         list_for_each_entry(back, &rec->backrefs, list) {
8429                 if (back->full_backref || !back->is_data)
8430                         continue;
8431
8432                 dback = to_data_backref(back);
8433
8434                 /*
8435                  * We only pay attention to backrefs that we found a real
8436                  * backref for.
8437                  */
8438                 if (dback->found_ref == 0)
8439                         continue;
8440
8441                 /*
8442                  * For now we only catch when the bytes don't match, not the
8443                  * bytenr.  We can easily do this at the same time, but I want
8444                  * to have a fs image to test on before we just add repair
8445                  * functionality willy-nilly so we know we won't screw up the
8446                  * repair.
8447                  */
8448
8449                 entry = find_entry(&entries, dback->disk_bytenr,
8450                                    dback->bytes);
8451                 if (!entry) {
8452                         entry = malloc(sizeof(struct extent_entry));
8453                         if (!entry) {
8454                                 ret = -ENOMEM;
8455                                 goto out;
8456                         }
8457                         memset(entry, 0, sizeof(*entry));
8458                         entry->bytenr = dback->disk_bytenr;
8459                         entry->bytes = dback->bytes;
8460                         list_add_tail(&entry->list, &entries);
8461                         nr_entries++;
8462                 }
8463
8464                 /*
8465                  * If we only have on entry we may think the entries agree when
8466                  * in reality they don't so we have to do some extra checking.
8467                  */
8468                 if (dback->disk_bytenr != rec->start ||
8469                     dback->bytes != rec->nr || back->broken)
8470                         mismatch = 1;
8471
8472                 if (back->broken) {
8473                         entry->broken++;
8474                         broken_entries++;
8475                 }
8476
8477                 entry->count++;
8478         }
8479
8480         /* Yay all the backrefs agree, carry on good sir */
8481         if (nr_entries <= 1 && !mismatch)
8482                 goto out;
8483
8484         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8485                 "%Lu\n", rec->start);
8486
8487         /*
8488          * First we want to see if the backrefs can agree amongst themselves who
8489          * is right, so figure out which one of the entries has the highest
8490          * count.
8491          */
8492         best = find_most_right_entry(&entries);
8493
8494         /*
8495          * Ok so we may have an even split between what the backrefs think, so
8496          * this is where we use the extent ref to see what it thinks.
8497          */
8498         if (!best) {
8499                 entry = find_entry(&entries, rec->start, rec->nr);
8500                 if (!entry && (!broken_entries || !rec->found_rec)) {
8501                         fprintf(stderr, "Backrefs don't agree with each other "
8502                                 "and extent record doesn't agree with anybody,"
8503                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8504                                 rec->start, rec->nr);
8505                         ret = -EINVAL;
8506                         goto out;
8507                 } else if (!entry) {
8508                         /*
8509                          * Ok our backrefs were broken, we'll assume this is the
8510                          * correct value and add an entry for this range.
8511                          */
8512                         entry = malloc(sizeof(struct extent_entry));
8513                         if (!entry) {
8514                                 ret = -ENOMEM;
8515                                 goto out;
8516                         }
8517                         memset(entry, 0, sizeof(*entry));
8518                         entry->bytenr = rec->start;
8519                         entry->bytes = rec->nr;
8520                         list_add_tail(&entry->list, &entries);
8521                         nr_entries++;
8522                 }
8523                 entry->count++;
8524                 best = find_most_right_entry(&entries);
8525                 if (!best) {
8526                         fprintf(stderr, "Backrefs and extent record evenly "
8527                                 "split on who is right, this is going to "
8528                                 "require user input to fix bytenr %Lu bytes "
8529                                 "%Lu\n", rec->start, rec->nr);
8530                         ret = -EINVAL;
8531                         goto out;
8532                 }
8533         }
8534
8535         /*
8536          * I don't think this can happen currently as we'll abort() if we catch
8537          * this case higher up, but in case somebody removes that we still can't
8538          * deal with it properly here yet, so just bail out of that's the case.
8539          */
8540         if (best->bytenr != rec->start) {
8541                 fprintf(stderr, "Extent start and backref starts don't match, "
8542                         "please use btrfs-image on this file system and send "
8543                         "it to a btrfs developer so they can make fsck fix "
8544                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8545                         rec->start, rec->nr);
8546                 ret = -EINVAL;
8547                 goto out;
8548         }
8549
8550         /*
8551          * Ok great we all agreed on an extent record, let's go find the real
8552          * references and fix up the ones that don't match.
8553          */
8554         list_for_each_entry(back, &rec->backrefs, list) {
8555                 if (back->full_backref || !back->is_data)
8556                         continue;
8557
8558                 dback = to_data_backref(back);
8559
8560                 /*
8561                  * Still ignoring backrefs that don't have a real ref attached
8562                  * to them.
8563                  */
8564                 if (dback->found_ref == 0)
8565                         continue;
8566
8567                 if (dback->bytes == best->bytes &&
8568                     dback->disk_bytenr == best->bytenr)
8569                         continue;
8570
8571                 ret = repair_ref(info, path, dback, best);
8572                 if (ret)
8573                         goto out;
8574         }
8575
8576         /*
8577          * Ok we messed with the actual refs, which means we need to drop our
8578          * entire cache and go back and rescan.  I know this is a huge pain and
8579          * adds a lot of extra work, but it's the only way to be safe.  Once all
8580          * the backrefs agree we may not need to do anything to the extent
8581          * record itself.
8582          */
8583         ret = -EAGAIN;
8584 out:
8585         while (!list_empty(&entries)) {
8586                 entry = list_entry(entries.next, struct extent_entry, list);
8587                 list_del_init(&entry->list);
8588                 free(entry);
8589         }
8590         return ret;
8591 }
8592
8593 static int process_duplicates(struct cache_tree *extent_cache,
8594                               struct extent_record *rec)
8595 {
8596         struct extent_record *good, *tmp;
8597         struct cache_extent *cache;
8598         int ret;
8599
8600         /*
8601          * If we found a extent record for this extent then return, or if we
8602          * have more than one duplicate we are likely going to need to delete
8603          * something.
8604          */
8605         if (rec->found_rec || rec->num_duplicates > 1)
8606                 return 0;
8607
8608         /* Shouldn't happen but just in case */
8609         BUG_ON(!rec->num_duplicates);
8610
8611         /*
8612          * So this happens if we end up with a backref that doesn't match the
8613          * actual extent entry.  So either the backref is bad or the extent
8614          * entry is bad.  Either way we want to have the extent_record actually
8615          * reflect what we found in the extent_tree, so we need to take the
8616          * duplicate out and use that as the extent_record since the only way we
8617          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8618          */
8619         remove_cache_extent(extent_cache, &rec->cache);
8620
8621         good = to_extent_record(rec->dups.next);
8622         list_del_init(&good->list);
8623         INIT_LIST_HEAD(&good->backrefs);
8624         INIT_LIST_HEAD(&good->dups);
8625         good->cache.start = good->start;
8626         good->cache.size = good->nr;
8627         good->content_checked = 0;
8628         good->owner_ref_checked = 0;
8629         good->num_duplicates = 0;
8630         good->refs = rec->refs;
8631         list_splice_init(&rec->backrefs, &good->backrefs);
8632         while (1) {
8633                 cache = lookup_cache_extent(extent_cache, good->start,
8634                                             good->nr);
8635                 if (!cache)
8636                         break;
8637                 tmp = container_of(cache, struct extent_record, cache);
8638
8639                 /*
8640                  * If we find another overlapping extent and it's found_rec is
8641                  * set then it's a duplicate and we need to try and delete
8642                  * something.
8643                  */
8644                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8645                         if (list_empty(&good->list))
8646                                 list_add_tail(&good->list,
8647                                               &duplicate_extents);
8648                         good->num_duplicates += tmp->num_duplicates + 1;
8649                         list_splice_init(&tmp->dups, &good->dups);
8650                         list_del_init(&tmp->list);
8651                         list_add_tail(&tmp->list, &good->dups);
8652                         remove_cache_extent(extent_cache, &tmp->cache);
8653                         continue;
8654                 }
8655
8656                 /*
8657                  * Ok we have another non extent item backed extent rec, so lets
8658                  * just add it to this extent and carry on like we did above.
8659                  */
8660                 good->refs += tmp->refs;
8661                 list_splice_init(&tmp->backrefs, &good->backrefs);
8662                 remove_cache_extent(extent_cache, &tmp->cache);
8663                 free(tmp);
8664         }
8665         ret = insert_cache_extent(extent_cache, &good->cache);
8666         BUG_ON(ret);
8667         free(rec);
8668         return good->num_duplicates ? 0 : 1;
8669 }
8670
8671 static int delete_duplicate_records(struct btrfs_root *root,
8672                                     struct extent_record *rec)
8673 {
8674         struct btrfs_trans_handle *trans;
8675         LIST_HEAD(delete_list);
8676         struct btrfs_path path;
8677         struct extent_record *tmp, *good, *n;
8678         int nr_del = 0;
8679         int ret = 0, err;
8680         struct btrfs_key key;
8681
8682         btrfs_init_path(&path);
8683
8684         good = rec;
8685         /* Find the record that covers all of the duplicates. */
8686         list_for_each_entry(tmp, &rec->dups, list) {
8687                 if (good->start < tmp->start)
8688                         continue;
8689                 if (good->nr > tmp->nr)
8690                         continue;
8691
8692                 if (tmp->start + tmp->nr < good->start + good->nr) {
8693                         fprintf(stderr, "Ok we have overlapping extents that "
8694                                 "aren't completely covered by each other, this "
8695                                 "is going to require more careful thought.  "
8696                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8697                                 tmp->start, tmp->nr, good->start, good->nr);
8698                         abort();
8699                 }
8700                 good = tmp;
8701         }
8702
8703         if (good != rec)
8704                 list_add_tail(&rec->list, &delete_list);
8705
8706         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8707                 if (tmp == good)
8708                         continue;
8709                 list_move_tail(&tmp->list, &delete_list);
8710         }
8711
8712         root = root->fs_info->extent_root;
8713         trans = btrfs_start_transaction(root, 1);
8714         if (IS_ERR(trans)) {
8715                 ret = PTR_ERR(trans);
8716                 goto out;
8717         }
8718
8719         list_for_each_entry(tmp, &delete_list, list) {
8720                 if (tmp->found_rec == 0)
8721                         continue;
8722                 key.objectid = tmp->start;
8723                 key.type = BTRFS_EXTENT_ITEM_KEY;
8724                 key.offset = tmp->nr;
8725
8726                 /* Shouldn't happen but just in case */
8727                 if (tmp->metadata) {
8728                         fprintf(stderr, "Well this shouldn't happen, extent "
8729                                 "record overlaps but is metadata? "
8730                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8731                         abort();
8732                 }
8733
8734                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8735                 if (ret) {
8736                         if (ret > 0)
8737                                 ret = -EINVAL;
8738                         break;
8739                 }
8740                 ret = btrfs_del_item(trans, root, &path);
8741                 if (ret)
8742                         break;
8743                 btrfs_release_path(&path);
8744                 nr_del++;
8745         }
8746         err = btrfs_commit_transaction(trans, root);
8747         if (err && !ret)
8748                 ret = err;
8749 out:
8750         while (!list_empty(&delete_list)) {
8751                 tmp = to_extent_record(delete_list.next);
8752                 list_del_init(&tmp->list);
8753                 if (tmp == rec)
8754                         continue;
8755                 free(tmp);
8756         }
8757
8758         while (!list_empty(&rec->dups)) {
8759                 tmp = to_extent_record(rec->dups.next);
8760                 list_del_init(&tmp->list);
8761                 free(tmp);
8762         }
8763
8764         btrfs_release_path(&path);
8765
8766         if (!ret && !nr_del)
8767                 rec->num_duplicates = 0;
8768
8769         return ret ? ret : nr_del;
8770 }
8771
8772 static int find_possible_backrefs(struct btrfs_fs_info *info,
8773                                   struct btrfs_path *path,
8774                                   struct cache_tree *extent_cache,
8775                                   struct extent_record *rec)
8776 {
8777         struct btrfs_root *root;
8778         struct extent_backref *back;
8779         struct data_backref *dback;
8780         struct cache_extent *cache;
8781         struct btrfs_file_extent_item *fi;
8782         struct btrfs_key key;
8783         u64 bytenr, bytes;
8784         int ret;
8785
8786         list_for_each_entry(back, &rec->backrefs, list) {
8787                 /* Don't care about full backrefs (poor unloved backrefs) */
8788                 if (back->full_backref || !back->is_data)
8789                         continue;
8790
8791                 dback = to_data_backref(back);
8792
8793                 /* We found this one, we don't need to do a lookup */
8794                 if (dback->found_ref)
8795                         continue;
8796
8797                 key.objectid = dback->root;
8798                 key.type = BTRFS_ROOT_ITEM_KEY;
8799                 key.offset = (u64)-1;
8800
8801                 root = btrfs_read_fs_root(info, &key);
8802
8803                 /* No root, definitely a bad ref, skip */
8804                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8805                         continue;
8806                 /* Other err, exit */
8807                 if (IS_ERR(root))
8808                         return PTR_ERR(root);
8809
8810                 key.objectid = dback->owner;
8811                 key.type = BTRFS_EXTENT_DATA_KEY;
8812                 key.offset = dback->offset;
8813                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8814                 if (ret) {
8815                         btrfs_release_path(path);
8816                         if (ret < 0)
8817                                 return ret;
8818                         /* Didn't find it, we can carry on */
8819                         ret = 0;
8820                         continue;
8821                 }
8822
8823                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8824                                     struct btrfs_file_extent_item);
8825                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8826                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8827                 btrfs_release_path(path);
8828                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8829                 if (cache) {
8830                         struct extent_record *tmp;
8831                         tmp = container_of(cache, struct extent_record, cache);
8832
8833                         /*
8834                          * If we found an extent record for the bytenr for this
8835                          * particular backref then we can't add it to our
8836                          * current extent record.  We only want to add backrefs
8837                          * that don't have a corresponding extent item in the
8838                          * extent tree since they likely belong to this record
8839                          * and we need to fix it if it doesn't match bytenrs.
8840                          */
8841                         if  (tmp->found_rec)
8842                                 continue;
8843                 }
8844
8845                 dback->found_ref += 1;
8846                 dback->disk_bytenr = bytenr;
8847                 dback->bytes = bytes;
8848
8849                 /*
8850                  * Set this so the verify backref code knows not to trust the
8851                  * values in this backref.
8852                  */
8853                 back->broken = 1;
8854         }
8855
8856         return 0;
8857 }
8858
8859 /*
8860  * Record orphan data ref into corresponding root.
8861  *
8862  * Return 0 if the extent item contains data ref and recorded.
8863  * Return 1 if the extent item contains no useful data ref
8864  *   On that case, it may contains only shared_dataref or metadata backref
8865  *   or the file extent exists(this should be handled by the extent bytenr
8866  *   recovery routine)
8867  * Return <0 if something goes wrong.
8868  */
8869 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8870                                       struct extent_record *rec)
8871 {
8872         struct btrfs_key key;
8873         struct btrfs_root *dest_root;
8874         struct extent_backref *back;
8875         struct data_backref *dback;
8876         struct orphan_data_extent *orphan;
8877         struct btrfs_path path;
8878         int recorded_data_ref = 0;
8879         int ret = 0;
8880
8881         if (rec->metadata)
8882                 return 1;
8883         btrfs_init_path(&path);
8884         list_for_each_entry(back, &rec->backrefs, list) {
8885                 if (back->full_backref || !back->is_data ||
8886                     !back->found_extent_tree)
8887                         continue;
8888                 dback = to_data_backref(back);
8889                 if (dback->found_ref)
8890                         continue;
8891                 key.objectid = dback->root;
8892                 key.type = BTRFS_ROOT_ITEM_KEY;
8893                 key.offset = (u64)-1;
8894
8895                 dest_root = btrfs_read_fs_root(fs_info, &key);
8896
8897                 /* For non-exist root we just skip it */
8898                 if (IS_ERR(dest_root) || !dest_root)
8899                         continue;
8900
8901                 key.objectid = dback->owner;
8902                 key.type = BTRFS_EXTENT_DATA_KEY;
8903                 key.offset = dback->offset;
8904
8905                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8906                 btrfs_release_path(&path);
8907                 /*
8908                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8909                  * we need to record it for inode/file extent rebuild.
8910                  * For ret > 0, we record it only for file extent rebuild.
8911                  * For ret == 0, the file extent exists but only bytenr
8912                  * mismatch, let the original bytenr fix routine to handle,
8913                  * don't record it.
8914                  */
8915                 if (ret == 0)
8916                         continue;
8917                 ret = 0;
8918                 orphan = malloc(sizeof(*orphan));
8919                 if (!orphan) {
8920                         ret = -ENOMEM;
8921                         goto out;
8922                 }
8923                 INIT_LIST_HEAD(&orphan->list);
8924                 orphan->root = dback->root;
8925                 orphan->objectid = dback->owner;
8926                 orphan->offset = dback->offset;
8927                 orphan->disk_bytenr = rec->cache.start;
8928                 orphan->disk_len = rec->cache.size;
8929                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8930                 recorded_data_ref = 1;
8931         }
8932 out:
8933         btrfs_release_path(&path);
8934         if (!ret)
8935                 return !recorded_data_ref;
8936         else
8937                 return ret;
8938 }
8939
8940 /*
8941  * when an incorrect extent item is found, this will delete
8942  * all of the existing entries for it and recreate them
8943  * based on what the tree scan found.
8944  */
8945 static int fixup_extent_refs(struct btrfs_fs_info *info,
8946                              struct cache_tree *extent_cache,
8947                              struct extent_record *rec)
8948 {
8949         struct btrfs_trans_handle *trans = NULL;
8950         int ret;
8951         struct btrfs_path path;
8952         struct list_head *cur = rec->backrefs.next;
8953         struct cache_extent *cache;
8954         struct extent_backref *back;
8955         int allocated = 0;
8956         u64 flags = 0;
8957
8958         if (rec->flag_block_full_backref)
8959                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8960
8961         btrfs_init_path(&path);
8962         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8963                 /*
8964                  * Sometimes the backrefs themselves are so broken they don't
8965                  * get attached to any meaningful rec, so first go back and
8966                  * check any of our backrefs that we couldn't find and throw
8967                  * them into the list if we find the backref so that
8968                  * verify_backrefs can figure out what to do.
8969                  */
8970                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8971                 if (ret < 0)
8972                         goto out;
8973         }
8974
8975         /* step one, make sure all of the backrefs agree */
8976         ret = verify_backrefs(info, &path, rec);
8977         if (ret < 0)
8978                 goto out;
8979
8980         trans = btrfs_start_transaction(info->extent_root, 1);
8981         if (IS_ERR(trans)) {
8982                 ret = PTR_ERR(trans);
8983                 goto out;
8984         }
8985
8986         /* step two, delete all the existing records */
8987         ret = delete_extent_records(trans, info->extent_root, &path,
8988                                     rec->start);
8989
8990         if (ret < 0)
8991                 goto out;
8992
8993         /* was this block corrupt?  If so, don't add references to it */
8994         cache = lookup_cache_extent(info->corrupt_blocks,
8995                                     rec->start, rec->max_size);
8996         if (cache) {
8997                 ret = 0;
8998                 goto out;
8999         }
9000
9001         /* step three, recreate all the refs we did find */
9002         while(cur != &rec->backrefs) {
9003                 back = to_extent_backref(cur);
9004                 cur = cur->next;
9005
9006                 /*
9007                  * if we didn't find any references, don't create a
9008                  * new extent record
9009                  */
9010                 if (!back->found_ref)
9011                         continue;
9012
9013                 rec->bad_full_backref = 0;
9014                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9015                 allocated = 1;
9016
9017                 if (ret)
9018                         goto out;
9019         }
9020 out:
9021         if (trans) {
9022                 int err = btrfs_commit_transaction(trans, info->extent_root);
9023                 if (!ret)
9024                         ret = err;
9025         }
9026
9027         if (!ret)
9028                 fprintf(stderr, "Repaired extent references for %llu\n",
9029                                 (unsigned long long)rec->start);
9030
9031         btrfs_release_path(&path);
9032         return ret;
9033 }
9034
9035 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9036                               struct extent_record *rec)
9037 {
9038         struct btrfs_trans_handle *trans;
9039         struct btrfs_root *root = fs_info->extent_root;
9040         struct btrfs_path path;
9041         struct btrfs_extent_item *ei;
9042         struct btrfs_key key;
9043         u64 flags;
9044         int ret = 0;
9045
9046         key.objectid = rec->start;
9047         if (rec->metadata) {
9048                 key.type = BTRFS_METADATA_ITEM_KEY;
9049                 key.offset = rec->info_level;
9050         } else {
9051                 key.type = BTRFS_EXTENT_ITEM_KEY;
9052                 key.offset = rec->max_size;
9053         }
9054
9055         trans = btrfs_start_transaction(root, 0);
9056         if (IS_ERR(trans))
9057                 return PTR_ERR(trans);
9058
9059         btrfs_init_path(&path);
9060         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9061         if (ret < 0) {
9062                 btrfs_release_path(&path);
9063                 btrfs_commit_transaction(trans, root);
9064                 return ret;
9065         } else if (ret) {
9066                 fprintf(stderr, "Didn't find extent for %llu\n",
9067                         (unsigned long long)rec->start);
9068                 btrfs_release_path(&path);
9069                 btrfs_commit_transaction(trans, root);
9070                 return -ENOENT;
9071         }
9072
9073         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9074                             struct btrfs_extent_item);
9075         flags = btrfs_extent_flags(path.nodes[0], ei);
9076         if (rec->flag_block_full_backref) {
9077                 fprintf(stderr, "setting full backref on %llu\n",
9078                         (unsigned long long)key.objectid);
9079                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9080         } else {
9081                 fprintf(stderr, "clearing full backref on %llu\n",
9082                         (unsigned long long)key.objectid);
9083                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9084         }
9085         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9086         btrfs_mark_buffer_dirty(path.nodes[0]);
9087         btrfs_release_path(&path);
9088         ret = btrfs_commit_transaction(trans, root);
9089         if (!ret)
9090                 fprintf(stderr, "Repaired extent flags for %llu\n",
9091                                 (unsigned long long)rec->start);
9092
9093         return ret;
9094 }
9095
9096 /* right now we only prune from the extent allocation tree */
9097 static int prune_one_block(struct btrfs_trans_handle *trans,
9098                            struct btrfs_fs_info *info,
9099                            struct btrfs_corrupt_block *corrupt)
9100 {
9101         int ret;
9102         struct btrfs_path path;
9103         struct extent_buffer *eb;
9104         u64 found;
9105         int slot;
9106         int nritems;
9107         int level = corrupt->level + 1;
9108
9109         btrfs_init_path(&path);
9110 again:
9111         /* we want to stop at the parent to our busted block */
9112         path.lowest_level = level;
9113
9114         ret = btrfs_search_slot(trans, info->extent_root,
9115                                 &corrupt->key, &path, -1, 1);
9116
9117         if (ret < 0)
9118                 goto out;
9119
9120         eb = path.nodes[level];
9121         if (!eb) {
9122                 ret = -ENOENT;
9123                 goto out;
9124         }
9125
9126         /*
9127          * hopefully the search gave us the block we want to prune,
9128          * lets try that first
9129          */
9130         slot = path.slots[level];
9131         found =  btrfs_node_blockptr(eb, slot);
9132         if (found == corrupt->cache.start)
9133                 goto del_ptr;
9134
9135         nritems = btrfs_header_nritems(eb);
9136
9137         /* the search failed, lets scan this node and hope we find it */
9138         for (slot = 0; slot < nritems; slot++) {
9139                 found =  btrfs_node_blockptr(eb, slot);
9140                 if (found == corrupt->cache.start)
9141                         goto del_ptr;
9142         }
9143         /*
9144          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9145          * to this block
9146          */
9147         if (eb == info->extent_root->node) {
9148                 ret = -ENOENT;
9149                 goto out;
9150         } else {
9151                 level++;
9152                 btrfs_release_path(&path);
9153                 goto again;
9154         }
9155
9156 del_ptr:
9157         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9158         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9159
9160 out:
9161         btrfs_release_path(&path);
9162         return ret;
9163 }
9164
9165 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9166 {
9167         struct btrfs_trans_handle *trans = NULL;
9168         struct cache_extent *cache;
9169         struct btrfs_corrupt_block *corrupt;
9170
9171         while (1) {
9172                 cache = search_cache_extent(info->corrupt_blocks, 0);
9173                 if (!cache)
9174                         break;
9175                 if (!trans) {
9176                         trans = btrfs_start_transaction(info->extent_root, 1);
9177                         if (IS_ERR(trans))
9178                                 return PTR_ERR(trans);
9179                 }
9180                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9181                 prune_one_block(trans, info, corrupt);
9182                 remove_cache_extent(info->corrupt_blocks, cache);
9183         }
9184         if (trans)
9185                 return btrfs_commit_transaction(trans, info->extent_root);
9186         return 0;
9187 }
9188
9189 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9190 {
9191         struct btrfs_block_group_cache *cache;
9192         u64 start, end;
9193         int ret;
9194
9195         while (1) {
9196                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9197                                             &start, &end, EXTENT_DIRTY);
9198                 if (ret)
9199                         break;
9200                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9201         }
9202
9203         start = 0;
9204         while (1) {
9205                 cache = btrfs_lookup_first_block_group(fs_info, start);
9206                 if (!cache)
9207                         break;
9208                 if (cache->cached)
9209                         cache->cached = 0;
9210                 start = cache->key.objectid + cache->key.offset;
9211         }
9212 }
9213
9214 static int check_extent_refs(struct btrfs_root *root,
9215                              struct cache_tree *extent_cache)
9216 {
9217         struct extent_record *rec;
9218         struct cache_extent *cache;
9219         int ret = 0;
9220         int had_dups = 0;
9221
9222         if (repair) {
9223                 /*
9224                  * if we're doing a repair, we have to make sure
9225                  * we don't allocate from the problem extents.
9226                  * In the worst case, this will be all the
9227                  * extents in the FS
9228                  */
9229                 cache = search_cache_extent(extent_cache, 0);
9230                 while(cache) {
9231                         rec = container_of(cache, struct extent_record, cache);
9232                         set_extent_dirty(root->fs_info->excluded_extents,
9233                                          rec->start,
9234                                          rec->start + rec->max_size - 1);
9235                         cache = next_cache_extent(cache);
9236                 }
9237
9238                 /* pin down all the corrupted blocks too */
9239                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9240                 while(cache) {
9241                         set_extent_dirty(root->fs_info->excluded_extents,
9242                                          cache->start,
9243                                          cache->start + cache->size - 1);
9244                         cache = next_cache_extent(cache);
9245                 }
9246                 prune_corrupt_blocks(root->fs_info);
9247                 reset_cached_block_groups(root->fs_info);
9248         }
9249
9250         reset_cached_block_groups(root->fs_info);
9251
9252         /*
9253          * We need to delete any duplicate entries we find first otherwise we
9254          * could mess up the extent tree when we have backrefs that actually
9255          * belong to a different extent item and not the weird duplicate one.
9256          */
9257         while (repair && !list_empty(&duplicate_extents)) {
9258                 rec = to_extent_record(duplicate_extents.next);
9259                 list_del_init(&rec->list);
9260
9261                 /* Sometimes we can find a backref before we find an actual
9262                  * extent, so we need to process it a little bit to see if there
9263                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9264                  * if this is a backref screwup.  If we need to delete stuff
9265                  * process_duplicates() will return 0, otherwise it will return
9266                  * 1 and we
9267                  */
9268                 if (process_duplicates(extent_cache, rec))
9269                         continue;
9270                 ret = delete_duplicate_records(root, rec);
9271                 if (ret < 0)
9272                         return ret;
9273                 /*
9274                  * delete_duplicate_records will return the number of entries
9275                  * deleted, so if it's greater than 0 then we know we actually
9276                  * did something and we need to remove.
9277                  */
9278                 if (ret)
9279                         had_dups = 1;
9280         }
9281
9282         if (had_dups)
9283                 return -EAGAIN;
9284
9285         while(1) {
9286                 int cur_err = 0;
9287                 int fix = 0;
9288
9289                 cache = search_cache_extent(extent_cache, 0);
9290                 if (!cache)
9291                         break;
9292                 rec = container_of(cache, struct extent_record, cache);
9293                 if (rec->num_duplicates) {
9294                         fprintf(stderr, "extent item %llu has multiple extent "
9295                                 "items\n", (unsigned long long)rec->start);
9296                         cur_err = 1;
9297                 }
9298
9299                 if (rec->refs != rec->extent_item_refs) {
9300                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9301                                 (unsigned long long)rec->start,
9302                                 (unsigned long long)rec->nr);
9303                         fprintf(stderr, "extent item %llu, found %llu\n",
9304                                 (unsigned long long)rec->extent_item_refs,
9305                                 (unsigned long long)rec->refs);
9306                         ret = record_orphan_data_extents(root->fs_info, rec);
9307                         if (ret < 0)
9308                                 goto repair_abort;
9309                         fix = ret;
9310                         cur_err = 1;
9311                 }
9312                 if (all_backpointers_checked(rec, 1)) {
9313                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9314                                 (unsigned long long)rec->start,
9315                                 (unsigned long long)rec->nr);
9316                         fix = 1;
9317                         cur_err = 1;
9318                 }
9319                 if (!rec->owner_ref_checked) {
9320                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9321                                 (unsigned long long)rec->start,
9322                                 (unsigned long long)rec->nr);
9323                         fix = 1;
9324                         cur_err = 1;
9325                 }
9326
9327                 if (repair && fix) {
9328                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9329                         if (ret)
9330                                 goto repair_abort;
9331                 }
9332
9333
9334                 if (rec->bad_full_backref) {
9335                         fprintf(stderr, "bad full backref, on [%llu]\n",
9336                                 (unsigned long long)rec->start);
9337                         if (repair) {
9338                                 ret = fixup_extent_flags(root->fs_info, rec);
9339                                 if (ret)
9340                                         goto repair_abort;
9341                                 fix = 1;
9342                         }
9343                         cur_err = 1;
9344                 }
9345                 /*
9346                  * Although it's not a extent ref's problem, we reuse this
9347                  * routine for error reporting.
9348                  * No repair function yet.
9349                  */
9350                 if (rec->crossing_stripes) {
9351                         fprintf(stderr,
9352                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9353                                 rec->start, rec->start + rec->max_size);
9354                         cur_err = 1;
9355                 }
9356
9357                 if (rec->wrong_chunk_type) {
9358                         fprintf(stderr,
9359                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9360                                 rec->start, rec->start + rec->max_size);
9361                         cur_err = 1;
9362                 }
9363
9364                 remove_cache_extent(extent_cache, cache);
9365                 free_all_extent_backrefs(rec);
9366                 if (!init_extent_tree && repair && (!cur_err || fix))
9367                         clear_extent_dirty(root->fs_info->excluded_extents,
9368                                            rec->start,
9369                                            rec->start + rec->max_size - 1);
9370                 free(rec);
9371         }
9372 repair_abort:
9373         if (repair) {
9374                 if (ret && ret != -EAGAIN) {
9375                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9376                         exit(1);
9377                 } else if (!ret) {
9378                         struct btrfs_trans_handle *trans;
9379
9380                         root = root->fs_info->extent_root;
9381                         trans = btrfs_start_transaction(root, 1);
9382                         if (IS_ERR(trans)) {
9383                                 ret = PTR_ERR(trans);
9384                                 goto repair_abort;
9385                         }
9386
9387                         btrfs_fix_block_accounting(trans, root);
9388                         ret = btrfs_commit_transaction(trans, root);
9389                         if (ret)
9390                                 goto repair_abort;
9391                 }
9392                 return ret;
9393         }
9394         return 0;
9395 }
9396
9397 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9398 {
9399         u64 stripe_size;
9400
9401         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9402                 stripe_size = length;
9403                 stripe_size /= num_stripes;
9404         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9405                 stripe_size = length * 2;
9406                 stripe_size /= num_stripes;
9407         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9408                 stripe_size = length;
9409                 stripe_size /= (num_stripes - 1);
9410         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9411                 stripe_size = length;
9412                 stripe_size /= (num_stripes - 2);
9413         } else {
9414                 stripe_size = length;
9415         }
9416         return stripe_size;
9417 }
9418
9419 /*
9420  * Check the chunk with its block group/dev list ref:
9421  * Return 0 if all refs seems valid.
9422  * Return 1 if part of refs seems valid, need later check for rebuild ref
9423  * like missing block group and needs to search extent tree to rebuild them.
9424  * Return -1 if essential refs are missing and unable to rebuild.
9425  */
9426 static int check_chunk_refs(struct chunk_record *chunk_rec,
9427                             struct block_group_tree *block_group_cache,
9428                             struct device_extent_tree *dev_extent_cache,
9429                             int silent)
9430 {
9431         struct cache_extent *block_group_item;
9432         struct block_group_record *block_group_rec;
9433         struct cache_extent *dev_extent_item;
9434         struct device_extent_record *dev_extent_rec;
9435         u64 devid;
9436         u64 offset;
9437         u64 length;
9438         int metadump_v2 = 0;
9439         int i;
9440         int ret = 0;
9441
9442         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9443                                                chunk_rec->offset,
9444                                                chunk_rec->length);
9445         if (block_group_item) {
9446                 block_group_rec = container_of(block_group_item,
9447                                                struct block_group_record,
9448                                                cache);
9449                 if (chunk_rec->length != block_group_rec->offset ||
9450                     chunk_rec->offset != block_group_rec->objectid ||
9451                     (!metadump_v2 &&
9452                      chunk_rec->type_flags != block_group_rec->flags)) {
9453                         if (!silent)
9454                                 fprintf(stderr,
9455                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9456                                         chunk_rec->objectid,
9457                                         chunk_rec->type,
9458                                         chunk_rec->offset,
9459                                         chunk_rec->length,
9460                                         chunk_rec->offset,
9461                                         chunk_rec->type_flags,
9462                                         block_group_rec->objectid,
9463                                         block_group_rec->type,
9464                                         block_group_rec->offset,
9465                                         block_group_rec->offset,
9466                                         block_group_rec->objectid,
9467                                         block_group_rec->flags);
9468                         ret = -1;
9469                 } else {
9470                         list_del_init(&block_group_rec->list);
9471                         chunk_rec->bg_rec = block_group_rec;
9472                 }
9473         } else {
9474                 if (!silent)
9475                         fprintf(stderr,
9476                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9477                                 chunk_rec->objectid,
9478                                 chunk_rec->type,
9479                                 chunk_rec->offset,
9480                                 chunk_rec->length,
9481                                 chunk_rec->offset,
9482                                 chunk_rec->type_flags);
9483                 ret = 1;
9484         }
9485
9486         if (metadump_v2)
9487                 return ret;
9488
9489         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9490                                     chunk_rec->num_stripes);
9491         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9492                 devid = chunk_rec->stripes[i].devid;
9493                 offset = chunk_rec->stripes[i].offset;
9494                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9495                                                        devid, offset, length);
9496                 if (dev_extent_item) {
9497                         dev_extent_rec = container_of(dev_extent_item,
9498                                                 struct device_extent_record,
9499                                                 cache);
9500                         if (dev_extent_rec->objectid != devid ||
9501                             dev_extent_rec->offset != offset ||
9502                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9503                             dev_extent_rec->length != length) {
9504                                 if (!silent)
9505                                         fprintf(stderr,
9506                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9507                                                 chunk_rec->objectid,
9508                                                 chunk_rec->type,
9509                                                 chunk_rec->offset,
9510                                                 chunk_rec->stripes[i].devid,
9511                                                 chunk_rec->stripes[i].offset,
9512                                                 dev_extent_rec->objectid,
9513                                                 dev_extent_rec->offset,
9514                                                 dev_extent_rec->length);
9515                                 ret = -1;
9516                         } else {
9517                                 list_move(&dev_extent_rec->chunk_list,
9518                                           &chunk_rec->dextents);
9519                         }
9520                 } else {
9521                         if (!silent)
9522                                 fprintf(stderr,
9523                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9524                                         chunk_rec->objectid,
9525                                         chunk_rec->type,
9526                                         chunk_rec->offset,
9527                                         chunk_rec->stripes[i].devid,
9528                                         chunk_rec->stripes[i].offset);
9529                         ret = -1;
9530                 }
9531         }
9532         return ret;
9533 }
9534
9535 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9536 int check_chunks(struct cache_tree *chunk_cache,
9537                  struct block_group_tree *block_group_cache,
9538                  struct device_extent_tree *dev_extent_cache,
9539                  struct list_head *good, struct list_head *bad,
9540                  struct list_head *rebuild, int silent)
9541 {
9542         struct cache_extent *chunk_item;
9543         struct chunk_record *chunk_rec;
9544         struct block_group_record *bg_rec;
9545         struct device_extent_record *dext_rec;
9546         int err;
9547         int ret = 0;
9548
9549         chunk_item = first_cache_extent(chunk_cache);
9550         while (chunk_item) {
9551                 chunk_rec = container_of(chunk_item, struct chunk_record,
9552                                          cache);
9553                 err = check_chunk_refs(chunk_rec, block_group_cache,
9554                                        dev_extent_cache, silent);
9555                 if (err < 0)
9556                         ret = err;
9557                 if (err == 0 && good)
9558                         list_add_tail(&chunk_rec->list, good);
9559                 if (err > 0 && rebuild)
9560                         list_add_tail(&chunk_rec->list, rebuild);
9561                 if (err < 0 && bad)
9562                         list_add_tail(&chunk_rec->list, bad);
9563                 chunk_item = next_cache_extent(chunk_item);
9564         }
9565
9566         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9567                 if (!silent)
9568                         fprintf(stderr,
9569                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9570                                 bg_rec->objectid,
9571                                 bg_rec->offset,
9572                                 bg_rec->flags);
9573                 if (!ret)
9574                         ret = 1;
9575         }
9576
9577         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9578                             chunk_list) {
9579                 if (!silent)
9580                         fprintf(stderr,
9581                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9582                                 dext_rec->objectid,
9583                                 dext_rec->offset,
9584                                 dext_rec->length);
9585                 if (!ret)
9586                         ret = 1;
9587         }
9588         return ret;
9589 }
9590
9591
9592 static int check_device_used(struct device_record *dev_rec,
9593                              struct device_extent_tree *dext_cache)
9594 {
9595         struct cache_extent *cache;
9596         struct device_extent_record *dev_extent_rec;
9597         u64 total_byte = 0;
9598
9599         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9600         while (cache) {
9601                 dev_extent_rec = container_of(cache,
9602                                               struct device_extent_record,
9603                                               cache);
9604                 if (dev_extent_rec->objectid != dev_rec->devid)
9605                         break;
9606
9607                 list_del_init(&dev_extent_rec->device_list);
9608                 total_byte += dev_extent_rec->length;
9609                 cache = next_cache_extent(cache);
9610         }
9611
9612         if (total_byte != dev_rec->byte_used) {
9613                 fprintf(stderr,
9614                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9615                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9616                         dev_rec->type, dev_rec->offset);
9617                 return -1;
9618         } else {
9619                 return 0;
9620         }
9621 }
9622
9623 /* check btrfs_dev_item -> btrfs_dev_extent */
9624 static int check_devices(struct rb_root *dev_cache,
9625                          struct device_extent_tree *dev_extent_cache)
9626 {
9627         struct rb_node *dev_node;
9628         struct device_record *dev_rec;
9629         struct device_extent_record *dext_rec;
9630         int err;
9631         int ret = 0;
9632
9633         dev_node = rb_first(dev_cache);
9634         while (dev_node) {
9635                 dev_rec = container_of(dev_node, struct device_record, node);
9636                 err = check_device_used(dev_rec, dev_extent_cache);
9637                 if (err)
9638                         ret = err;
9639
9640                 dev_node = rb_next(dev_node);
9641         }
9642         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9643                             device_list) {
9644                 fprintf(stderr,
9645                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9646                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9647                 if (!ret)
9648                         ret = 1;
9649         }
9650         return ret;
9651 }
9652
9653 static int add_root_item_to_list(struct list_head *head,
9654                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9655                                   u8 level, u8 drop_level,
9656                                   int level_size, struct btrfs_key *drop_key)
9657 {
9658
9659         struct root_item_record *ri_rec;
9660         ri_rec = malloc(sizeof(*ri_rec));
9661         if (!ri_rec)
9662                 return -ENOMEM;
9663         ri_rec->bytenr = bytenr;
9664         ri_rec->objectid = objectid;
9665         ri_rec->level = level;
9666         ri_rec->level_size = level_size;
9667         ri_rec->drop_level = drop_level;
9668         ri_rec->last_snapshot = last_snapshot;
9669         if (drop_key)
9670                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9671         list_add_tail(&ri_rec->list, head);
9672
9673         return 0;
9674 }
9675
9676 static void free_root_item_list(struct list_head *list)
9677 {
9678         struct root_item_record *ri_rec;
9679
9680         while (!list_empty(list)) {
9681                 ri_rec = list_first_entry(list, struct root_item_record,
9682                                           list);
9683                 list_del_init(&ri_rec->list);
9684                 free(ri_rec);
9685         }
9686 }
9687
9688 static int deal_root_from_list(struct list_head *list,
9689                                struct btrfs_root *root,
9690                                struct block_info *bits,
9691                                int bits_nr,
9692                                struct cache_tree *pending,
9693                                struct cache_tree *seen,
9694                                struct cache_tree *reada,
9695                                struct cache_tree *nodes,
9696                                struct cache_tree *extent_cache,
9697                                struct cache_tree *chunk_cache,
9698                                struct rb_root *dev_cache,
9699                                struct block_group_tree *block_group_cache,
9700                                struct device_extent_tree *dev_extent_cache)
9701 {
9702         int ret = 0;
9703         u64 last;
9704
9705         while (!list_empty(list)) {
9706                 struct root_item_record *rec;
9707                 struct extent_buffer *buf;
9708                 rec = list_entry(list->next,
9709                                  struct root_item_record, list);
9710                 last = 0;
9711                 buf = read_tree_block(root->fs_info->tree_root,
9712                                       rec->bytenr, rec->level_size, 0);
9713                 if (!extent_buffer_uptodate(buf)) {
9714                         free_extent_buffer(buf);
9715                         ret = -EIO;
9716                         break;
9717                 }
9718                 ret = add_root_to_pending(buf, extent_cache, pending,
9719                                     seen, nodes, rec->objectid);
9720                 if (ret < 0)
9721                         break;
9722                 /*
9723                  * To rebuild extent tree, we need deal with snapshot
9724                  * one by one, otherwise we deal with node firstly which
9725                  * can maximize readahead.
9726                  */
9727                 while (1) {
9728                         ret = run_next_block(root, bits, bits_nr, &last,
9729                                              pending, seen, reada, nodes,
9730                                              extent_cache, chunk_cache,
9731                                              dev_cache, block_group_cache,
9732                                              dev_extent_cache, rec);
9733                         if (ret != 0)
9734                                 break;
9735                 }
9736                 free_extent_buffer(buf);
9737                 list_del(&rec->list);
9738                 free(rec);
9739                 if (ret < 0)
9740                         break;
9741         }
9742         while (ret >= 0) {
9743                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9744                                      reada, nodes, extent_cache, chunk_cache,
9745                                      dev_cache, block_group_cache,
9746                                      dev_extent_cache, NULL);
9747                 if (ret != 0) {
9748                         if (ret > 0)
9749                                 ret = 0;
9750                         break;
9751                 }
9752         }
9753         return ret;
9754 }
9755
9756 static int check_chunks_and_extents(struct btrfs_root *root)
9757 {
9758         struct rb_root dev_cache;
9759         struct cache_tree chunk_cache;
9760         struct block_group_tree block_group_cache;
9761         struct device_extent_tree dev_extent_cache;
9762         struct cache_tree extent_cache;
9763         struct cache_tree seen;
9764         struct cache_tree pending;
9765         struct cache_tree reada;
9766         struct cache_tree nodes;
9767         struct extent_io_tree excluded_extents;
9768         struct cache_tree corrupt_blocks;
9769         struct btrfs_path path;
9770         struct btrfs_key key;
9771         struct btrfs_key found_key;
9772         int ret, err = 0;
9773         struct block_info *bits;
9774         int bits_nr;
9775         struct extent_buffer *leaf;
9776         int slot;
9777         struct btrfs_root_item ri;
9778         struct list_head dropping_trees;
9779         struct list_head normal_trees;
9780         struct btrfs_root *root1;
9781         u64 objectid;
9782         u32 level_size;
9783         u8 level;
9784
9785         dev_cache = RB_ROOT;
9786         cache_tree_init(&chunk_cache);
9787         block_group_tree_init(&block_group_cache);
9788         device_extent_tree_init(&dev_extent_cache);
9789
9790         cache_tree_init(&extent_cache);
9791         cache_tree_init(&seen);
9792         cache_tree_init(&pending);
9793         cache_tree_init(&nodes);
9794         cache_tree_init(&reada);
9795         cache_tree_init(&corrupt_blocks);
9796         extent_io_tree_init(&excluded_extents);
9797         INIT_LIST_HEAD(&dropping_trees);
9798         INIT_LIST_HEAD(&normal_trees);
9799
9800         if (repair) {
9801                 root->fs_info->excluded_extents = &excluded_extents;
9802                 root->fs_info->fsck_extent_cache = &extent_cache;
9803                 root->fs_info->free_extent_hook = free_extent_hook;
9804                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9805         }
9806
9807         bits_nr = 1024;
9808         bits = malloc(bits_nr * sizeof(struct block_info));
9809         if (!bits) {
9810                 perror("malloc");
9811                 exit(1);
9812         }
9813
9814         if (ctx.progress_enabled) {
9815                 ctx.tp = TASK_EXTENTS;
9816                 task_start(ctx.info);
9817         }
9818
9819 again:
9820         root1 = root->fs_info->tree_root;
9821         level = btrfs_header_level(root1->node);
9822         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9823                                     root1->node->start, 0, level, 0,
9824                                     root1->nodesize, NULL);
9825         if (ret < 0)
9826                 goto out;
9827         root1 = root->fs_info->chunk_root;
9828         level = btrfs_header_level(root1->node);
9829         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9830                                     root1->node->start, 0, level, 0,
9831                                     root1->nodesize, NULL);
9832         if (ret < 0)
9833                 goto out;
9834         btrfs_init_path(&path);
9835         key.offset = 0;
9836         key.objectid = 0;
9837         key.type = BTRFS_ROOT_ITEM_KEY;
9838         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9839                                         &key, &path, 0, 0);
9840         if (ret < 0)
9841                 goto out;
9842         while(1) {
9843                 leaf = path.nodes[0];
9844                 slot = path.slots[0];
9845                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9846                         ret = btrfs_next_leaf(root, &path);
9847                         if (ret != 0)
9848                                 break;
9849                         leaf = path.nodes[0];
9850                         slot = path.slots[0];
9851                 }
9852                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9853                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9854                         unsigned long offset;
9855                         u64 last_snapshot;
9856
9857                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9858                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9859                         last_snapshot = btrfs_root_last_snapshot(&ri);
9860                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9861                                 level = btrfs_root_level(&ri);
9862                                 level_size = root->nodesize;
9863                                 ret = add_root_item_to_list(&normal_trees,
9864                                                 found_key.objectid,
9865                                                 btrfs_root_bytenr(&ri),
9866                                                 last_snapshot, level,
9867                                                 0, level_size, NULL);
9868                                 if (ret < 0)
9869                                         goto out;
9870                         } else {
9871                                 level = btrfs_root_level(&ri);
9872                                 level_size = root->nodesize;
9873                                 objectid = found_key.objectid;
9874                                 btrfs_disk_key_to_cpu(&found_key,
9875                                                       &ri.drop_progress);
9876                                 ret = add_root_item_to_list(&dropping_trees,
9877                                                 objectid,
9878                                                 btrfs_root_bytenr(&ri),
9879                                                 last_snapshot, level,
9880                                                 ri.drop_level,
9881                                                 level_size, &found_key);
9882                                 if (ret < 0)
9883                                         goto out;
9884                         }
9885                 }
9886                 path.slots[0]++;
9887         }
9888         btrfs_release_path(&path);
9889
9890         /*
9891          * check_block can return -EAGAIN if it fixes something, please keep
9892          * this in mind when dealing with return values from these functions, if
9893          * we get -EAGAIN we want to fall through and restart the loop.
9894          */
9895         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9896                                   &seen, &reada, &nodes, &extent_cache,
9897                                   &chunk_cache, &dev_cache, &block_group_cache,
9898                                   &dev_extent_cache);
9899         if (ret < 0) {
9900                 if (ret == -EAGAIN)
9901                         goto loop;
9902                 goto out;
9903         }
9904         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9905                                   &pending, &seen, &reada, &nodes,
9906                                   &extent_cache, &chunk_cache, &dev_cache,
9907                                   &block_group_cache, &dev_extent_cache);
9908         if (ret < 0) {
9909                 if (ret == -EAGAIN)
9910                         goto loop;
9911                 goto out;
9912         }
9913
9914         ret = check_chunks(&chunk_cache, &block_group_cache,
9915                            &dev_extent_cache, NULL, NULL, NULL, 0);
9916         if (ret) {
9917                 if (ret == -EAGAIN)
9918                         goto loop;
9919                 err = ret;
9920         }
9921
9922         ret = check_extent_refs(root, &extent_cache);
9923         if (ret < 0) {
9924                 if (ret == -EAGAIN)
9925                         goto loop;
9926                 goto out;
9927         }
9928
9929         ret = check_devices(&dev_cache, &dev_extent_cache);
9930         if (ret && err)
9931                 ret = err;
9932
9933 out:
9934         task_stop(ctx.info);
9935         if (repair) {
9936                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9937                 extent_io_tree_cleanup(&excluded_extents);
9938                 root->fs_info->fsck_extent_cache = NULL;
9939                 root->fs_info->free_extent_hook = NULL;
9940                 root->fs_info->corrupt_blocks = NULL;
9941                 root->fs_info->excluded_extents = NULL;
9942         }
9943         free(bits);
9944         free_chunk_cache_tree(&chunk_cache);
9945         free_device_cache_tree(&dev_cache);
9946         free_block_group_tree(&block_group_cache);
9947         free_device_extent_tree(&dev_extent_cache);
9948         free_extent_cache_tree(&seen);
9949         free_extent_cache_tree(&pending);
9950         free_extent_cache_tree(&reada);
9951         free_extent_cache_tree(&nodes);
9952         return ret;
9953 loop:
9954         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9955         free_extent_cache_tree(&seen);
9956         free_extent_cache_tree(&pending);
9957         free_extent_cache_tree(&reada);
9958         free_extent_cache_tree(&nodes);
9959         free_chunk_cache_tree(&chunk_cache);
9960         free_block_group_tree(&block_group_cache);
9961         free_device_cache_tree(&dev_cache);
9962         free_device_extent_tree(&dev_extent_cache);
9963         free_extent_record_cache(&extent_cache);
9964         free_root_item_list(&normal_trees);
9965         free_root_item_list(&dropping_trees);
9966         extent_io_tree_cleanup(&excluded_extents);
9967         goto again;
9968 }
9969
9970 /*
9971  * Check backrefs of a tree block given by @bytenr or @eb.
9972  *
9973  * @root:       the root containing the @bytenr or @eb
9974  * @eb:         tree block extent buffer, can be NULL
9975  * @bytenr:     bytenr of the tree block to search
9976  * @level:      tree level of the tree block
9977  * @owner:      owner of the tree block
9978  *
9979  * Return >0 for any error found and output error message
9980  * Return 0 for no error found
9981  */
9982 static int check_tree_block_ref(struct btrfs_root *root,
9983                                 struct extent_buffer *eb, u64 bytenr,
9984                                 int level, u64 owner)
9985 {
9986         struct btrfs_key key;
9987         struct btrfs_root *extent_root = root->fs_info->extent_root;
9988         struct btrfs_path path;
9989         struct btrfs_extent_item *ei;
9990         struct btrfs_extent_inline_ref *iref;
9991         struct extent_buffer *leaf;
9992         unsigned long end;
9993         unsigned long ptr;
9994         int slot;
9995         int skinny_level;
9996         int type;
9997         u32 nodesize = root->nodesize;
9998         u32 item_size;
9999         u64 offset;
10000         int tree_reloc_root = 0;
10001         int found_ref = 0;
10002         int err = 0;
10003         int ret;
10004
10005         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10006             btrfs_header_bytenr(root->node) == bytenr)
10007                 tree_reloc_root = 1;
10008
10009         btrfs_init_path(&path);
10010         key.objectid = bytenr;
10011         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10012                 key.type = BTRFS_METADATA_ITEM_KEY;
10013         else
10014                 key.type = BTRFS_EXTENT_ITEM_KEY;
10015         key.offset = (u64)-1;
10016
10017         /* Search for the backref in extent tree */
10018         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10019         if (ret < 0) {
10020                 err |= BACKREF_MISSING;
10021                 goto out;
10022         }
10023         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10024         if (ret) {
10025                 err |= BACKREF_MISSING;
10026                 goto out;
10027         }
10028
10029         leaf = path.nodes[0];
10030         slot = path.slots[0];
10031         btrfs_item_key_to_cpu(leaf, &key, slot);
10032
10033         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10034
10035         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10036                 skinny_level = (int)key.offset;
10037                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10038         } else {
10039                 struct btrfs_tree_block_info *info;
10040
10041                 info = (struct btrfs_tree_block_info *)(ei + 1);
10042                 skinny_level = btrfs_tree_block_level(leaf, info);
10043                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10044         }
10045
10046         if (eb) {
10047                 u64 header_gen;
10048                 u64 extent_gen;
10049
10050                 if (!(btrfs_extent_flags(leaf, ei) &
10051                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10052                         error(
10053                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10054                                 key.objectid, nodesize,
10055                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10056                         err = BACKREF_MISMATCH;
10057                 }
10058                 header_gen = btrfs_header_generation(eb);
10059                 extent_gen = btrfs_extent_generation(leaf, ei);
10060                 if (header_gen != extent_gen) {
10061                         error(
10062         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10063                                 key.objectid, nodesize, header_gen,
10064                                 extent_gen);
10065                         err = BACKREF_MISMATCH;
10066                 }
10067                 if (level != skinny_level) {
10068                         error(
10069                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10070                                 key.objectid, nodesize, level, skinny_level);
10071                         err = BACKREF_MISMATCH;
10072                 }
10073                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10074                         error(
10075                         "extent[%llu %u] is referred by other roots than %llu",
10076                                 key.objectid, nodesize, root->objectid);
10077                         err = BACKREF_MISMATCH;
10078                 }
10079         }
10080
10081         /*
10082          * Iterate the extent/metadata item to find the exact backref
10083          */
10084         item_size = btrfs_item_size_nr(leaf, slot);
10085         ptr = (unsigned long)iref;
10086         end = (unsigned long)ei + item_size;
10087         while (ptr < end) {
10088                 iref = (struct btrfs_extent_inline_ref *)ptr;
10089                 type = btrfs_extent_inline_ref_type(leaf, iref);
10090                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10091
10092                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10093                         (offset == root->objectid || offset == owner)) {
10094                         found_ref = 1;
10095                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10096                         /*
10097                          * Backref of tree reloc root points to itself, no need
10098                          * to check backref any more.
10099                          */
10100                         if (tree_reloc_root)
10101                                 found_ref = 1;
10102                         else
10103                         /* Check if the backref points to valid referencer */
10104                                 found_ref = !check_tree_block_ref(root, NULL,
10105                                                 offset, level + 1, owner);
10106                 }
10107
10108                 if (found_ref)
10109                         break;
10110                 ptr += btrfs_extent_inline_ref_size(type);
10111         }
10112
10113         /*
10114          * Inlined extent item doesn't have what we need, check
10115          * TREE_BLOCK_REF_KEY
10116          */
10117         if (!found_ref) {
10118                 btrfs_release_path(&path);
10119                 key.objectid = bytenr;
10120                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10121                 key.offset = root->objectid;
10122
10123                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10124                 if (!ret)
10125                         found_ref = 1;
10126         }
10127         if (!found_ref)
10128                 err |= BACKREF_MISSING;
10129 out:
10130         btrfs_release_path(&path);
10131         if (eb && (err & BACKREF_MISSING))
10132                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10133                         bytenr, nodesize, owner, level);
10134         return err;
10135 }
10136
10137 /*
10138  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10139  *
10140  * Return >0 any error found and output error message
10141  * Return 0 for no error found
10142  */
10143 static int check_extent_data_item(struct btrfs_root *root,
10144                                   struct extent_buffer *eb, int slot)
10145 {
10146         struct btrfs_file_extent_item *fi;
10147         struct btrfs_path path;
10148         struct btrfs_root *extent_root = root->fs_info->extent_root;
10149         struct btrfs_key fi_key;
10150         struct btrfs_key dbref_key;
10151         struct extent_buffer *leaf;
10152         struct btrfs_extent_item *ei;
10153         struct btrfs_extent_inline_ref *iref;
10154         struct btrfs_extent_data_ref *dref;
10155         u64 owner;
10156         u64 disk_bytenr;
10157         u64 disk_num_bytes;
10158         u64 extent_num_bytes;
10159         u64 extent_flags;
10160         u32 item_size;
10161         unsigned long end;
10162         unsigned long ptr;
10163         int type;
10164         u64 ref_root;
10165         int found_dbackref = 0;
10166         int err = 0;
10167         int ret;
10168
10169         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10170         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10171
10172         /* Nothing to check for hole and inline data extents */
10173         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10174             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10175                 return 0;
10176
10177         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10178         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10179         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10180
10181         /* Check unaligned disk_num_bytes and num_bytes */
10182         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10183                 error(
10184 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10185                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10186                         root->sectorsize);
10187                 err |= BYTES_UNALIGNED;
10188         } else {
10189                 data_bytes_allocated += disk_num_bytes;
10190         }
10191         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10192                 error(
10193 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10194                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10195                         root->sectorsize);
10196                 err |= BYTES_UNALIGNED;
10197         } else {
10198                 data_bytes_referenced += extent_num_bytes;
10199         }
10200         owner = btrfs_header_owner(eb);
10201
10202         /* Check the extent item of the file extent in extent tree */
10203         btrfs_init_path(&path);
10204         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10205         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10206         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10207
10208         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10209         if (ret) {
10210                 err |= BACKREF_MISSING;
10211                 goto error;
10212         }
10213
10214         leaf = path.nodes[0];
10215         slot = path.slots[0];
10216         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10217
10218         extent_flags = btrfs_extent_flags(leaf, ei);
10219
10220         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10221                 error(
10222                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10223                     disk_bytenr, disk_num_bytes,
10224                     BTRFS_EXTENT_FLAG_DATA);
10225                 err |= BACKREF_MISMATCH;
10226         }
10227
10228         /* Check data backref inside that extent item */
10229         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10230         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10231         ptr = (unsigned long)iref;
10232         end = (unsigned long)ei + item_size;
10233         while (ptr < end) {
10234                 iref = (struct btrfs_extent_inline_ref *)ptr;
10235                 type = btrfs_extent_inline_ref_type(leaf, iref);
10236                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10237
10238                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10239                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10240                         if (ref_root == owner || ref_root == root->objectid)
10241                                 found_dbackref = 1;
10242                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10243                         found_dbackref = !check_tree_block_ref(root, NULL,
10244                                 btrfs_extent_inline_ref_offset(leaf, iref),
10245                                 0, owner);
10246                 }
10247
10248                 if (found_dbackref)
10249                         break;
10250                 ptr += btrfs_extent_inline_ref_size(type);
10251         }
10252
10253         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10254         if (!found_dbackref) {
10255                 btrfs_release_path(&path);
10256
10257                 btrfs_init_path(&path);
10258                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10259                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10260                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10261                                 fi_key.objectid, fi_key.offset);
10262
10263                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10264                                         &dbref_key, &path, 0, 0);
10265                 if (!ret)
10266                         found_dbackref = 1;
10267         }
10268
10269         if (!found_dbackref)
10270                 err |= BACKREF_MISSING;
10271 error:
10272         btrfs_release_path(&path);
10273         if (err & BACKREF_MISSING) {
10274                 error("data extent[%llu %llu] backref lost",
10275                       disk_bytenr, disk_num_bytes);
10276         }
10277         return err;
10278 }
10279
10280 /*
10281  * Get real tree block level for the case like shared block
10282  * Return >= 0 as tree level
10283  * Return <0 for error
10284  */
10285 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10286 {
10287         struct extent_buffer *eb;
10288         struct btrfs_path path;
10289         struct btrfs_key key;
10290         struct btrfs_extent_item *ei;
10291         u64 flags;
10292         u64 transid;
10293         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10294         u8 backref_level;
10295         u8 header_level;
10296         int ret;
10297
10298         /* Search extent tree for extent generation and level */
10299         key.objectid = bytenr;
10300         key.type = BTRFS_METADATA_ITEM_KEY;
10301         key.offset = (u64)-1;
10302
10303         btrfs_init_path(&path);
10304         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10305         if (ret < 0)
10306                 goto release_out;
10307         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10308         if (ret < 0)
10309                 goto release_out;
10310         if (ret > 0) {
10311                 ret = -ENOENT;
10312                 goto release_out;
10313         }
10314
10315         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10316         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10317                             struct btrfs_extent_item);
10318         flags = btrfs_extent_flags(path.nodes[0], ei);
10319         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10320                 ret = -ENOENT;
10321                 goto release_out;
10322         }
10323
10324         /* Get transid for later read_tree_block() check */
10325         transid = btrfs_extent_generation(path.nodes[0], ei);
10326
10327         /* Get backref level as one source */
10328         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10329                 backref_level = key.offset;
10330         } else {
10331                 struct btrfs_tree_block_info *info;
10332
10333                 info = (struct btrfs_tree_block_info *)(ei + 1);
10334                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10335         }
10336         btrfs_release_path(&path);
10337
10338         /* Get level from tree block as an alternative source */
10339         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10340         if (!extent_buffer_uptodate(eb)) {
10341                 free_extent_buffer(eb);
10342                 return -EIO;
10343         }
10344         header_level = btrfs_header_level(eb);
10345         free_extent_buffer(eb);
10346
10347         if (header_level != backref_level)
10348                 return -EIO;
10349         return header_level;
10350
10351 release_out:
10352         btrfs_release_path(&path);
10353         return ret;
10354 }
10355
10356 /*
10357  * Check if a tree block backref is valid (points to a valid tree block)
10358  * if level == -1, level will be resolved
10359  * Return >0 for any error found and print error message
10360  */
10361 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10362                                     u64 bytenr, int level)
10363 {
10364         struct btrfs_root *root;
10365         struct btrfs_key key;
10366         struct btrfs_path path;
10367         struct extent_buffer *eb;
10368         struct extent_buffer *node;
10369         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10370         int err = 0;
10371         int ret;
10372
10373         /* Query level for level == -1 special case */
10374         if (level == -1)
10375                 level = query_tree_block_level(fs_info, bytenr);
10376         if (level < 0) {
10377                 err |= REFERENCER_MISSING;
10378                 goto out;
10379         }
10380
10381         key.objectid = root_id;
10382         key.type = BTRFS_ROOT_ITEM_KEY;
10383         key.offset = (u64)-1;
10384
10385         root = btrfs_read_fs_root(fs_info, &key);
10386         if (IS_ERR(root)) {
10387                 err |= REFERENCER_MISSING;
10388                 goto out;
10389         }
10390
10391         /* Read out the tree block to get item/node key */
10392         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10393         if (!extent_buffer_uptodate(eb)) {
10394                 err |= REFERENCER_MISSING;
10395                 free_extent_buffer(eb);
10396                 goto out;
10397         }
10398
10399         /* Empty tree, no need to check key */
10400         if (!btrfs_header_nritems(eb) && !level) {
10401                 free_extent_buffer(eb);
10402                 goto out;
10403         }
10404
10405         if (level)
10406                 btrfs_node_key_to_cpu(eb, &key, 0);
10407         else
10408                 btrfs_item_key_to_cpu(eb, &key, 0);
10409
10410         free_extent_buffer(eb);
10411
10412         btrfs_init_path(&path);
10413         path.lowest_level = level;
10414         /* Search with the first key, to ensure we can reach it */
10415         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10416         if (ret < 0) {
10417                 err |= REFERENCER_MISSING;
10418                 goto release_out;
10419         }
10420
10421         node = path.nodes[level];
10422         if (btrfs_header_bytenr(node) != bytenr) {
10423                 error(
10424         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10425                         bytenr, nodesize, bytenr,
10426                         btrfs_header_bytenr(node));
10427                 err |= REFERENCER_MISMATCH;
10428         }
10429         if (btrfs_header_level(node) != level) {
10430                 error(
10431         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10432                         bytenr, nodesize, level,
10433                         btrfs_header_level(node));
10434                 err |= REFERENCER_MISMATCH;
10435         }
10436
10437 release_out:
10438         btrfs_release_path(&path);
10439 out:
10440         if (err & REFERENCER_MISSING) {
10441                 if (level < 0)
10442                         error("extent [%llu %d] lost referencer (owner: %llu)",
10443                                 bytenr, nodesize, root_id);
10444                 else
10445                         error(
10446                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10447                                 bytenr, nodesize, root_id, level);
10448         }
10449
10450         return err;
10451 }
10452
10453 /*
10454  * Check if tree block @eb is tree reloc root.
10455  * Return 0 if it's not or any problem happens
10456  * Return 1 if it's a tree reloc root
10457  */
10458 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10459                                  struct extent_buffer *eb)
10460 {
10461         struct btrfs_root *tree_reloc_root;
10462         struct btrfs_key key;
10463         u64 bytenr = btrfs_header_bytenr(eb);
10464         u64 owner = btrfs_header_owner(eb);
10465         int ret = 0;
10466
10467         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10468         key.offset = owner;
10469         key.type = BTRFS_ROOT_ITEM_KEY;
10470
10471         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10472         if (IS_ERR(tree_reloc_root))
10473                 return 0;
10474
10475         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10476                 ret = 1;
10477         btrfs_free_fs_root(tree_reloc_root);
10478         return ret;
10479 }
10480
10481 /*
10482  * Check referencer for shared block backref
10483  * If level == -1, this function will resolve the level.
10484  */
10485 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10486                                      u64 parent, u64 bytenr, int level)
10487 {
10488         struct extent_buffer *eb;
10489         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10490         u32 nr;
10491         int found_parent = 0;
10492         int i;
10493
10494         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10495         if (!extent_buffer_uptodate(eb))
10496                 goto out;
10497
10498         if (level == -1)
10499                 level = query_tree_block_level(fs_info, bytenr);
10500         if (level < 0)
10501                 goto out;
10502
10503         /* It's possible it's a tree reloc root */
10504         if (parent == bytenr) {
10505                 if (is_tree_reloc_root(fs_info, eb))
10506                         found_parent = 1;
10507                 goto out;
10508         }
10509
10510         if (level + 1 != btrfs_header_level(eb))
10511                 goto out;
10512
10513         nr = btrfs_header_nritems(eb);
10514         for (i = 0; i < nr; i++) {
10515                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10516                         found_parent = 1;
10517                         break;
10518                 }
10519         }
10520 out:
10521         free_extent_buffer(eb);
10522         if (!found_parent) {
10523                 error(
10524         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10525                         bytenr, nodesize, parent, level);
10526                 return REFERENCER_MISSING;
10527         }
10528         return 0;
10529 }
10530
10531 /*
10532  * Check referencer for normal (inlined) data ref
10533  * If len == 0, it will be resolved by searching in extent tree
10534  */
10535 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10536                                      u64 root_id, u64 objectid, u64 offset,
10537                                      u64 bytenr, u64 len, u32 count)
10538 {
10539         struct btrfs_root *root;
10540         struct btrfs_root *extent_root = fs_info->extent_root;
10541         struct btrfs_key key;
10542         struct btrfs_path path;
10543         struct extent_buffer *leaf;
10544         struct btrfs_file_extent_item *fi;
10545         u32 found_count = 0;
10546         int slot;
10547         int ret = 0;
10548
10549         if (!len) {
10550                 key.objectid = bytenr;
10551                 key.type = BTRFS_EXTENT_ITEM_KEY;
10552                 key.offset = (u64)-1;
10553
10554                 btrfs_init_path(&path);
10555                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10556                 if (ret < 0)
10557                         goto out;
10558                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10559                 if (ret)
10560                         goto out;
10561                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10562                 if (key.objectid != bytenr ||
10563                     key.type != BTRFS_EXTENT_ITEM_KEY)
10564                         goto out;
10565                 len = key.offset;
10566                 btrfs_release_path(&path);
10567         }
10568         key.objectid = root_id;
10569         key.type = BTRFS_ROOT_ITEM_KEY;
10570         key.offset = (u64)-1;
10571         btrfs_init_path(&path);
10572
10573         root = btrfs_read_fs_root(fs_info, &key);
10574         if (IS_ERR(root))
10575                 goto out;
10576
10577         key.objectid = objectid;
10578         key.type = BTRFS_EXTENT_DATA_KEY;
10579         /*
10580          * It can be nasty as data backref offset is
10581          * file offset - file extent offset, which is smaller or
10582          * equal to original backref offset.  The only special case is
10583          * overflow.  So we need to special check and do further search.
10584          */
10585         key.offset = offset & (1ULL << 63) ? 0 : offset;
10586
10587         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10588         if (ret < 0)
10589                 goto out;
10590
10591         /*
10592          * Search afterwards to get correct one
10593          * NOTE: As we must do a comprehensive check on the data backref to
10594          * make sure the dref count also matches, we must iterate all file
10595          * extents for that inode.
10596          */
10597         while (1) {
10598                 leaf = path.nodes[0];
10599                 slot = path.slots[0];
10600
10601                 if (slot >= btrfs_header_nritems(leaf))
10602                         goto next;
10603                 btrfs_item_key_to_cpu(leaf, &key, slot);
10604                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10605                         break;
10606                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10607                 /*
10608                  * Except normal disk bytenr and disk num bytes, we still
10609                  * need to do extra check on dbackref offset as
10610                  * dbackref offset = file_offset - file_extent_offset
10611                  */
10612                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10613                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10614                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10615                     offset)
10616                         found_count++;
10617
10618 next:
10619                 ret = btrfs_next_item(root, &path);
10620                 if (ret)
10621                         break;
10622         }
10623 out:
10624         btrfs_release_path(&path);
10625         if (found_count != count) {
10626                 error(
10627 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10628                         bytenr, len, root_id, objectid, offset, count, found_count);
10629                 return REFERENCER_MISSING;
10630         }
10631         return 0;
10632 }
10633
10634 /*
10635  * Check if the referencer of a shared data backref exists
10636  */
10637 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10638                                      u64 parent, u64 bytenr)
10639 {
10640         struct extent_buffer *eb;
10641         struct btrfs_key key;
10642         struct btrfs_file_extent_item *fi;
10643         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10644         u32 nr;
10645         int found_parent = 0;
10646         int i;
10647
10648         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10649         if (!extent_buffer_uptodate(eb))
10650                 goto out;
10651
10652         nr = btrfs_header_nritems(eb);
10653         for (i = 0; i < nr; i++) {
10654                 btrfs_item_key_to_cpu(eb, &key, i);
10655                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10656                         continue;
10657
10658                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10659                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10660                         continue;
10661
10662                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10663                         found_parent = 1;
10664                         break;
10665                 }
10666         }
10667
10668 out:
10669         free_extent_buffer(eb);
10670         if (!found_parent) {
10671                 error("shared extent %llu referencer lost (parent: %llu)",
10672                         bytenr, parent);
10673                 return REFERENCER_MISSING;
10674         }
10675         return 0;
10676 }
10677
10678 /*
10679  * This function will check a given extent item, including its backref and
10680  * itself (like crossing stripe boundary and type)
10681  *
10682  * Since we don't use extent_record anymore, introduce new error bit
10683  */
10684 static int check_extent_item(struct btrfs_fs_info *fs_info,
10685                              struct extent_buffer *eb, int slot)
10686 {
10687         struct btrfs_extent_item *ei;
10688         struct btrfs_extent_inline_ref *iref;
10689         struct btrfs_extent_data_ref *dref;
10690         unsigned long end;
10691         unsigned long ptr;
10692         int type;
10693         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10694         u32 item_size = btrfs_item_size_nr(eb, slot);
10695         u64 flags;
10696         u64 offset;
10697         int metadata = 0;
10698         int level;
10699         struct btrfs_key key;
10700         int ret;
10701         int err = 0;
10702
10703         btrfs_item_key_to_cpu(eb, &key, slot);
10704         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10705                 bytes_used += key.offset;
10706         else
10707                 bytes_used += nodesize;
10708
10709         if (item_size < sizeof(*ei)) {
10710                 /*
10711                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10712                  * old thing when on disk format is still un-determined.
10713                  * No need to care about it anymore
10714                  */
10715                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10716                 return -ENOTTY;
10717         }
10718
10719         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10720         flags = btrfs_extent_flags(eb, ei);
10721
10722         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10723                 metadata = 1;
10724         if (metadata && check_crossing_stripes(global_info, key.objectid,
10725                                                eb->len)) {
10726                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10727                       key.objectid, key.objectid + nodesize);
10728                 err |= CROSSING_STRIPE_BOUNDARY;
10729         }
10730
10731         ptr = (unsigned long)(ei + 1);
10732
10733         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10734                 /* Old EXTENT_ITEM metadata */
10735                 struct btrfs_tree_block_info *info;
10736
10737                 info = (struct btrfs_tree_block_info *)ptr;
10738                 level = btrfs_tree_block_level(eb, info);
10739                 ptr += sizeof(struct btrfs_tree_block_info);
10740         } else {
10741                 /* New METADATA_ITEM */
10742                 level = key.offset;
10743         }
10744         end = (unsigned long)ei + item_size;
10745
10746 next:
10747         /* Reached extent item end normally */
10748         if (ptr == end)
10749                 goto out;
10750
10751         /* Beyond extent item end, wrong item size */
10752         if (ptr > end) {
10753                 err |= ITEM_SIZE_MISMATCH;
10754                 error("extent item at bytenr %llu slot %d has wrong size",
10755                         eb->start, slot);
10756                 goto out;
10757         }
10758
10759         /* Now check every backref in this extent item */
10760         iref = (struct btrfs_extent_inline_ref *)ptr;
10761         type = btrfs_extent_inline_ref_type(eb, iref);
10762         offset = btrfs_extent_inline_ref_offset(eb, iref);
10763         switch (type) {
10764         case BTRFS_TREE_BLOCK_REF_KEY:
10765                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10766                                                level);
10767                 err |= ret;
10768                 break;
10769         case BTRFS_SHARED_BLOCK_REF_KEY:
10770                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10771                                                  level);
10772                 err |= ret;
10773                 break;
10774         case BTRFS_EXTENT_DATA_REF_KEY:
10775                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10776                 ret = check_extent_data_backref(fs_info,
10777                                 btrfs_extent_data_ref_root(eb, dref),
10778                                 btrfs_extent_data_ref_objectid(eb, dref),
10779                                 btrfs_extent_data_ref_offset(eb, dref),
10780                                 key.objectid, key.offset,
10781                                 btrfs_extent_data_ref_count(eb, dref));
10782                 err |= ret;
10783                 break;
10784         case BTRFS_SHARED_DATA_REF_KEY:
10785                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10786                 err |= ret;
10787                 break;
10788         default:
10789                 error("extent[%llu %d %llu] has unknown ref type: %d",
10790                         key.objectid, key.type, key.offset, type);
10791                 err |= UNKNOWN_TYPE;
10792                 goto out;
10793         }
10794
10795         ptr += btrfs_extent_inline_ref_size(type);
10796         goto next;
10797
10798 out:
10799         return err;
10800 }
10801
10802 /*
10803  * Check if a dev extent item is referred correctly by its chunk
10804  */
10805 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10806                                  struct extent_buffer *eb, int slot)
10807 {
10808         struct btrfs_root *chunk_root = fs_info->chunk_root;
10809         struct btrfs_dev_extent *ptr;
10810         struct btrfs_path path;
10811         struct btrfs_key chunk_key;
10812         struct btrfs_key devext_key;
10813         struct btrfs_chunk *chunk;
10814         struct extent_buffer *l;
10815         int num_stripes;
10816         u64 length;
10817         int i;
10818         int found_chunk = 0;
10819         int ret;
10820
10821         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10822         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10823         length = btrfs_dev_extent_length(eb, ptr);
10824
10825         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10826         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10827         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10828
10829         btrfs_init_path(&path);
10830         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10831         if (ret)
10832                 goto out;
10833
10834         l = path.nodes[0];
10835         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10836         if (btrfs_chunk_length(l, chunk) != length)
10837                 goto out;
10838
10839         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10840         for (i = 0; i < num_stripes; i++) {
10841                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10842                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10843
10844                 if (devid == devext_key.objectid &&
10845                     offset == devext_key.offset) {
10846                         found_chunk = 1;
10847                         break;
10848                 }
10849         }
10850 out:
10851         btrfs_release_path(&path);
10852         if (!found_chunk) {
10853                 error(
10854                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10855                         devext_key.objectid, devext_key.offset, length);
10856                 return REFERENCER_MISSING;
10857         }
10858         return 0;
10859 }
10860
10861 /*
10862  * Check if the used space is correct with the dev item
10863  */
10864 static int check_dev_item(struct btrfs_fs_info *fs_info,
10865                           struct extent_buffer *eb, int slot)
10866 {
10867         struct btrfs_root *dev_root = fs_info->dev_root;
10868         struct btrfs_dev_item *dev_item;
10869         struct btrfs_path path;
10870         struct btrfs_key key;
10871         struct btrfs_dev_extent *ptr;
10872         u64 dev_id;
10873         u64 used;
10874         u64 total = 0;
10875         int ret;
10876
10877         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10878         dev_id = btrfs_device_id(eb, dev_item);
10879         used = btrfs_device_bytes_used(eb, dev_item);
10880
10881         key.objectid = dev_id;
10882         key.type = BTRFS_DEV_EXTENT_KEY;
10883         key.offset = 0;
10884
10885         btrfs_init_path(&path);
10886         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10887         if (ret < 0) {
10888                 btrfs_item_key_to_cpu(eb, &key, slot);
10889                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10890                         key.objectid, key.type, key.offset);
10891                 btrfs_release_path(&path);
10892                 return REFERENCER_MISSING;
10893         }
10894
10895         /* Iterate dev_extents to calculate the used space of a device */
10896         while (1) {
10897                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10898                         goto next;
10899
10900                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10901                 if (key.objectid > dev_id)
10902                         break;
10903                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10904                         goto next;
10905
10906                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10907                                      struct btrfs_dev_extent);
10908                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10909 next:
10910                 ret = btrfs_next_item(dev_root, &path);
10911                 if (ret)
10912                         break;
10913         }
10914         btrfs_release_path(&path);
10915
10916         if (used != total) {
10917                 btrfs_item_key_to_cpu(eb, &key, slot);
10918                 error(
10919 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10920                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10921                         BTRFS_DEV_EXTENT_KEY, dev_id);
10922                 return ACCOUNTING_MISMATCH;
10923         }
10924         return 0;
10925 }
10926
10927 /*
10928  * Check a block group item with its referener (chunk) and its used space
10929  * with extent/metadata item
10930  */
10931 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10932                                   struct extent_buffer *eb, int slot)
10933 {
10934         struct btrfs_root *extent_root = fs_info->extent_root;
10935         struct btrfs_root *chunk_root = fs_info->chunk_root;
10936         struct btrfs_block_group_item *bi;
10937         struct btrfs_block_group_item bg_item;
10938         struct btrfs_path path;
10939         struct btrfs_key bg_key;
10940         struct btrfs_key chunk_key;
10941         struct btrfs_key extent_key;
10942         struct btrfs_chunk *chunk;
10943         struct extent_buffer *leaf;
10944         struct btrfs_extent_item *ei;
10945         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10946         u64 flags;
10947         u64 bg_flags;
10948         u64 used;
10949         u64 total = 0;
10950         int ret;
10951         int err = 0;
10952
10953         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10954         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10955         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10956         used = btrfs_block_group_used(&bg_item);
10957         bg_flags = btrfs_block_group_flags(&bg_item);
10958
10959         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10960         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10961         chunk_key.offset = bg_key.objectid;
10962
10963         btrfs_init_path(&path);
10964         /* Search for the referencer chunk */
10965         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10966         if (ret) {
10967                 error(
10968                 "block group[%llu %llu] did not find the related chunk item",
10969                         bg_key.objectid, bg_key.offset);
10970                 err |= REFERENCER_MISSING;
10971         } else {
10972                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10973                                         struct btrfs_chunk);
10974                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10975                                                 bg_key.offset) {
10976                         error(
10977         "block group[%llu %llu] related chunk item length does not match",
10978                                 bg_key.objectid, bg_key.offset);
10979                         err |= REFERENCER_MISMATCH;
10980                 }
10981         }
10982         btrfs_release_path(&path);
10983
10984         /* Search from the block group bytenr */
10985         extent_key.objectid = bg_key.objectid;
10986         extent_key.type = 0;
10987         extent_key.offset = 0;
10988
10989         btrfs_init_path(&path);
10990         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10991         if (ret < 0)
10992                 goto out;
10993
10994         /* Iterate extent tree to account used space */
10995         while (1) {
10996                 leaf = path.nodes[0];
10997
10998                 /* Search slot can point to the last item beyond leaf nritems */
10999                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11000                         goto next;
11001
11002                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11003                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11004                         break;
11005
11006                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11007                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11008                         goto next;
11009                 if (extent_key.objectid < bg_key.objectid)
11010                         goto next;
11011
11012                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11013                         total += nodesize;
11014                 else
11015                         total += extent_key.offset;
11016
11017                 ei = btrfs_item_ptr(leaf, path.slots[0],
11018                                     struct btrfs_extent_item);
11019                 flags = btrfs_extent_flags(leaf, ei);
11020                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11021                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11022                                 error(
11023                         "bad extent[%llu, %llu) type mismatch with chunk",
11024                                         extent_key.objectid,
11025                                         extent_key.objectid + extent_key.offset);
11026                                 err |= CHUNK_TYPE_MISMATCH;
11027                         }
11028                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11029                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11030                                     BTRFS_BLOCK_GROUP_METADATA))) {
11031                                 error(
11032                         "bad extent[%llu, %llu) type mismatch with chunk",
11033                                         extent_key.objectid,
11034                                         extent_key.objectid + nodesize);
11035                                 err |= CHUNK_TYPE_MISMATCH;
11036                         }
11037                 }
11038 next:
11039                 ret = btrfs_next_item(extent_root, &path);
11040                 if (ret)
11041                         break;
11042         }
11043
11044 out:
11045         btrfs_release_path(&path);
11046
11047         if (total != used) {
11048                 error(
11049                 "block group[%llu %llu] used %llu but extent items used %llu",
11050                         bg_key.objectid, bg_key.offset, used, total);
11051                 err |= ACCOUNTING_MISMATCH;
11052         }
11053         return err;
11054 }
11055
11056 /*
11057  * Check a chunk item.
11058  * Including checking all referred dev_extents and block group
11059  */
11060 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11061                             struct extent_buffer *eb, int slot)
11062 {
11063         struct btrfs_root *extent_root = fs_info->extent_root;
11064         struct btrfs_root *dev_root = fs_info->dev_root;
11065         struct btrfs_path path;
11066         struct btrfs_key chunk_key;
11067         struct btrfs_key bg_key;
11068         struct btrfs_key devext_key;
11069         struct btrfs_chunk *chunk;
11070         struct extent_buffer *leaf;
11071         struct btrfs_block_group_item *bi;
11072         struct btrfs_block_group_item bg_item;
11073         struct btrfs_dev_extent *ptr;
11074         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11075         u64 length;
11076         u64 chunk_end;
11077         u64 type;
11078         u64 profile;
11079         int num_stripes;
11080         u64 offset;
11081         u64 objectid;
11082         int i;
11083         int ret;
11084         int err = 0;
11085
11086         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11087         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11088         length = btrfs_chunk_length(eb, chunk);
11089         chunk_end = chunk_key.offset + length;
11090         if (!IS_ALIGNED(length, sectorsize)) {
11091                 error("chunk[%llu %llu) not aligned to %u",
11092                         chunk_key.offset, chunk_end, sectorsize);
11093                 err |= BYTES_UNALIGNED;
11094                 goto out;
11095         }
11096
11097         type = btrfs_chunk_type(eb, chunk);
11098         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11099         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11100                 error("chunk[%llu %llu) has no chunk type",
11101                         chunk_key.offset, chunk_end);
11102                 err |= UNKNOWN_TYPE;
11103         }
11104         if (profile && (profile & (profile - 1))) {
11105                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11106                         chunk_key.offset, chunk_end, profile);
11107                 err |= UNKNOWN_TYPE;
11108         }
11109
11110         bg_key.objectid = chunk_key.offset;
11111         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11112         bg_key.offset = length;
11113
11114         btrfs_init_path(&path);
11115         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11116         if (ret) {
11117                 error(
11118                 "chunk[%llu %llu) did not find the related block group item",
11119                         chunk_key.offset, chunk_end);
11120                 err |= REFERENCER_MISSING;
11121         } else{
11122                 leaf = path.nodes[0];
11123                 bi = btrfs_item_ptr(leaf, path.slots[0],
11124                                     struct btrfs_block_group_item);
11125                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11126                                    sizeof(bg_item));
11127                 if (btrfs_block_group_flags(&bg_item) != type) {
11128                         error(
11129 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11130                                 chunk_key.offset, chunk_end, type,
11131                                 btrfs_block_group_flags(&bg_item));
11132                         err |= REFERENCER_MISSING;
11133                 }
11134         }
11135
11136         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11137         for (i = 0; i < num_stripes; i++) {
11138                 btrfs_release_path(&path);
11139                 btrfs_init_path(&path);
11140                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11141                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11142                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11143
11144                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11145                                         0, 0);
11146                 if (ret)
11147                         goto not_match_dev;
11148
11149                 leaf = path.nodes[0];
11150                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11151                                      struct btrfs_dev_extent);
11152                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11153                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11154                 if (objectid != chunk_key.objectid ||
11155                     offset != chunk_key.offset ||
11156                     btrfs_dev_extent_length(leaf, ptr) != length)
11157                         goto not_match_dev;
11158                 continue;
11159 not_match_dev:
11160                 err |= BACKREF_MISSING;
11161                 error(
11162                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11163                         chunk_key.objectid, chunk_end, i);
11164                 continue;
11165         }
11166         btrfs_release_path(&path);
11167 out:
11168         return err;
11169 }
11170
11171 /*
11172  * Main entry function to check known items and update related accounting info
11173  */
11174 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11175 {
11176         struct btrfs_fs_info *fs_info = root->fs_info;
11177         struct btrfs_key key;
11178         int slot = 0;
11179         int type;
11180         struct btrfs_extent_data_ref *dref;
11181         int ret;
11182         int err = 0;
11183
11184 next:
11185         btrfs_item_key_to_cpu(eb, &key, slot);
11186         type = key.type;
11187
11188         switch (type) {
11189         case BTRFS_EXTENT_DATA_KEY:
11190                 ret = check_extent_data_item(root, eb, slot);
11191                 err |= ret;
11192                 break;
11193         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11194                 ret = check_block_group_item(fs_info, eb, slot);
11195                 err |= ret;
11196                 break;
11197         case BTRFS_DEV_ITEM_KEY:
11198                 ret = check_dev_item(fs_info, eb, slot);
11199                 err |= ret;
11200                 break;
11201         case BTRFS_CHUNK_ITEM_KEY:
11202                 ret = check_chunk_item(fs_info, eb, slot);
11203                 err |= ret;
11204                 break;
11205         case BTRFS_DEV_EXTENT_KEY:
11206                 ret = check_dev_extent_item(fs_info, eb, slot);
11207                 err |= ret;
11208                 break;
11209         case BTRFS_EXTENT_ITEM_KEY:
11210         case BTRFS_METADATA_ITEM_KEY:
11211                 ret = check_extent_item(fs_info, eb, slot);
11212                 err |= ret;
11213                 break;
11214         case BTRFS_EXTENT_CSUM_KEY:
11215                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11216                 break;
11217         case BTRFS_TREE_BLOCK_REF_KEY:
11218                 ret = check_tree_block_backref(fs_info, key.offset,
11219                                                key.objectid, -1);
11220                 err |= ret;
11221                 break;
11222         case BTRFS_EXTENT_DATA_REF_KEY:
11223                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11224                 ret = check_extent_data_backref(fs_info,
11225                                 btrfs_extent_data_ref_root(eb, dref),
11226                                 btrfs_extent_data_ref_objectid(eb, dref),
11227                                 btrfs_extent_data_ref_offset(eb, dref),
11228                                 key.objectid, 0,
11229                                 btrfs_extent_data_ref_count(eb, dref));
11230                 err |= ret;
11231                 break;
11232         case BTRFS_SHARED_BLOCK_REF_KEY:
11233                 ret = check_shared_block_backref(fs_info, key.offset,
11234                                                  key.objectid, -1);
11235                 err |= ret;
11236                 break;
11237         case BTRFS_SHARED_DATA_REF_KEY:
11238                 ret = check_shared_data_backref(fs_info, key.offset,
11239                                                 key.objectid);
11240                 err |= ret;
11241                 break;
11242         default:
11243                 break;
11244         }
11245
11246         if (++slot < btrfs_header_nritems(eb))
11247                 goto next;
11248
11249         return err;
11250 }
11251
11252 /*
11253  * Helper function for later fs/subvol tree check.  To determine if a tree
11254  * block should be checked.
11255  * This function will ensure only the direct referencer with lowest rootid to
11256  * check a fs/subvolume tree block.
11257  *
11258  * Backref check at extent tree would detect errors like missing subvolume
11259  * tree, so we can do aggressive check to reduce duplicated checks.
11260  */
11261 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11262 {
11263         struct btrfs_root *extent_root = root->fs_info->extent_root;
11264         struct btrfs_key key;
11265         struct btrfs_path path;
11266         struct extent_buffer *leaf;
11267         int slot;
11268         struct btrfs_extent_item *ei;
11269         unsigned long ptr;
11270         unsigned long end;
11271         int type;
11272         u32 item_size;
11273         u64 offset;
11274         struct btrfs_extent_inline_ref *iref;
11275         int ret;
11276
11277         btrfs_init_path(&path);
11278         key.objectid = btrfs_header_bytenr(eb);
11279         key.type = BTRFS_METADATA_ITEM_KEY;
11280         key.offset = (u64)-1;
11281
11282         /*
11283          * Any failure in backref resolving means we can't determine
11284          * whom the tree block belongs to.
11285          * So in that case, we need to check that tree block
11286          */
11287         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11288         if (ret < 0)
11289                 goto need_check;
11290
11291         ret = btrfs_previous_extent_item(extent_root, &path,
11292                                          btrfs_header_bytenr(eb));
11293         if (ret)
11294                 goto need_check;
11295
11296         leaf = path.nodes[0];
11297         slot = path.slots[0];
11298         btrfs_item_key_to_cpu(leaf, &key, slot);
11299         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11300
11301         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11302                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11303         } else {
11304                 struct btrfs_tree_block_info *info;
11305
11306                 info = (struct btrfs_tree_block_info *)(ei + 1);
11307                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11308         }
11309
11310         item_size = btrfs_item_size_nr(leaf, slot);
11311         ptr = (unsigned long)iref;
11312         end = (unsigned long)ei + item_size;
11313         while (ptr < end) {
11314                 iref = (struct btrfs_extent_inline_ref *)ptr;
11315                 type = btrfs_extent_inline_ref_type(leaf, iref);
11316                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11317
11318                 /*
11319                  * We only check the tree block if current root is
11320                  * the lowest referencer of it.
11321                  */
11322                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11323                     offset < root->objectid) {
11324                         btrfs_release_path(&path);
11325                         return 0;
11326                 }
11327
11328                 ptr += btrfs_extent_inline_ref_size(type);
11329         }
11330         /*
11331          * Normally we should also check keyed tree block ref, but that may be
11332          * very time consuming.  Inlined ref should already make us skip a lot
11333          * of refs now.  So skip search keyed tree block ref.
11334          */
11335
11336 need_check:
11337         btrfs_release_path(&path);
11338         return 1;
11339 }
11340
11341 /*
11342  * Traversal function for tree block. We will do:
11343  * 1) Skip shared fs/subvolume tree blocks
11344  * 2) Update related bytes accounting
11345  * 3) Pre-order traversal
11346  */
11347 static int traverse_tree_block(struct btrfs_root *root,
11348                                 struct extent_buffer *node)
11349 {
11350         struct extent_buffer *eb;
11351         struct btrfs_key key;
11352         struct btrfs_key drop_key;
11353         int level;
11354         u64 nr;
11355         int i;
11356         int err = 0;
11357         int ret;
11358
11359         /*
11360          * Skip shared fs/subvolume tree block, in that case they will
11361          * be checked by referencer with lowest rootid
11362          */
11363         if (is_fstree(root->objectid) && !should_check(root, node))
11364                 return 0;
11365
11366         /* Update bytes accounting */
11367         total_btree_bytes += node->len;
11368         if (fs_root_objectid(btrfs_header_owner(node)))
11369                 total_fs_tree_bytes += node->len;
11370         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11371                 total_extent_tree_bytes += node->len;
11372         if (!found_old_backref &&
11373             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11374             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11375             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11376                 found_old_backref = 1;
11377
11378         /* pre-order tranversal, check itself first */
11379         level = btrfs_header_level(node);
11380         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11381                                    btrfs_header_level(node),
11382                                    btrfs_header_owner(node));
11383         err |= ret;
11384         if (err)
11385                 error(
11386         "check %s failed root %llu bytenr %llu level %d, force continue check",
11387                         level ? "node":"leaf", root->objectid,
11388                         btrfs_header_bytenr(node), btrfs_header_level(node));
11389
11390         if (!level) {
11391                 btree_space_waste += btrfs_leaf_free_space(root, node);
11392                 ret = check_leaf_items(root, node);
11393                 err |= ret;
11394                 return err;
11395         }
11396
11397         nr = btrfs_header_nritems(node);
11398         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11399         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11400                 sizeof(struct btrfs_key_ptr);
11401
11402         /* Then check all its children */
11403         for (i = 0; i < nr; i++) {
11404                 u64 blocknr = btrfs_node_blockptr(node, i);
11405
11406                 btrfs_node_key_to_cpu(node, &key, i);
11407                 if (level == root->root_item.drop_level &&
11408                     is_dropped_key(&key, &drop_key))
11409                         continue;
11410
11411                 /*
11412                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11413                  * to call the function itself.
11414                  */
11415                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11416                 if (extent_buffer_uptodate(eb)) {
11417                         ret = traverse_tree_block(root, eb);
11418                         err |= ret;
11419                 }
11420                 free_extent_buffer(eb);
11421         }
11422
11423         return err;
11424 }
11425
11426 /*
11427  * Low memory usage version check_chunks_and_extents.
11428  */
11429 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11430 {
11431         struct btrfs_path path;
11432         struct btrfs_key key;
11433         struct btrfs_root *root1;
11434         struct btrfs_root *cur_root;
11435         int err = 0;
11436         int ret;
11437
11438         root1 = root->fs_info->chunk_root;
11439         ret = traverse_tree_block(root1, root1->node);
11440         err |= ret;
11441
11442         root1 = root->fs_info->tree_root;
11443         ret = traverse_tree_block(root1, root1->node);
11444         err |= ret;
11445
11446         btrfs_init_path(&path);
11447         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11448         key.offset = 0;
11449         key.type = BTRFS_ROOT_ITEM_KEY;
11450
11451         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11452         if (ret) {
11453                 error("cannot find extent treet in tree_root");
11454                 goto out;
11455         }
11456
11457         while (1) {
11458                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11459                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11460                         goto next;
11461                 key.offset = (u64)-1;
11462
11463                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11464                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11465                                         &key);
11466                 else
11467                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11468                 if (IS_ERR(cur_root) || !cur_root) {
11469                         error("failed to read tree: %lld", key.objectid);
11470                         goto next;
11471                 }
11472
11473                 ret = traverse_tree_block(cur_root, cur_root->node);
11474                 err |= ret;
11475
11476                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11477                         btrfs_free_fs_root(cur_root);
11478 next:
11479                 ret = btrfs_next_item(root1, &path);
11480                 if (ret)
11481                         goto out;
11482         }
11483
11484 out:
11485         btrfs_release_path(&path);
11486         return err;
11487 }
11488
11489 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11490                            struct btrfs_root *root, int overwrite)
11491 {
11492         struct extent_buffer *c;
11493         struct extent_buffer *old = root->node;
11494         int level;
11495         int ret;
11496         struct btrfs_disk_key disk_key = {0,0,0};
11497
11498         level = 0;
11499
11500         if (overwrite) {
11501                 c = old;
11502                 extent_buffer_get(c);
11503                 goto init;
11504         }
11505         c = btrfs_alloc_free_block(trans, root,
11506                                    root->nodesize,
11507                                    root->root_key.objectid,
11508                                    &disk_key, level, 0, 0);
11509         if (IS_ERR(c)) {
11510                 c = old;
11511                 extent_buffer_get(c);
11512                 overwrite = 1;
11513         }
11514 init:
11515         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11516         btrfs_set_header_level(c, level);
11517         btrfs_set_header_bytenr(c, c->start);
11518         btrfs_set_header_generation(c, trans->transid);
11519         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11520         btrfs_set_header_owner(c, root->root_key.objectid);
11521
11522         write_extent_buffer(c, root->fs_info->fsid,
11523                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11524
11525         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11526                             btrfs_header_chunk_tree_uuid(c),
11527                             BTRFS_UUID_SIZE);
11528
11529         btrfs_mark_buffer_dirty(c);
11530         /*
11531          * this case can happen in the following case:
11532          *
11533          * 1.overwrite previous root.
11534          *
11535          * 2.reinit reloc data root, this is because we skip pin
11536          * down reloc data tree before which means we can allocate
11537          * same block bytenr here.
11538          */
11539         if (old->start == c->start) {
11540                 btrfs_set_root_generation(&root->root_item,
11541                                           trans->transid);
11542                 root->root_item.level = btrfs_header_level(root->node);
11543                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11544                                         &root->root_key, &root->root_item);
11545                 if (ret) {
11546                         free_extent_buffer(c);
11547                         return ret;
11548                 }
11549         }
11550         free_extent_buffer(old);
11551         root->node = c;
11552         add_root_to_dirty_list(root);
11553         return 0;
11554 }
11555
11556 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11557                                 struct extent_buffer *eb, int tree_root)
11558 {
11559         struct extent_buffer *tmp;
11560         struct btrfs_root_item *ri;
11561         struct btrfs_key key;
11562         u64 bytenr;
11563         u32 nodesize;
11564         int level = btrfs_header_level(eb);
11565         int nritems;
11566         int ret;
11567         int i;
11568
11569         /*
11570          * If we have pinned this block before, don't pin it again.
11571          * This can not only avoid forever loop with broken filesystem
11572          * but also give us some speedups.
11573          */
11574         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11575                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11576                 return 0;
11577
11578         btrfs_pin_extent(fs_info, eb->start, eb->len);
11579
11580         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11581         nritems = btrfs_header_nritems(eb);
11582         for (i = 0; i < nritems; i++) {
11583                 if (level == 0) {
11584                         btrfs_item_key_to_cpu(eb, &key, i);
11585                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11586                                 continue;
11587                         /* Skip the extent root and reloc roots */
11588                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11589                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11590                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11591                                 continue;
11592                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11593                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11594
11595                         /*
11596                          * If at any point we start needing the real root we
11597                          * will have to build a stump root for the root we are
11598                          * in, but for now this doesn't actually use the root so
11599                          * just pass in extent_root.
11600                          */
11601                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11602                                               nodesize, 0);
11603                         if (!extent_buffer_uptodate(tmp)) {
11604                                 fprintf(stderr, "Error reading root block\n");
11605                                 return -EIO;
11606                         }
11607                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11608                         free_extent_buffer(tmp);
11609                         if (ret)
11610                                 return ret;
11611                 } else {
11612                         bytenr = btrfs_node_blockptr(eb, i);
11613
11614                         /* If we aren't the tree root don't read the block */
11615                         if (level == 1 && !tree_root) {
11616                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11617                                 continue;
11618                         }
11619
11620                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11621                                               nodesize, 0);
11622                         if (!extent_buffer_uptodate(tmp)) {
11623                                 fprintf(stderr, "Error reading tree block\n");
11624                                 return -EIO;
11625                         }
11626                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11627                         free_extent_buffer(tmp);
11628                         if (ret)
11629                                 return ret;
11630                 }
11631         }
11632
11633         return 0;
11634 }
11635
11636 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11637 {
11638         int ret;
11639
11640         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11641         if (ret)
11642                 return ret;
11643
11644         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11645 }
11646
11647 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11648 {
11649         struct btrfs_block_group_cache *cache;
11650         struct btrfs_path path;
11651         struct extent_buffer *leaf;
11652         struct btrfs_chunk *chunk;
11653         struct btrfs_key key;
11654         int ret;
11655         u64 start;
11656
11657         btrfs_init_path(&path);
11658         key.objectid = 0;
11659         key.type = BTRFS_CHUNK_ITEM_KEY;
11660         key.offset = 0;
11661         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11662         if (ret < 0) {
11663                 btrfs_release_path(&path);
11664                 return ret;
11665         }
11666
11667         /*
11668          * We do this in case the block groups were screwed up and had alloc
11669          * bits that aren't actually set on the chunks.  This happens with
11670          * restored images every time and could happen in real life I guess.
11671          */
11672         fs_info->avail_data_alloc_bits = 0;
11673         fs_info->avail_metadata_alloc_bits = 0;
11674         fs_info->avail_system_alloc_bits = 0;
11675
11676         /* First we need to create the in-memory block groups */
11677         while (1) {
11678                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11679                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11680                         if (ret < 0) {
11681                                 btrfs_release_path(&path);
11682                                 return ret;
11683                         }
11684                         if (ret) {
11685                                 ret = 0;
11686                                 break;
11687                         }
11688                 }
11689                 leaf = path.nodes[0];
11690                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11691                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11692                         path.slots[0]++;
11693                         continue;
11694                 }
11695
11696                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11697                 btrfs_add_block_group(fs_info, 0,
11698                                       btrfs_chunk_type(leaf, chunk),
11699                                       key.objectid, key.offset,
11700                                       btrfs_chunk_length(leaf, chunk));
11701                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11702                                  key.offset + btrfs_chunk_length(leaf, chunk));
11703                 path.slots[0]++;
11704         }
11705         start = 0;
11706         while (1) {
11707                 cache = btrfs_lookup_first_block_group(fs_info, start);
11708                 if (!cache)
11709                         break;
11710                 cache->cached = 1;
11711                 start = cache->key.objectid + cache->key.offset;
11712         }
11713
11714         btrfs_release_path(&path);
11715         return 0;
11716 }
11717
11718 static int reset_balance(struct btrfs_trans_handle *trans,
11719                          struct btrfs_fs_info *fs_info)
11720 {
11721         struct btrfs_root *root = fs_info->tree_root;
11722         struct btrfs_path path;
11723         struct extent_buffer *leaf;
11724         struct btrfs_key key;
11725         int del_slot, del_nr = 0;
11726         int ret;
11727         int found = 0;
11728
11729         btrfs_init_path(&path);
11730         key.objectid = BTRFS_BALANCE_OBJECTID;
11731         key.type = BTRFS_BALANCE_ITEM_KEY;
11732         key.offset = 0;
11733         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11734         if (ret) {
11735                 if (ret > 0)
11736                         ret = 0;
11737                 if (!ret)
11738                         goto reinit_data_reloc;
11739                 else
11740                         goto out;
11741         }
11742
11743         ret = btrfs_del_item(trans, root, &path);
11744         if (ret)
11745                 goto out;
11746         btrfs_release_path(&path);
11747
11748         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11749         key.type = BTRFS_ROOT_ITEM_KEY;
11750         key.offset = 0;
11751         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11752         if (ret < 0)
11753                 goto out;
11754         while (1) {
11755                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11756                         if (!found)
11757                                 break;
11758
11759                         if (del_nr) {
11760                                 ret = btrfs_del_items(trans, root, &path,
11761                                                       del_slot, del_nr);
11762                                 del_nr = 0;
11763                                 if (ret)
11764                                         goto out;
11765                         }
11766                         key.offset++;
11767                         btrfs_release_path(&path);
11768
11769                         found = 0;
11770                         ret = btrfs_search_slot(trans, root, &key, &path,
11771                                                 -1, 1);
11772                         if (ret < 0)
11773                                 goto out;
11774                         continue;
11775                 }
11776                 found = 1;
11777                 leaf = path.nodes[0];
11778                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11779                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11780                         break;
11781                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11782                         path.slots[0]++;
11783                         continue;
11784                 }
11785                 if (!del_nr) {
11786                         del_slot = path.slots[0];
11787                         del_nr = 1;
11788                 } else {
11789                         del_nr++;
11790                 }
11791                 path.slots[0]++;
11792         }
11793
11794         if (del_nr) {
11795                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11796                 if (ret)
11797                         goto out;
11798         }
11799         btrfs_release_path(&path);
11800
11801 reinit_data_reloc:
11802         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11803         key.type = BTRFS_ROOT_ITEM_KEY;
11804         key.offset = (u64)-1;
11805         root = btrfs_read_fs_root(fs_info, &key);
11806         if (IS_ERR(root)) {
11807                 fprintf(stderr, "Error reading data reloc tree\n");
11808                 ret = PTR_ERR(root);
11809                 goto out;
11810         }
11811         record_root_in_trans(trans, root);
11812         ret = btrfs_fsck_reinit_root(trans, root, 0);
11813         if (ret)
11814                 goto out;
11815         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11816 out:
11817         btrfs_release_path(&path);
11818         return ret;
11819 }
11820
11821 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11822                               struct btrfs_fs_info *fs_info)
11823 {
11824         u64 start = 0;
11825         int ret;
11826
11827         /*
11828          * The only reason we don't do this is because right now we're just
11829          * walking the trees we find and pinning down their bytes, we don't look
11830          * at any of the leaves.  In order to do mixed groups we'd have to check
11831          * the leaves of any fs roots and pin down the bytes for any file
11832          * extents we find.  Not hard but why do it if we don't have to?
11833          */
11834         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11835                 fprintf(stderr, "We don't support re-initing the extent tree "
11836                         "for mixed block groups yet, please notify a btrfs "
11837                         "developer you want to do this so they can add this "
11838                         "functionality.\n");
11839                 return -EINVAL;
11840         }
11841
11842         /*
11843          * first we need to walk all of the trees except the extent tree and pin
11844          * down the bytes that are in use so we don't overwrite any existing
11845          * metadata.
11846          */
11847         ret = pin_metadata_blocks(fs_info);
11848         if (ret) {
11849                 fprintf(stderr, "error pinning down used bytes\n");
11850                 return ret;
11851         }
11852
11853         /*
11854          * Need to drop all the block groups since we're going to recreate all
11855          * of them again.
11856          */
11857         btrfs_free_block_groups(fs_info);
11858         ret = reset_block_groups(fs_info);
11859         if (ret) {
11860                 fprintf(stderr, "error resetting the block groups\n");
11861                 return ret;
11862         }
11863
11864         /* Ok we can allocate now, reinit the extent root */
11865         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11866         if (ret) {
11867                 fprintf(stderr, "extent root initialization failed\n");
11868                 /*
11869                  * When the transaction code is updated we should end the
11870                  * transaction, but for now progs only knows about commit so
11871                  * just return an error.
11872                  */
11873                 return ret;
11874         }
11875
11876         /*
11877          * Now we have all the in-memory block groups setup so we can make
11878          * allocations properly, and the metadata we care about is safe since we
11879          * pinned all of it above.
11880          */
11881         while (1) {
11882                 struct btrfs_block_group_cache *cache;
11883
11884                 cache = btrfs_lookup_first_block_group(fs_info, start);
11885                 if (!cache)
11886                         break;
11887                 start = cache->key.objectid + cache->key.offset;
11888                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11889                                         &cache->key, &cache->item,
11890                                         sizeof(cache->item));
11891                 if (ret) {
11892                         fprintf(stderr, "Error adding block group\n");
11893                         return ret;
11894                 }
11895                 btrfs_extent_post_op(trans, fs_info->extent_root);
11896         }
11897
11898         ret = reset_balance(trans, fs_info);
11899         if (ret)
11900                 fprintf(stderr, "error resetting the pending balance\n");
11901
11902         return ret;
11903 }
11904
11905 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11906 {
11907         struct btrfs_path path;
11908         struct btrfs_trans_handle *trans;
11909         struct btrfs_key key;
11910         int ret;
11911
11912         printf("Recowing metadata block %llu\n", eb->start);
11913         key.objectid = btrfs_header_owner(eb);
11914         key.type = BTRFS_ROOT_ITEM_KEY;
11915         key.offset = (u64)-1;
11916
11917         root = btrfs_read_fs_root(root->fs_info, &key);
11918         if (IS_ERR(root)) {
11919                 fprintf(stderr, "Couldn't find owner root %llu\n",
11920                         key.objectid);
11921                 return PTR_ERR(root);
11922         }
11923
11924         trans = btrfs_start_transaction(root, 1);
11925         if (IS_ERR(trans))
11926                 return PTR_ERR(trans);
11927
11928         btrfs_init_path(&path);
11929         path.lowest_level = btrfs_header_level(eb);
11930         if (path.lowest_level)
11931                 btrfs_node_key_to_cpu(eb, &key, 0);
11932         else
11933                 btrfs_item_key_to_cpu(eb, &key, 0);
11934
11935         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11936         btrfs_commit_transaction(trans, root);
11937         btrfs_release_path(&path);
11938         return ret;
11939 }
11940
11941 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11942 {
11943         struct btrfs_path path;
11944         struct btrfs_trans_handle *trans;
11945         struct btrfs_key key;
11946         int ret;
11947
11948         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11949                bad->key.type, bad->key.offset);
11950         key.objectid = bad->root_id;
11951         key.type = BTRFS_ROOT_ITEM_KEY;
11952         key.offset = (u64)-1;
11953
11954         root = btrfs_read_fs_root(root->fs_info, &key);
11955         if (IS_ERR(root)) {
11956                 fprintf(stderr, "Couldn't find owner root %llu\n",
11957                         key.objectid);
11958                 return PTR_ERR(root);
11959         }
11960
11961         trans = btrfs_start_transaction(root, 1);
11962         if (IS_ERR(trans))
11963                 return PTR_ERR(trans);
11964
11965         btrfs_init_path(&path);
11966         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11967         if (ret) {
11968                 if (ret > 0)
11969                         ret = 0;
11970                 goto out;
11971         }
11972         ret = btrfs_del_item(trans, root, &path);
11973 out:
11974         btrfs_commit_transaction(trans, root);
11975         btrfs_release_path(&path);
11976         return ret;
11977 }
11978
11979 static int zero_log_tree(struct btrfs_root *root)
11980 {
11981         struct btrfs_trans_handle *trans;
11982         int ret;
11983
11984         trans = btrfs_start_transaction(root, 1);
11985         if (IS_ERR(trans)) {
11986                 ret = PTR_ERR(trans);
11987                 return ret;
11988         }
11989         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11990         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11991         ret = btrfs_commit_transaction(trans, root);
11992         return ret;
11993 }
11994
11995 static int populate_csum(struct btrfs_trans_handle *trans,
11996                          struct btrfs_root *csum_root, char *buf, u64 start,
11997                          u64 len)
11998 {
11999         u64 offset = 0;
12000         u64 sectorsize;
12001         int ret = 0;
12002
12003         while (offset < len) {
12004                 sectorsize = csum_root->sectorsize;
12005                 ret = read_extent_data(csum_root, buf, start + offset,
12006                                        &sectorsize, 0);
12007                 if (ret)
12008                         break;
12009                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12010                                             start + offset, buf, sectorsize);
12011                 if (ret)
12012                         break;
12013                 offset += sectorsize;
12014         }
12015         return ret;
12016 }
12017
12018 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12019                                       struct btrfs_root *csum_root,
12020                                       struct btrfs_root *cur_root)
12021 {
12022         struct btrfs_path path;
12023         struct btrfs_key key;
12024         struct extent_buffer *node;
12025         struct btrfs_file_extent_item *fi;
12026         char *buf = NULL;
12027         u64 start = 0;
12028         u64 len = 0;
12029         int slot = 0;
12030         int ret = 0;
12031
12032         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12033         if (!buf)
12034                 return -ENOMEM;
12035
12036         btrfs_init_path(&path);
12037         key.objectid = 0;
12038         key.offset = 0;
12039         key.type = 0;
12040         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12041         if (ret < 0)
12042                 goto out;
12043         /* Iterate all regular file extents and fill its csum */
12044         while (1) {
12045                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12046
12047                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12048                         goto next;
12049                 node = path.nodes[0];
12050                 slot = path.slots[0];
12051                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12052                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12053                         goto next;
12054                 start = btrfs_file_extent_disk_bytenr(node, fi);
12055                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12056
12057                 ret = populate_csum(trans, csum_root, buf, start, len);
12058                 if (ret == -EEXIST)
12059                         ret = 0;
12060                 if (ret < 0)
12061                         goto out;
12062 next:
12063                 /*
12064                  * TODO: if next leaf is corrupted, jump to nearest next valid
12065                  * leaf.
12066                  */
12067                 ret = btrfs_next_item(cur_root, &path);
12068                 if (ret < 0)
12069                         goto out;
12070                 if (ret > 0) {
12071                         ret = 0;
12072                         goto out;
12073                 }
12074         }
12075
12076 out:
12077         btrfs_release_path(&path);
12078         free(buf);
12079         return ret;
12080 }
12081
12082 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12083                                   struct btrfs_root *csum_root)
12084 {
12085         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12086         struct btrfs_path path;
12087         struct btrfs_root *tree_root = fs_info->tree_root;
12088         struct btrfs_root *cur_root;
12089         struct extent_buffer *node;
12090         struct btrfs_key key;
12091         int slot = 0;
12092         int ret = 0;
12093
12094         btrfs_init_path(&path);
12095         key.objectid = BTRFS_FS_TREE_OBJECTID;
12096         key.offset = 0;
12097         key.type = BTRFS_ROOT_ITEM_KEY;
12098         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12099         if (ret < 0)
12100                 goto out;
12101         if (ret > 0) {
12102                 ret = -ENOENT;
12103                 goto out;
12104         }
12105
12106         while (1) {
12107                 node = path.nodes[0];
12108                 slot = path.slots[0];
12109                 btrfs_item_key_to_cpu(node, &key, slot);
12110                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12111                         goto out;
12112                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12113                         goto next;
12114                 if (!is_fstree(key.objectid))
12115                         goto next;
12116                 key.offset = (u64)-1;
12117
12118                 cur_root = btrfs_read_fs_root(fs_info, &key);
12119                 if (IS_ERR(cur_root) || !cur_root) {
12120                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12121                                 key.objectid);
12122                         goto out;
12123                 }
12124                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12125                                 cur_root);
12126                 if (ret < 0)
12127                         goto out;
12128 next:
12129                 ret = btrfs_next_item(tree_root, &path);
12130                 if (ret > 0) {
12131                         ret = 0;
12132                         goto out;
12133                 }
12134                 if (ret < 0)
12135                         goto out;
12136         }
12137
12138 out:
12139         btrfs_release_path(&path);
12140         return ret;
12141 }
12142
12143 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12144                                       struct btrfs_root *csum_root)
12145 {
12146         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12147         struct btrfs_path path;
12148         struct btrfs_extent_item *ei;
12149         struct extent_buffer *leaf;
12150         char *buf;
12151         struct btrfs_key key;
12152         int ret;
12153
12154         btrfs_init_path(&path);
12155         key.objectid = 0;
12156         key.type = BTRFS_EXTENT_ITEM_KEY;
12157         key.offset = 0;
12158         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12159         if (ret < 0) {
12160                 btrfs_release_path(&path);
12161                 return ret;
12162         }
12163
12164         buf = malloc(csum_root->sectorsize);
12165         if (!buf) {
12166                 btrfs_release_path(&path);
12167                 return -ENOMEM;
12168         }
12169
12170         while (1) {
12171                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12172                         ret = btrfs_next_leaf(extent_root, &path);
12173                         if (ret < 0)
12174                                 break;
12175                         if (ret) {
12176                                 ret = 0;
12177                                 break;
12178                         }
12179                 }
12180                 leaf = path.nodes[0];
12181
12182                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12183                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12184                         path.slots[0]++;
12185                         continue;
12186                 }
12187
12188                 ei = btrfs_item_ptr(leaf, path.slots[0],
12189                                     struct btrfs_extent_item);
12190                 if (!(btrfs_extent_flags(leaf, ei) &
12191                       BTRFS_EXTENT_FLAG_DATA)) {
12192                         path.slots[0]++;
12193                         continue;
12194                 }
12195
12196                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12197                                     key.offset);
12198                 if (ret)
12199                         break;
12200                 path.slots[0]++;
12201         }
12202
12203         btrfs_release_path(&path);
12204         free(buf);
12205         return ret;
12206 }
12207
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * The extent tree is the default source of data extents.  An extent tree
 * init wipes out all the extent info, so in that case the extent tree can't
 * be relied on; when @search_fs_tree is non-zero the fs/subvol trees are
 * walked instead to find the data to checksum.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        int ret;

        if (!search_fs_tree)
                ret = fill_csum_tree_from_extent(trans, csum_root);
        else
                ret = fill_csum_tree_from_fs(trans, csum_root);

        return ret;
}
12224
12225 static void free_roots_info_cache(void)
12226 {
12227         if (!roots_info_cache)
12228                 return;
12229
12230         while (!cache_tree_empty(roots_info_cache)) {
12231                 struct cache_extent *entry;
12232                 struct root_item_info *rii;
12233
12234                 entry = first_cache_extent(roots_info_cache);
12235                 if (!entry)
12236                         break;
12237                 remove_cache_extent(roots_info_cache, entry);
12238                 rii = container_of(entry, struct root_item_info, cache_extent);
12239                 free(rii);
12240         }
12241
12242         free(roots_info_cache);
12243         roots_info_cache = NULL;
12244 }
12245
12246 static int build_roots_info_cache(struct btrfs_fs_info *info)
12247 {
12248         int ret = 0;
12249         struct btrfs_key key;
12250         struct extent_buffer *leaf;
12251         struct btrfs_path path;
12252
12253         if (!roots_info_cache) {
12254                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12255                 if (!roots_info_cache)
12256                         return -ENOMEM;
12257                 cache_tree_init(roots_info_cache);
12258         }
12259
12260         btrfs_init_path(&path);
12261         key.objectid = 0;
12262         key.type = BTRFS_EXTENT_ITEM_KEY;
12263         key.offset = 0;
12264         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12265         if (ret < 0)
12266                 goto out;
12267         leaf = path.nodes[0];
12268
12269         while (1) {
12270                 struct btrfs_key found_key;
12271                 struct btrfs_extent_item *ei;
12272                 struct btrfs_extent_inline_ref *iref;
12273                 int slot = path.slots[0];
12274                 int type;
12275                 u64 flags;
12276                 u64 root_id;
12277                 u8 level;
12278                 struct cache_extent *entry;
12279                 struct root_item_info *rii;
12280
12281                 if (slot >= btrfs_header_nritems(leaf)) {
12282                         ret = btrfs_next_leaf(info->extent_root, &path);
12283                         if (ret < 0) {
12284                                 break;
12285                         } else if (ret) {
12286                                 ret = 0;
12287                                 break;
12288                         }
12289                         leaf = path.nodes[0];
12290                         slot = path.slots[0];
12291                 }
12292
12293                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12294
12295                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12296                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12297                         goto next;
12298
12299                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12300                 flags = btrfs_extent_flags(leaf, ei);
12301
12302                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12303                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12304                         goto next;
12305
12306                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12307                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12308                         level = found_key.offset;
12309                 } else {
12310                         struct btrfs_tree_block_info *binfo;
12311
12312                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12313                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12314                         level = btrfs_tree_block_level(leaf, binfo);
12315                 }
12316
12317                 /*
12318                  * For a root extent, it must be of the following type and the
12319                  * first (and only one) iref in the item.
12320                  */
12321                 type = btrfs_extent_inline_ref_type(leaf, iref);
12322                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12323                         goto next;
12324
12325                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12326                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12327                 if (!entry) {
12328                         rii = malloc(sizeof(struct root_item_info));
12329                         if (!rii) {
12330                                 ret = -ENOMEM;
12331                                 goto out;
12332                         }
12333                         rii->cache_extent.start = root_id;
12334                         rii->cache_extent.size = 1;
12335                         rii->level = (u8)-1;
12336                         entry = &rii->cache_extent;
12337                         ret = insert_cache_extent(roots_info_cache, entry);
12338                         ASSERT(ret == 0);
12339                 } else {
12340                         rii = container_of(entry, struct root_item_info,
12341                                            cache_extent);
12342                 }
12343
12344                 ASSERT(rii->cache_extent.start == root_id);
12345                 ASSERT(rii->cache_extent.size == 1);
12346
12347                 if (level > rii->level || rii->level == (u8)-1) {
12348                         rii->level = level;
12349                         rii->bytenr = found_key.objectid;
12350                         rii->gen = btrfs_extent_generation(leaf, ei);
12351                         rii->node_count = 1;
12352                 } else if (level == rii->level) {
12353                         rii->node_count++;
12354                 }
12355 next:
12356                 path.slots[0]++;
12357         }
12358
12359 out:
12360         btrfs_release_path(&path);
12361
12362         return ret;
12363 }
12364
12365 static int maybe_repair_root_item(struct btrfs_path *path,
12366                                   const struct btrfs_key *root_key,
12367                                   const int read_only_mode)
12368 {
12369         const u64 root_id = root_key->objectid;
12370         struct cache_extent *entry;
12371         struct root_item_info *rii;
12372         struct btrfs_root_item ri;
12373         unsigned long offset;
12374
12375         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12376         if (!entry) {
12377                 fprintf(stderr,
12378                         "Error: could not find extent items for root %llu\n",
12379                         root_key->objectid);
12380                 return -ENOENT;
12381         }
12382
12383         rii = container_of(entry, struct root_item_info, cache_extent);
12384         ASSERT(rii->cache_extent.start == root_id);
12385         ASSERT(rii->cache_extent.size == 1);
12386
12387         if (rii->node_count != 1) {
12388                 fprintf(stderr,
12389                         "Error: could not find btree root extent for root %llu\n",
12390                         root_id);
12391                 return -ENOENT;
12392         }
12393
12394         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12395         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12396
12397         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12398             btrfs_root_level(&ri) != rii->level ||
12399             btrfs_root_generation(&ri) != rii->gen) {
12400
12401                 /*
12402                  * If we're in repair mode but our caller told us to not update
12403                  * the root item, i.e. just check if it needs to be updated, don't
12404                  * print this message, since the caller will call us again shortly
12405                  * for the same root item without read only mode (the caller will
12406                  * open a transaction first).
12407                  */
12408                 if (!(read_only_mode && repair))
12409                         fprintf(stderr,
12410                                 "%sroot item for root %llu,"
12411                                 " current bytenr %llu, current gen %llu, current level %u,"
12412                                 " new bytenr %llu, new gen %llu, new level %u\n",
12413                                 (read_only_mode ? "" : "fixing "),
12414                                 root_id,
12415                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12416                                 btrfs_root_level(&ri),
12417                                 rii->bytenr, rii->gen, rii->level);
12418
12419                 if (btrfs_root_generation(&ri) > rii->gen) {
12420                         fprintf(stderr,
12421                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12422                                 root_id, btrfs_root_generation(&ri), rii->gen);
12423                         return -EINVAL;
12424                 }
12425
12426                 if (!read_only_mode) {
12427                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12428                         btrfs_set_root_level(&ri, rii->level);
12429                         btrfs_set_root_generation(&ri, rii->gen);
12430                         write_extent_buffer(path->nodes[0], &ri,
12431                                             offset, sizeof(ri));
12432                 }
12433
12434                 return 1;
12435         }
12436
12437         return 0;
12438 }
12439
12440 /*
12441  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12442  * caused read-only snapshots to be corrupted if they were created at a moment
12443  * when the source subvolume/snapshot had orphan items. The issue was that the
12444  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12445  * node instead of the post orphan cleanup root node.
12446  * So this function, and its callees, just detects and fixes those cases. Even
12447  * though the regression was for read-only snapshots, this function applies to
12448  * any snapshot/subvolume root.
12449  * This must be run before any other repair code - not doing it so, makes other
12450  * repair code delete or modify backrefs in the extent tree for example, which
12451  * will result in an inconsistent fs after repairing the root items.
12452  */
12453 static int repair_root_items(struct btrfs_fs_info *info)
12454 {
12455         struct btrfs_path path;
12456         struct btrfs_key key;
12457         struct extent_buffer *leaf;
12458         struct btrfs_trans_handle *trans = NULL;
12459         int ret = 0;
12460         int bad_roots = 0;
12461         int need_trans = 0;
12462
12463         btrfs_init_path(&path);
12464
12465         ret = build_roots_info_cache(info);
12466         if (ret)
12467                 goto out;
12468
12469         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12470         key.type = BTRFS_ROOT_ITEM_KEY;
12471         key.offset = 0;
12472
12473 again:
12474         /*
12475          * Avoid opening and committing transactions if a leaf doesn't have
12476          * any root items that need to be fixed, so that we avoid rotating
12477          * backup roots unnecessarily.
12478          */
12479         if (need_trans) {
12480                 trans = btrfs_start_transaction(info->tree_root, 1);
12481                 if (IS_ERR(trans)) {
12482                         ret = PTR_ERR(trans);
12483                         goto out;
12484                 }
12485         }
12486
12487         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12488                                 0, trans ? 1 : 0);
12489         if (ret < 0)
12490                 goto out;
12491         leaf = path.nodes[0];
12492
12493         while (1) {
12494                 struct btrfs_key found_key;
12495
12496                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12497                         int no_more_keys = find_next_key(&path, &key);
12498
12499                         btrfs_release_path(&path);
12500                         if (trans) {
12501                                 ret = btrfs_commit_transaction(trans,
12502                                                                info->tree_root);
12503                                 trans = NULL;
12504                                 if (ret < 0)
12505                                         goto out;
12506                         }
12507                         need_trans = 0;
12508                         if (no_more_keys)
12509                                 break;
12510                         goto again;
12511                 }
12512
12513                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12514
12515                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12516                         goto next;
12517                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12518                         goto next;
12519
12520                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12521                 if (ret < 0)
12522                         goto out;
12523                 if (ret) {
12524                         if (!trans && repair) {
12525                                 need_trans = 1;
12526                                 key = found_key;
12527                                 btrfs_release_path(&path);
12528                                 goto again;
12529                         }
12530                         bad_roots++;
12531                 }
12532 next:
12533                 path.slots[0]++;
12534         }
12535         ret = 0;
12536 out:
12537         free_roots_info_cache();
12538         btrfs_release_path(&path);
12539         if (trans)
12540                 btrfs_commit_transaction(trans, info->tree_root);
12541         if (ret < 0)
12542                 return ret;
12543
12544         return bad_roots;
12545 }
12546
12547 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12548 {
12549         struct btrfs_trans_handle *trans;
12550         struct btrfs_block_group_cache *bg_cache;
12551         u64 current = 0;
12552         int ret = 0;
12553
12554         /* Clear all free space cache inodes and its extent data */
12555         while (1) {
12556                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12557                 if (!bg_cache)
12558                         break;
12559                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12560                 if (ret < 0)
12561                         return ret;
12562                 current = bg_cache->key.objectid + bg_cache->key.offset;
12563         }
12564
12565         /* Don't forget to set cache_generation to -1 */
12566         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12567         if (IS_ERR(trans)) {
12568                 error("failed to update super block cache generation");
12569                 return PTR_ERR(trans);
12570         }
12571         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12572         btrfs_commit_transaction(trans, fs_info->tree_root);
12573
12574         return ret;
12575 }
12576
/*
 * Help text for "btrfs check": a NULL-terminated array of strings that
 * cmd_check() passes to usage().
 */
const char * const cmd_check_usage[] = {
        /* Command synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Super block / root selection and repair options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",

        /* Check mode selection */
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",

        /* Remaining options */
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* Terminator */
        NULL
};
12607
12608 int cmd_check(int argc, char **argv)
12609 {
12610         struct cache_tree root_cache;
12611         struct btrfs_root *root;
12612         struct btrfs_fs_info *info;
12613         u64 bytenr = 0;
12614         u64 subvolid = 0;
12615         u64 tree_root_bytenr = 0;
12616         u64 chunk_root_bytenr = 0;
12617         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12618         int ret;
12619         int err = 0;
12620         u64 num;
12621         int init_csum_tree = 0;
12622         int readonly = 0;
12623         int clear_space_cache = 0;
12624         int qgroup_report = 0;
12625         int qgroups_repaired = 0;
12626         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12627
12628         while(1) {
12629                 int c;
12630                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12631                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12632                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12633                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12634                 static const struct option long_options[] = {
12635                         { "super", required_argument, NULL, 's' },
12636                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12637                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12638                         { "init-csum-tree", no_argument, NULL,
12639                                 GETOPT_VAL_INIT_CSUM },
12640                         { "init-extent-tree", no_argument, NULL,
12641                                 GETOPT_VAL_INIT_EXTENT },
12642                         { "check-data-csum", no_argument, NULL,
12643                                 GETOPT_VAL_CHECK_CSUM },
12644                         { "backup", no_argument, NULL, 'b' },
12645                         { "subvol-extents", required_argument, NULL, 'E' },
12646                         { "qgroup-report", no_argument, NULL, 'Q' },
12647                         { "tree-root", required_argument, NULL, 'r' },
12648                         { "chunk-root", required_argument, NULL,
12649                                 GETOPT_VAL_CHUNK_TREE },
12650                         { "progress", no_argument, NULL, 'p' },
12651                         { "mode", required_argument, NULL,
12652                                 GETOPT_VAL_MODE },
12653                         { "clear-space-cache", required_argument, NULL,
12654                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12655                         { NULL, 0, NULL, 0}
12656                 };
12657
12658                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12659                 if (c < 0)
12660                         break;
12661                 switch(c) {
12662                         case 'a': /* ignored */ break;
12663                         case 'b':
12664                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12665                                 break;
12666                         case 's':
12667                                 num = arg_strtou64(optarg);
12668                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12669                                         error(
12670                                         "super mirror should be less than %d",
12671                                                 BTRFS_SUPER_MIRROR_MAX);
12672                                         exit(1);
12673                                 }
12674                                 bytenr = btrfs_sb_offset(((int)num));
12675                                 printf("using SB copy %llu, bytenr %llu\n", num,
12676                                        (unsigned long long)bytenr);
12677                                 break;
12678                         case 'Q':
12679                                 qgroup_report = 1;
12680                                 break;
12681                         case 'E':
12682                                 subvolid = arg_strtou64(optarg);
12683                                 break;
12684                         case 'r':
12685                                 tree_root_bytenr = arg_strtou64(optarg);
12686                                 break;
12687                         case GETOPT_VAL_CHUNK_TREE:
12688                                 chunk_root_bytenr = arg_strtou64(optarg);
12689                                 break;
12690                         case 'p':
12691                                 ctx.progress_enabled = true;
12692                                 break;
12693                         case '?':
12694                         case 'h':
12695                                 usage(cmd_check_usage);
12696                         case GETOPT_VAL_REPAIR:
12697                                 printf("enabling repair mode\n");
12698                                 repair = 1;
12699                                 ctree_flags |= OPEN_CTREE_WRITES;
12700                                 break;
12701                         case GETOPT_VAL_READONLY:
12702                                 readonly = 1;
12703                                 break;
12704                         case GETOPT_VAL_INIT_CSUM:
12705                                 printf("Creating a new CRC tree\n");
12706                                 init_csum_tree = 1;
12707                                 repair = 1;
12708                                 ctree_flags |= OPEN_CTREE_WRITES;
12709                                 break;
12710                         case GETOPT_VAL_INIT_EXTENT:
12711                                 init_extent_tree = 1;
12712                                 ctree_flags |= (OPEN_CTREE_WRITES |
12713                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12714                                 repair = 1;
12715                                 break;
12716                         case GETOPT_VAL_CHECK_CSUM:
12717                                 check_data_csum = 1;
12718                                 break;
12719                         case GETOPT_VAL_MODE:
12720                                 check_mode = parse_check_mode(optarg);
12721                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12722                                         error("unknown mode: %s", optarg);
12723                                         exit(1);
12724                                 }
12725                                 break;
12726                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12727                                 if (strcmp(optarg, "v1") == 0) {
12728                                         clear_space_cache = 1;
12729                                 } else if (strcmp(optarg, "v2") == 0) {
12730                                         clear_space_cache = 2;
12731                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12732                                 } else {
12733                                         error(
12734                 "invalid argument to --clear-space-cache, must be v1 or v2");
12735                                         exit(1);
12736                                 }
12737                                 ctree_flags |= OPEN_CTREE_WRITES;
12738                                 break;
12739                 }
12740         }
12741
12742         if (check_argc_exact(argc - optind, 1))
12743                 usage(cmd_check_usage);
12744
12745         if (ctx.progress_enabled) {
12746                 ctx.tp = TASK_NOTHING;
12747                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12748         }
12749
12750         /* This check is the only reason for --readonly to exist */
12751         if (readonly && repair) {
12752                 error("repair options are not compatible with --readonly");
12753                 exit(1);
12754         }
12755
12756         /*
12757          * Repair is not supported in low memory mode yet
12758          */
12759         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12760                 error("low memory mode doesn't support repair yet");
12761                 exit(1);
12762         }
12763
12764         radix_tree_init();
12765         cache_tree_init(&root_cache);
12766
12767         if((ret = check_mounted(argv[optind])) < 0) {
12768                 error("could not check mount status: %s", strerror(-ret));
12769                 err |= !!ret;
12770                 goto err_out;
12771         } else if(ret) {
12772                 error("%s is currently mounted, aborting", argv[optind]);
12773                 ret = -EBUSY;
12774                 err |= !!ret;
12775                 goto err_out;
12776         }
12777
12778         /* only allow partial opening under repair mode */
12779         if (repair)
12780                 ctree_flags |= OPEN_CTREE_PARTIAL;
12781
12782         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12783                                   chunk_root_bytenr, ctree_flags);
12784         if (!info) {
12785                 error("cannot open file system");
12786                 ret = -EIO;
12787                 err |= !!ret;
12788                 goto err_out;
12789         }
12790
12791         global_info = info;
12792         root = info->fs_root;
12793         if (clear_space_cache == 1) {
12794                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12795                         error(
12796                 "free space cache v2 detected, use --clear-space-cache v2");
12797                         ret = 1;
12798                         goto close_out;
12799                 }
12800                 printf("Clearing free space cache\n");
12801                 ret = clear_free_space_cache(info);
12802                 if (ret) {
12803                         error("failed to clear free space cache");
12804                         ret = 1;
12805                 } else {
12806                         printf("Free space cache cleared\n");
12807                 }
12808                 goto close_out;
12809         } else if (clear_space_cache == 2) {
12810                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12811                         printf("no free space cache v2 to clear\n");
12812                         ret = 0;
12813                         goto close_out;
12814                 }
12815                 printf("Clear free space cache v2\n");
12816                 ret = btrfs_clear_free_space_tree(info);
12817                 if (ret) {
12818                         error("failed to clear free space cache v2: %d", ret);
12819                         ret = 1;
12820                 } else {
12821                         printf("free space cache v2 cleared\n");
12822                 }
12823                 goto close_out;
12824         }
12825
12826         /*
12827          * repair mode will force us to commit transaction which
12828          * will make us fail to load log tree when mounting.
12829          */
12830         if (repair && btrfs_super_log_root(info->super_copy)) {
12831                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12832                 if (!ret) {
12833                         ret = 1;
12834                         err |= !!ret;
12835                         goto close_out;
12836                 }
12837                 ret = zero_log_tree(root);
12838                 err |= !!ret;
12839                 if (ret) {
12840                         error("failed to zero log tree: %d", ret);
12841                         goto close_out;
12842                 }
12843         }
12844
12845         uuid_unparse(info->super_copy->fsid, uuidbuf);
12846         if (qgroup_report) {
12847                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12848                        uuidbuf);
12849                 ret = qgroup_verify_all(info);
12850                 err |= !!ret;
12851                 if (ret == 0)
12852                         report_qgroups(1);
12853                 goto close_out;
12854         }
12855         if (subvolid) {
12856                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12857                        subvolid, argv[optind], uuidbuf);
12858                 ret = print_extent_state(info, subvolid);
12859                 err |= !!ret;
12860                 goto close_out;
12861         }
12862         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12863
12864         if (!extent_buffer_uptodate(info->tree_root->node) ||
12865             !extent_buffer_uptodate(info->dev_root->node) ||
12866             !extent_buffer_uptodate(info->chunk_root->node)) {
12867                 error("critical roots corrupted, unable to check the filesystem");
12868                 err |= !!ret;
12869                 ret = -EIO;
12870                 goto close_out;
12871         }
12872
12873         if (init_extent_tree || init_csum_tree) {
12874                 struct btrfs_trans_handle *trans;
12875
12876                 trans = btrfs_start_transaction(info->extent_root, 0);
12877                 if (IS_ERR(trans)) {
12878                         error("error starting transaction");
12879                         ret = PTR_ERR(trans);
12880                         err |= !!ret;
12881                         goto close_out;
12882                 }
12883
12884                 if (init_extent_tree) {
12885                         printf("Creating a new extent tree\n");
12886                         ret = reinit_extent_tree(trans, info);
12887                         err |= !!ret;
12888                         if (ret)
12889                                 goto close_out;
12890                 }
12891
12892                 if (init_csum_tree) {
12893                         printf("Reinitialize checksum tree\n");
12894                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12895                         if (ret) {
12896                                 error("checksum tree initialization failed: %d",
12897                                                 ret);
12898                                 ret = -EIO;
12899                                 err |= !!ret;
12900                                 goto close_out;
12901                         }
12902
12903                         ret = fill_csum_tree(trans, info->csum_root,
12904                                              init_extent_tree);
12905                         err |= !!ret;
12906                         if (ret) {
12907                                 error("checksum tree refilling failed: %d", ret);
12908                                 return -EIO;
12909                         }
12910                 }
12911                 /*
12912                  * Ok now we commit and run the normal fsck, which will add
12913                  * extent entries for all of the items it finds.
12914                  */
12915                 ret = btrfs_commit_transaction(trans, info->extent_root);
12916                 err |= !!ret;
12917                 if (ret)
12918                         goto close_out;
12919         }
12920         if (!extent_buffer_uptodate(info->extent_root->node)) {
12921                 error("critical: extent_root, unable to check the filesystem");
12922                 ret = -EIO;
12923                 err |= !!ret;
12924                 goto close_out;
12925         }
12926         if (!extent_buffer_uptodate(info->csum_root->node)) {
12927                 error("critical: csum_root, unable to check the filesystem");
12928                 ret = -EIO;
12929                 err |= !!ret;
12930                 goto close_out;
12931         }
12932
12933         if (!ctx.progress_enabled)
12934                 fprintf(stderr, "checking extents\n");
12935         if (check_mode == CHECK_MODE_LOWMEM)
12936                 ret = check_chunks_and_extents_v2(root);
12937         else
12938                 ret = check_chunks_and_extents(root);
12939         err |= !!ret;
12940         if (ret)
12941                 error(
12942                 "errors found in extent allocation tree or chunk allocation");
12943
12944         ret = repair_root_items(info);
12945         err |= !!ret;
12946         if (ret < 0) {
12947                 error("failed to repair root items: %s", strerror(-ret));
12948                 goto close_out;
12949         }
12950         if (repair) {
12951                 fprintf(stderr, "Fixed %d roots.\n", ret);
12952                 ret = 0;
12953         } else if (ret > 0) {
12954                 fprintf(stderr,
12955                        "Found %d roots with an outdated root item.\n",
12956                        ret);
12957                 fprintf(stderr,
12958                         "Please run a filesystem check with the option --repair to fix them.\n");
12959                 ret = 1;
12960                 err |= !!ret;
12961                 goto close_out;
12962         }
12963
12964         if (!ctx.progress_enabled) {
12965                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12966                         fprintf(stderr, "checking free space tree\n");
12967                 else
12968                         fprintf(stderr, "checking free space cache\n");
12969         }
12970         ret = check_space_cache(root);
12971         err |= !!ret;
12972         if (ret) {
12973                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12974                         error("errors found in free space tree");
12975                 else
12976                         error("errors found in free space cache");
12977                 goto out;
12978         }
12979
12980         /*
12981          * Filesystems without the NO_HOLES flag were required to have hole
12982          * extents in between their real extents, so if the flag is not set we
12983          * need to make sure there are no gaps in the file extents for inodes;
12984          * when the flag is set, such gaps can simply be ignored.
12985          */
12986         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12987         if (!ctx.progress_enabled)
12988                 fprintf(stderr, "checking fs roots\n");
12989         if (check_mode == CHECK_MODE_LOWMEM)
12990                 ret = check_fs_roots_v2(root->fs_info);
12991         else
12992                 ret = check_fs_roots(root, &root_cache);
12993         err |= !!ret;
12994         if (ret) {
12995                 error("errors found in fs roots");
12996                 goto out;
12997         }
12998
12999         fprintf(stderr, "checking csums\n");
13000         ret = check_csums(root);
13001         err |= !!ret;
13002         if (ret) {
13003                 error("errors found in csum tree");
13004                 goto out;
13005         }
13006
13007         fprintf(stderr, "checking root refs\n");
13008         /* For low memory mode, check_fs_roots_v2 handles root refs */
13009         if (check_mode != CHECK_MODE_LOWMEM) {
13010                 ret = check_root_refs(root, &root_cache);
13011                 err |= !!ret;
13012                 if (ret) {
13013                         error("errors found in root refs");
13014                         goto out;
13015                 }
13016         }
13017
13018         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13019                 struct extent_buffer *eb;
13020
13021                 eb = list_first_entry(&root->fs_info->recow_ebs,
13022                                       struct extent_buffer, recow);
13023                 list_del_init(&eb->recow);
13024                 ret = recow_extent_buffer(root, eb);
13025                 err |= !!ret;
13026                 if (ret) {
13027                         error("fails to fix transid errors");
13028                         break;
13029                 }
13030         }
13031
13032         while (!list_empty(&delete_items)) {
13033                 struct bad_item *bad;
13034
13035                 bad = list_first_entry(&delete_items, struct bad_item, list);
13036                 list_del_init(&bad->list);
13037                 if (repair) {
13038                         ret = delete_bad_item(root, bad);
13039                         err |= !!ret;
13040                 }
13041                 free(bad);
13042         }
13043
13044         if (info->quota_enabled) {
13045                 fprintf(stderr, "checking quota groups\n");
13046                 ret = qgroup_verify_all(info);
13047                 err |= !!ret;
13048                 if (ret) {
13049                         error("failed to check quota groups");
13050                         goto out;
13051                 }
13052                 report_qgroups(0);
13053                 ret = repair_qgroups(info, &qgroups_repaired);
13054                 err |= !!ret;
13055                 if (err) {
13056                         error("failed to repair quota groups");
13057                         goto out;
13058                 }
13059                 ret = 0;
13060         }
13061
13062         if (!list_empty(&root->fs_info->recow_ebs)) {
13063                 error("transid errors in file system");
13064                 ret = 1;
13065                 err |= !!ret;
13066         }
13067 out:
13068         if (found_old_backref) { /*
13069                  * there was a disk format change when mixed
13070                  * backref was in testing tree. The old format
13071                  * existed about one week.
13072                  */
13073                 printf("\n * Found old mixed backref format. "
13074                        "The old format is not supported! *"
13075                        "\n * Please mount the FS in readonly mode, "
13076                        "backup data and re-format the FS. *\n\n");
13077                 err |= 1;
13078         }
13079         printf("found %llu bytes used, ",
13080                (unsigned long long)bytes_used);
13081         if (err)
13082                 printf("error(s) found\n");
13083         else
13084                 printf("no error found\n");
13085         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13086         printf("total tree bytes: %llu\n",
13087                (unsigned long long)total_btree_bytes);
13088         printf("total fs tree bytes: %llu\n",
13089                (unsigned long long)total_fs_tree_bytes);
13090         printf("total extent tree bytes: %llu\n",
13091                (unsigned long long)total_extent_tree_bytes);
13092         printf("btree space waste bytes: %llu\n",
13093                (unsigned long long)btree_space_waste);
13094         printf("file data blocks allocated: %llu\n referenced %llu\n",
13095                 (unsigned long long)data_bytes_allocated,
13096                 (unsigned long long)data_bytes_referenced);
13097
13098         free_qgroup_counts();
13099         free_root_recs_tree(&root_cache);
13100 close_out:
13101         close_ctree(root);
13102 err_out:
13103         if (ctx.progress_enabled)
13104                 task_deinit(ctx.info);
13105
13106         return err;
13107 }