btrfs-progs: move help defines to own header
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase currently reported by the progress indicator.  The first three
 * values index the status strings in print_status_check().
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Must remain the last element: it means "nothing to report". */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognised name */
	CHECK_MODE_UNKNOWN,
	/* Alias, not a distinct mode */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/*
 * Error bits for fs tree item problems.  Numbering starts at bit 1;
 * bit 0 is not assigned in this group.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* ROOT_DIR is bad */
#define DIR_ITEM_MISSING	(1 << 2)	/* no DIR_ITEM found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found, content differs */
#define INODE_REF_MISSING	(1 << 4)	/* no INODE_REF/INODE_EXTREF found */
#define INODE_ITEM_MISSING	(1 << 5)	/* no INODE_ITEM found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found, content differs */
#define FILE_EXTENT_ERROR	(1 << 7)	/* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* no CSUM_ITEM found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM without reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* this tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* no ROOT_REF found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found, content differs */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref.
 * That field is two bits wide so it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Error bits of a backref; print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/*
 * Error bits of inode_record::errors; print_inode_error() turns them
 * into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the exact value yet; the bits are only
 * used internally for error classification.  Every bit must be unique so
 * that distinct problems stay distinguishable: CROSSING_STRIPE_BOUNDARY
 * used to share (1 << 4) with REFERENCER_MISMATCH and now has its own bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress task epilogue: terminate the status line with a newline and
 * flush stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct btrfs_root *root,
1482                             struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (name_len <= BTRFS_NAME_LEN) {
1517                         len = name_len;
1518                         error = 0;
1519                 } else {
1520                         len = BTRFS_NAME_LEN;
1521                         error = REF_ERR_NAME_TOO_LONG;
1522                 }
1523                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524
1525                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1526                         add_inode_backref(inode_cache, location.objectid,
1527                                           key->objectid, key->offset, namebuf,
1528                                           len, filetype, key->type, error);
1529                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1530                         add_inode_backref(root_cache, location.objectid,
1531                                           key->objectid, key->offset,
1532                                           namebuf, len, filetype,
1533                                           key->type, error);
1534                 } else {
1535                         fprintf(stderr, "invalid location in dir item %u\n",
1536                                 location.type);
1537                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1538                                           key->objectid, key->offset, namebuf,
1539                                           len, filetype, key->type, error);
1540                 }
1541
1542                 len = sizeof(*di) + name_len + data_len;
1543                 di = (struct btrfs_dir_item *)((char *)di + len);
1544                 cur += len;
1545         }
1546         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1547                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1548
1549         return 0;
1550 }
1551
1552 static int process_inode_ref(struct extent_buffer *eb,
1553                              int slot, struct btrfs_key *key,
1554                              struct shared_node *active_node)
1555 {
1556         u32 total;
1557         u32 cur = 0;
1558         u32 len;
1559         u32 name_len;
1560         u64 index;
1561         int error;
1562         struct cache_tree *inode_cache;
1563         struct btrfs_inode_ref *ref;
1564         char namebuf[BTRFS_NAME_LEN];
1565
1566         inode_cache = &active_node->inode_cache;
1567
1568         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1569         total = btrfs_item_size_nr(eb, slot);
1570         while (cur < total) {
1571                 name_len = btrfs_inode_ref_name_len(eb, ref);
1572                 index = btrfs_inode_ref_index(eb, ref);
1573                 if (name_len <= BTRFS_NAME_LEN) {
1574                         len = name_len;
1575                         error = 0;
1576                 } else {
1577                         len = BTRFS_NAME_LEN;
1578                         error = REF_ERR_NAME_TOO_LONG;
1579                 }
1580                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1581                 add_inode_backref(inode_cache, key->objectid, key->offset,
1582                                   index, namebuf, len, 0, key->type, error);
1583
1584                 len = sizeof(*ref) + name_len;
1585                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1586                 cur += len;
1587         }
1588         return 0;
1589 }
1590
1591 static int process_inode_extref(struct extent_buffer *eb,
1592                                 int slot, struct btrfs_key *key,
1593                                 struct shared_node *active_node)
1594 {
1595         u32 total;
1596         u32 cur = 0;
1597         u32 len;
1598         u32 name_len;
1599         u64 index;
1600         u64 parent;
1601         int error;
1602         struct cache_tree *inode_cache;
1603         struct btrfs_inode_extref *extref;
1604         char namebuf[BTRFS_NAME_LEN];
1605
1606         inode_cache = &active_node->inode_cache;
1607
1608         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1609         total = btrfs_item_size_nr(eb, slot);
1610         while (cur < total) {
1611                 name_len = btrfs_inode_extref_name_len(eb, extref);
1612                 index = btrfs_inode_extref_index(eb, extref);
1613                 parent = btrfs_inode_extref_parent(eb, extref);
1614                 if (name_len <= BTRFS_NAME_LEN) {
1615                         len = name_len;
1616                         error = 0;
1617                 } else {
1618                         len = BTRFS_NAME_LEN;
1619                         error = REF_ERR_NAME_TOO_LONG;
1620                 }
1621                 read_extent_buffer(eb, namebuf,
1622                                    (unsigned long)(extref + 1), len);
1623                 add_inode_backref(inode_cache, key->objectid, parent,
1624                                   index, namebuf, len, 0, key->type, error);
1625
1626                 len = sizeof(*extref) + name_len;
1627                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1628                 cur += len;
1629         }
1630         return 0;
1631
1632 }
1633
1634 static int count_csum_range(struct btrfs_root *root, u64 start,
1635                             u64 len, u64 *found)
1636 {
1637         struct btrfs_key key;
1638         struct btrfs_path path;
1639         struct extent_buffer *leaf;
1640         int ret;
1641         size_t size;
1642         *found = 0;
1643         u64 csum_end;
1644         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645
1646         btrfs_init_path(&path);
1647
1648         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649         key.offset = start;
1650         key.type = BTRFS_EXTENT_CSUM_KEY;
1651
1652         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1653                                 &key, &path, 0, 0);
1654         if (ret < 0)
1655                 goto out;
1656         if (ret > 0 && path.slots[0] > 0) {
1657                 leaf = path.nodes[0];
1658                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1659                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1660                     key.type == BTRFS_EXTENT_CSUM_KEY)
1661                         path.slots[0]--;
1662         }
1663
1664         while (len > 0) {
1665                 leaf = path.nodes[0];
1666                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1667                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1668                         if (ret > 0)
1669                                 break;
1670                         else if (ret < 0)
1671                                 goto out;
1672                         leaf = path.nodes[0];
1673                 }
1674
1675                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1676                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1677                     key.type != BTRFS_EXTENT_CSUM_KEY)
1678                         break;
1679
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1681                 if (key.offset >= start + len)
1682                         break;
1683
1684                 if (key.offset > start)
1685                         start = key.offset;
1686
1687                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1688                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1689                 if (csum_end > start) {
1690                         size = min(csum_end - start, len);
1691                         len -= size;
1692                         start += size;
1693                         *found += size;
1694                 }
1695
1696                 path.slots[0]++;
1697         }
1698 out:
1699         btrfs_release_path(&path);
1700         if (ret < 0)
1701                 return ret;
1702         return 0;
1703 }
1704
1705 static int process_file_extent(struct btrfs_root *root,
1706                                 struct extent_buffer *eb,
1707                                 int slot, struct btrfs_key *key,
1708                                 struct shared_node *active_node)
1709 {
1710         struct inode_record *rec;
1711         struct btrfs_file_extent_item *fi;
1712         u64 num_bytes = 0;
1713         u64 disk_bytenr = 0;
1714         u64 extent_offset = 0;
1715         u64 mask = root->sectorsize - 1;
1716         int extent_type;
1717         int ret;
1718
1719         rec = active_node->current;
1720         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1721         rec->found_file_extent = 1;
1722
1723         if (rec->extent_start == (u64)-1) {
1724                 rec->extent_start = key->offset;
1725                 rec->extent_end = key->offset;
1726         }
1727
1728         if (rec->extent_end > key->offset)
1729                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1730         else if (rec->extent_end < key->offset) {
1731                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1732                                            key->offset - rec->extent_end);
1733                 if (ret < 0)
1734                         return ret;
1735         }
1736
1737         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1738         extent_type = btrfs_file_extent_type(eb, fi);
1739
1740         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1741                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742                 if (num_bytes == 0)
1743                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1744                 rec->found_size += num_bytes;
1745                 num_bytes = (num_bytes + mask) & ~mask;
1746         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1747                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1748                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1749                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1750                 extent_offset = btrfs_file_extent_offset(eb, fi);
1751                 if (num_bytes == 0 || (num_bytes & mask))
1752                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1753                 if (num_bytes + extent_offset >
1754                     btrfs_file_extent_ram_bytes(eb, fi))
1755                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1756                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1757                     (btrfs_file_extent_compression(eb, fi) ||
1758                      btrfs_file_extent_encryption(eb, fi) ||
1759                      btrfs_file_extent_other_encoding(eb, fi)))
1760                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1761                 if (disk_bytenr > 0)
1762                         rec->found_size += num_bytes;
1763         } else {
1764                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765         }
1766         rec->extent_end = key->offset + num_bytes;
1767
1768         /*
1769          * The data reloc tree will copy full extents into its inode and then
1770          * copy the corresponding csums.  Because the extent it copied could be
1771          * a preallocated extent that hasn't been written to yet there may be no
1772          * csums to copy, ergo we won't have csums for our file extent.  This is
1773          * ok so just don't bother checking csums if the inode belongs to the
1774          * data reloc tree.
1775          */
1776         if (disk_bytenr > 0 &&
1777             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1778                 u64 found;
1779                 if (btrfs_file_extent_compression(eb, fi))
1780                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781                 else
1782                         disk_bytenr += extent_offset;
1783
1784                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1785                 if (ret < 0)
1786                         return ret;
1787                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788                         if (found > 0)
1789                                 rec->found_csum_item = 1;
1790                         if (found < num_bytes)
1791                                 rec->some_csum_missing = 1;
1792                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793                         if (found > 0)
1794                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1795                 }
1796         }
1797         return 0;
1798 }
1799
1800 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1801                             struct walk_control *wc)
1802 {
1803         struct btrfs_key key;
1804         u32 nritems;
1805         int i;
1806         int ret = 0;
1807         struct cache_tree *inode_cache;
1808         struct shared_node *active_node;
1809
1810         if (wc->root_level == wc->active_node &&
1811             btrfs_root_refs(&root->root_item) == 0)
1812                 return 0;
1813
1814         active_node = wc->nodes[wc->active_node];
1815         inode_cache = &active_node->inode_cache;
1816         nritems = btrfs_header_nritems(eb);
1817         for (i = 0; i < nritems; i++) {
1818                 btrfs_item_key_to_cpu(eb, &key, i);
1819
1820                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821                         continue;
1822                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1823                         continue;
1824
1825                 if (active_node->current == NULL ||
1826                     active_node->current->ino < key.objectid) {
1827                         if (active_node->current) {
1828                                 active_node->current->checked = 1;
1829                                 maybe_free_inode_rec(inode_cache,
1830                                                      active_node->current);
1831                         }
1832                         active_node->current = get_inode_rec(inode_cache,
1833                                                              key.objectid, 1);
1834                         BUG_ON(IS_ERR(active_node->current));
1835                 }
1836                 switch (key.type) {
1837                 case BTRFS_DIR_ITEM_KEY:
1838                 case BTRFS_DIR_INDEX_KEY:
1839                         ret = process_dir_item(root, eb, i, &key, active_node);
1840                         break;
1841                 case BTRFS_INODE_REF_KEY:
1842                         ret = process_inode_ref(eb, i, &key, active_node);
1843                         break;
1844                 case BTRFS_INODE_EXTREF_KEY:
1845                         ret = process_inode_extref(eb, i, &key, active_node);
1846                         break;
1847                 case BTRFS_INODE_ITEM_KEY:
1848                         ret = process_inode_item(eb, i, &key, active_node);
1849                         break;
1850                 case BTRFS_EXTENT_DATA_KEY:
1851                         ret = process_file_extent(root, eb, i, &key,
1852                                                   active_node);
1853                         break;
1854                 default:
1855                         break;
1856                 };
1857         }
1858         return ret;
1859 }
1860
/*
 * Per-level cache of tree block reference information, indexed by tree
 * level.  Filled in by update_nodes_refs() and walk_down_tree().
 */
struct node_refs {
        /* start of the tree block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count btrfs_lookup_extent_info() returned for it */
        u64 refs[BTRFS_MAX_LEVEL];
        /* need_check() result for that block; 1 when refs is not above 1 */
        int need_check[BTRFS_MAX_LEVEL];
};
1866
/*
 * Forward declarations: process_one_leaf_v2() below calls both functions
 * before their definitions appear in this file.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1871
1872 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1873                                struct node_refs *nrefs, int *level, int ext_ref)
1874 {
1875         struct extent_buffer *cur = path->nodes[0];
1876         struct btrfs_key key;
1877         u64 cur_bytenr;
1878         u32 nritems;
1879         u64 first_ino = 0;
1880         int root_level = btrfs_header_level(root->node);
1881         int i;
1882         int ret = 0; /* Final return value */
1883         int err = 0; /* Positive error bitmap */
1884
1885         cur_bytenr = cur->start;
1886
1887         /* skip to first inode item or the first inode number change */
1888         nritems = btrfs_header_nritems(cur);
1889         for (i = 0; i < nritems; i++) {
1890                 btrfs_item_key_to_cpu(cur, &key, i);
1891                 if (i == 0)
1892                         first_ino = key.objectid;
1893                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1894                     (first_ino && first_ino != key.objectid))
1895                         break;
1896         }
1897         if (i == nritems) {
1898                 path->slots[0] = nritems;
1899                 return 0;
1900         }
1901         path->slots[0] = i;
1902
1903 again:
1904         err |= check_inode_item(root, path, ext_ref);
1905
1906         if (err & LAST_ITEM)
1907                 goto out;
1908
1909         /* still have inode items in thie leaf */
1910         if (cur->start == cur_bytenr)
1911                 goto again;
1912
1913         /*
1914          * we have switched to another leaf, above nodes may
1915          * have changed, here walk down the path, if a node
1916          * or leaf is shared, check whether we can skip this
1917          * node or leaf.
1918          */
1919         for (i = root_level; i >= 0; i--) {
1920                 if (path->nodes[i]->start == nrefs->bytenr[i])
1921                         continue;
1922
1923                 ret = update_nodes_refs(root,
1924                                 path->nodes[i]->start,
1925                                 nrefs, i);
1926                 if (ret)
1927                         goto out;
1928
1929                 if (!nrefs->need_check[i]) {
1930                         *level += 1;
1931                         break;
1932                 }
1933         }
1934
1935         for (i = 0; i < *level; i++) {
1936                 free_extent_buffer(path->nodes[i]);
1937                 path->nodes[i] = NULL;
1938         }
1939 out:
1940         err &= ~LAST_ITEM;
1941         /*
1942          * Convert any error bitmap to -EIO, as we should avoid
1943          * mixing positive and negative return value to represent
1944          * error
1945          */
1946         if (err && !ret)
1947                 ret = -EIO;
1948         return ret;
1949 }
1950
1951 static void reada_walk_down(struct btrfs_root *root,
1952                             struct extent_buffer *node, int slot)
1953 {
1954         u64 bytenr;
1955         u64 ptr_gen;
1956         u32 nritems;
1957         u32 blocksize;
1958         int i;
1959         int level;
1960
1961         level = btrfs_header_level(node);
1962         if (level != 1)
1963                 return;
1964
1965         nritems = btrfs_header_nritems(node);
1966         blocksize = root->nodesize;
1967         for (i = slot; i < nritems; i++) {
1968                 bytenr = btrfs_node_blockptr(node, i);
1969                 ptr_gen = btrfs_node_ptr_generation(node, i);
1970                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1971         }
1972 }
1973
1974 /*
1975  * Check the child node/leaf by the following condition:
1976  * 1. the first item key of the node/leaf should be the same with the one
1977  *    in parent.
1978  * 2. block in parent node should match the child node/leaf.
1979  * 3. generation of parent node and child's header should be consistent.
1980  *
1981  * Or the child node/leaf pointed by the key in parent is not valid.
1982  *
1983  * We hope to check leaf owner too, but since subvol may share leaves,
1984  * which makes leaf owner check not so strong, key check should be
1985  * sufficient enough for that case.
1986  */
1987 static int check_child_node(struct btrfs_root *root,
1988                             struct extent_buffer *parent, int slot,
1989                             struct extent_buffer *child)
1990 {
1991         struct btrfs_key parent_key;
1992         struct btrfs_key child_key;
1993         int ret = 0;
1994
1995         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1996         if (btrfs_header_level(child) == 0)
1997                 btrfs_item_key_to_cpu(child, &child_key, 0);
1998         else
1999                 btrfs_node_key_to_cpu(child, &child_key, 0);
2000
2001         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002                 ret = -EINVAL;
2003                 fprintf(stderr,
2004                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2005                         parent_key.objectid, parent_key.type, parent_key.offset,
2006                         child_key.objectid, child_key.type, child_key.offset);
2007         }
2008         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2009                 ret = -EINVAL;
2010                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2011                         btrfs_node_blockptr(parent, slot),
2012                         btrfs_header_bytenr(child));
2013         }
2014         if (btrfs_node_ptr_generation(parent, slot) !=
2015             btrfs_header_generation(child)) {
2016                 ret = -EINVAL;
2017                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2018                         btrfs_header_generation(child),
2019                         btrfs_node_ptr_generation(parent, slot));
2020         }
2021         return ret;
2022 }
2023
2024 /*
2025  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2026  * in every fs or file tree check. Here we find its all root ids, and only check
2027  * it in the fs or file tree which has the smallest root id.
2028  */
2029 static int need_check(struct btrfs_root *root, struct ulist *roots)
2030 {
2031         struct rb_node *node;
2032         struct ulist_node *u;
2033
2034         if (roots->nnodes == 1)
2035                 return 1;
2036
2037         node = rb_first(&roots->root);
2038         u = rb_entry(node, struct ulist_node, rb_node);
2039         /*
2040          * current root id is not smallest, we skip it and let it be checked
2041          * in the fs or file tree who hash the smallest root id.
2042          */
2043         if (root->objectid != u->val)
2044                 return 0;
2045
2046         return 1;
2047 }
2048
2049 /*
2050  * for a tree node or leaf, we record its reference count, so later if we still
2051  * process this node or leaf, don't need to compute its reference count again.
2052  */
2053 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2054                              struct node_refs *nrefs, u64 level)
2055 {
2056         int check, ret;
2057         u64 refs;
2058         struct ulist *roots;
2059
2060         if (nrefs->bytenr[level] != bytenr) {
2061                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2062                                        level, 1, &refs, NULL);
2063                 if (ret < 0)
2064                         return ret;
2065
2066                 nrefs->bytenr[level] = bytenr;
2067                 nrefs->refs[level] = refs;
2068                 if (refs > 1) {
2069                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2070                                                    0, &roots);
2071                         if (ret)
2072                                 return -EIO;
2073
2074                         check = need_check(root, roots);
2075                         ulist_free(roots);
2076                         nrefs->need_check[level] = check;
2077                 } else {
2078                         nrefs->need_check[level] = 1;
2079                 }
2080         }
2081
2082         return 0;
2083 }
2084
2085 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2086                           struct walk_control *wc, int *level,
2087                           struct node_refs *nrefs)
2088 {
2089         enum btrfs_tree_block_status status;
2090         u64 bytenr;
2091         u64 ptr_gen;
2092         struct extent_buffer *next;
2093         struct extent_buffer *cur;
2094         u32 blocksize;
2095         int ret, err = 0;
2096         u64 refs;
2097
2098         WARN_ON(*level < 0);
2099         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2100
2101         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2102                 refs = nrefs->refs[*level];
2103                 ret = 0;
2104         } else {
2105                 ret = btrfs_lookup_extent_info(NULL, root,
2106                                        path->nodes[*level]->start,
2107                                        *level, 1, &refs, NULL);
2108                 if (ret < 0) {
2109                         err = ret;
2110                         goto out;
2111                 }
2112                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2113                 nrefs->refs[*level] = refs;
2114         }
2115
2116         if (refs > 1) {
2117                 ret = enter_shared_node(root, path->nodes[*level]->start,
2118                                         refs, wc, *level);
2119                 if (ret > 0) {
2120                         err = ret;
2121                         goto out;
2122                 }
2123         }
2124
2125         while (*level >= 0) {
2126                 WARN_ON(*level < 0);
2127                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2128                 cur = path->nodes[*level];
2129
2130                 if (btrfs_header_level(cur) != *level)
2131                         WARN_ON(1);
2132
2133                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134                         break;
2135                 if (*level == 0) {
2136                         ret = process_one_leaf(root, cur, wc);
2137                         if (ret < 0)
2138                                 err = ret;
2139                         break;
2140                 }
2141                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2142                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2143                 blocksize = root->nodesize;
2144
2145                 if (bytenr == nrefs->bytenr[*level - 1]) {
2146                         refs = nrefs->refs[*level - 1];
2147                 } else {
2148                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2149                                         *level - 1, 1, &refs, NULL);
2150                         if (ret < 0) {
2151                                 refs = 0;
2152                         } else {
2153                                 nrefs->bytenr[*level - 1] = bytenr;
2154                                 nrefs->refs[*level - 1] = refs;
2155                         }
2156                 }
2157
2158                 if (refs > 1) {
2159                         ret = enter_shared_node(root, bytenr, refs,
2160                                                 wc, *level - 1);
2161                         if (ret > 0) {
2162                                 path->slots[*level]++;
2163                                 continue;
2164                         }
2165                 }
2166
2167                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2168                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2169                         free_extent_buffer(next);
2170                         reada_walk_down(root, cur, path->slots[*level]);
2171                         next = read_tree_block(root, bytenr, blocksize,
2172                                                ptr_gen);
2173                         if (!extent_buffer_uptodate(next)) {
2174                                 struct btrfs_key node_key;
2175
2176                                 btrfs_node_key_to_cpu(path->nodes[*level],
2177                                                       &node_key,
2178                                                       path->slots[*level]);
2179                                 btrfs_add_corrupt_extent_record(root->fs_info,
2180                                                 &node_key,
2181                                                 path->nodes[*level]->start,
2182                                                 root->nodesize, *level);
2183                                 err = -EIO;
2184                                 goto out;
2185                         }
2186                 }
2187
2188                 ret = check_child_node(root, cur, path->slots[*level], next);
2189                 if (ret) {
2190                         err = ret;
2191                         goto out;
2192                 }
2193
2194                 if (btrfs_is_leaf(next))
2195                         status = btrfs_check_leaf(root, NULL, next);
2196                 else
2197                         status = btrfs_check_node(root, NULL, next);
2198                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2199                         free_extent_buffer(next);
2200                         err = -EIO;
2201                         goto out;
2202                 }
2203
2204                 *level = *level - 1;
2205                 free_extent_buffer(path->nodes[*level]);
2206                 path->nodes[*level] = next;
2207                 path->slots[*level] = 0;
2208         }
2209 out:
2210         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2211         return err;
2212 }
2213
/*
 * Prototype of check_inode_item(); repeats the forward declaration that
 * appears before process_one_leaf_v2() earlier in this file.
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
2216
2217 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2218                              int *level, struct node_refs *nrefs, int ext_ref)
2219 {
2220         enum btrfs_tree_block_status status;
2221         u64 bytenr;
2222         u64 ptr_gen;
2223         struct extent_buffer *next;
2224         struct extent_buffer *cur;
2225         u32 blocksize;
2226         int ret;
2227
2228         WARN_ON(*level < 0);
2229         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2230
2231         ret = update_nodes_refs(root, path->nodes[*level]->start,
2232                                 nrefs, *level);
2233         if (ret < 0)
2234                 return ret;
2235
2236         while (*level >= 0) {
2237                 WARN_ON(*level < 0);
2238                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2239                 cur = path->nodes[*level];
2240
2241                 if (btrfs_header_level(cur) != *level)
2242                         WARN_ON(1);
2243
2244                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2245                         break;
2246                 /* Don't forgot to check leaf/node validation */
2247                 if (*level == 0) {
2248                         ret = btrfs_check_leaf(root, NULL, cur);
2249                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2250                                 ret = -EIO;
2251                                 break;
2252                         }
2253                         ret = process_one_leaf_v2(root, path, nrefs,
2254                                                   level, ext_ref);
2255                         break;
2256                 } else {
2257                         ret = btrfs_check_node(root, NULL, cur);
2258                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2259                                 ret = -EIO;
2260                                 break;
2261                         }
2262                 }
2263                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2264                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2265                 blocksize = root->nodesize;
2266
2267                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2268                 if (ret)
2269                         break;
2270                 if (!nrefs->need_check[*level - 1]) {
2271                         path->slots[*level]++;
2272                         continue;
2273                 }
2274
2275                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2276                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2277                         free_extent_buffer(next);
2278                         reada_walk_down(root, cur, path->slots[*level]);
2279                         next = read_tree_block(root, bytenr, blocksize,
2280                                                ptr_gen);
2281                         if (!extent_buffer_uptodate(next)) {
2282                                 struct btrfs_key node_key;
2283
2284                                 btrfs_node_key_to_cpu(path->nodes[*level],
2285                                                       &node_key,
2286                                                       path->slots[*level]);
2287                                 btrfs_add_corrupt_extent_record(root->fs_info,
2288                                                 &node_key,
2289                                                 path->nodes[*level]->start,
2290                                                 root->nodesize, *level);
2291                                 ret = -EIO;
2292                                 break;
2293                         }
2294                 }
2295
2296                 ret = check_child_node(root, cur, path->slots[*level], next);
2297                 if (ret < 0) 
2298                         break;
2299
2300                 if (btrfs_is_leaf(next))
2301                         status = btrfs_check_leaf(root, NULL, next);
2302                 else
2303                         status = btrfs_check_node(root, NULL, next);
2304                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2305                         free_extent_buffer(next);
2306                         ret = -EIO;
2307                         break;
2308                 }
2309
2310                 *level = *level - 1;
2311                 free_extent_buffer(path->nodes[*level]);
2312                 path->nodes[*level] = next;
2313                 path->slots[*level] = 0;
2314         }
2315         return ret;
2316 }
2317
2318 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2319                         struct walk_control *wc, int *level)
2320 {
2321         int i;
2322         struct extent_buffer *leaf;
2323
2324         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2325                 leaf = path->nodes[i];
2326                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2327                         path->slots[i]++;
2328                         *level = i;
2329                         return 0;
2330                 } else {
2331                         free_extent_buffer(path->nodes[*level]);
2332                         path->nodes[*level] = NULL;
2333                         BUG_ON(*level > wc->active_node);
2334                         if (*level == wc->active_node)
2335                                 leave_shared_node(root, wc, *level);
2336                         *level = i + 1;
2337                 }
2338         }
2339         return 1;
2340 }
2341
2342 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2343                            int *level)
2344 {
2345         int i;
2346         struct extent_buffer *leaf;
2347
2348         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349                 leaf = path->nodes[i];
2350                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351                         path->slots[i]++;
2352                         *level = i;
2353                         return 0;
2354                 } else {
2355                         free_extent_buffer(path->nodes[*level]);
2356                         path->nodes[*level] = NULL;
2357                         *level = i + 1;
2358                 }
2359         }
2360         return 1;
2361 }
2362
2363 static int check_root_dir(struct inode_record *rec)
2364 {
2365         struct inode_backref *backref;
2366         int ret = -1;
2367
2368         if (!rec->found_inode_item || rec->errors)
2369                 goto out;
2370         if (rec->nlink != 1 || rec->found_link != 0)
2371                 goto out;
2372         if (list_empty(&rec->backrefs))
2373                 goto out;
2374         backref = to_inode_backref(rec->backrefs.next);
2375         if (!backref->found_inode_ref)
2376                 goto out;
2377         if (backref->index != 0 || backref->namelen != 2 ||
2378             memcmp(backref->name, "..", 2))
2379                 goto out;
2380         if (backref->found_dir_index || backref->found_dir_item)
2381                 goto out;
2382         ret = 0;
2383 out:
2384         return ret;
2385 }
2386
2387 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2388                               struct btrfs_root *root, struct btrfs_path *path,
2389                               struct inode_record *rec)
2390 {
2391         struct btrfs_inode_item *ei;
2392         struct btrfs_key key;
2393         int ret;
2394
2395         key.objectid = rec->ino;
2396         key.type = BTRFS_INODE_ITEM_KEY;
2397         key.offset = (u64)-1;
2398
2399         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2400         if (ret < 0)
2401                 goto out;
2402         if (ret) {
2403                 if (!path->slots[0]) {
2404                         ret = -ENOENT;
2405                         goto out;
2406                 }
2407                 path->slots[0]--;
2408                 ret = 0;
2409         }
2410         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2411         if (key.objectid != rec->ino) {
2412                 ret = -ENOENT;
2413                 goto out;
2414         }
2415
2416         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2417                             struct btrfs_inode_item);
2418         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2419         btrfs_mark_buffer_dirty(path->nodes[0]);
2420         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2421         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2422                root->root_key.objectid);
2423 out:
2424         btrfs_release_path(path);
2425         return ret;
2426 }
2427
2428 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2429                                     struct btrfs_root *root,
2430                                     struct btrfs_path *path,
2431                                     struct inode_record *rec)
2432 {
2433         int ret;
2434
2435         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2436         btrfs_release_path(path);
2437         if (!ret)
2438                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2439         return ret;
2440 }
2441
2442 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2443                                struct btrfs_root *root,
2444                                struct btrfs_path *path,
2445                                struct inode_record *rec)
2446 {
2447         struct btrfs_inode_item *ei;
2448         struct btrfs_key key;
2449         int ret = 0;
2450
2451         key.objectid = rec->ino;
2452         key.type = BTRFS_INODE_ITEM_KEY;
2453         key.offset = 0;
2454
2455         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2456         if (ret) {
2457                 if (ret > 0)
2458                         ret = -ENOENT;
2459                 goto out;
2460         }
2461
2462         /* Since ret == 0, no need to check anything */
2463         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2464                             struct btrfs_inode_item);
2465         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2466         btrfs_mark_buffer_dirty(path->nodes[0]);
2467         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2468         printf("reset nbytes for ino %llu root %llu\n",
2469                rec->ino, root->root_key.objectid);
2470 out:
2471         btrfs_release_path(path);
2472         return ret;
2473 }
2474
2475 static int add_missing_dir_index(struct btrfs_root *root,
2476                                  struct cache_tree *inode_cache,
2477                                  struct inode_record *rec,
2478                                  struct inode_backref *backref)
2479 {
2480         struct btrfs_path path;
2481         struct btrfs_trans_handle *trans;
2482         struct btrfs_dir_item *dir_item;
2483         struct extent_buffer *leaf;
2484         struct btrfs_key key;
2485         struct btrfs_disk_key disk_key;
2486         struct inode_record *dir_rec;
2487         unsigned long name_ptr;
2488         u32 data_size = sizeof(*dir_item) + backref->namelen;
2489         int ret;
2490
2491         trans = btrfs_start_transaction(root, 1);
2492         if (IS_ERR(trans))
2493                 return PTR_ERR(trans);
2494
2495         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2496                 (unsigned long long)rec->ino);
2497
2498         btrfs_init_path(&path);
2499         key.objectid = backref->dir;
2500         key.type = BTRFS_DIR_INDEX_KEY;
2501         key.offset = backref->index;
2502         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2503         BUG_ON(ret);
2504
2505         leaf = path.nodes[0];
2506         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2507
2508         disk_key.objectid = cpu_to_le64(rec->ino);
2509         disk_key.type = BTRFS_INODE_ITEM_KEY;
2510         disk_key.offset = 0;
2511
2512         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2513         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2514         btrfs_set_dir_data_len(leaf, dir_item, 0);
2515         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2516         name_ptr = (unsigned long)(dir_item + 1);
2517         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2518         btrfs_mark_buffer_dirty(leaf);
2519         btrfs_release_path(&path);
2520         btrfs_commit_transaction(trans, root);
2521
2522         backref->found_dir_index = 1;
2523         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2524         BUG_ON(IS_ERR(dir_rec));
2525         if (!dir_rec)
2526                 return 0;
2527         dir_rec->found_size += backref->namelen;
2528         if (dir_rec->found_size == dir_rec->isize &&
2529             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2530                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2531         if (dir_rec->found_size != dir_rec->isize)
2532                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2533
2534         return 0;
2535 }
2536
2537 static int delete_dir_index(struct btrfs_root *root,
2538                             struct cache_tree *inode_cache,
2539                             struct inode_record *rec,
2540                             struct inode_backref *backref)
2541 {
2542         struct btrfs_trans_handle *trans;
2543         struct btrfs_dir_item *di;
2544         struct btrfs_path path;
2545         int ret = 0;
2546
2547         trans = btrfs_start_transaction(root, 1);
2548         if (IS_ERR(trans))
2549                 return PTR_ERR(trans);
2550
2551         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2552                 (unsigned long long)backref->dir,
2553                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2554                 (unsigned long long)root->objectid);
2555
2556         btrfs_init_path(&path);
2557         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2558                                     backref->name, backref->namelen,
2559                                     backref->index, -1);
2560         if (IS_ERR(di)) {
2561                 ret = PTR_ERR(di);
2562                 btrfs_release_path(&path);
2563                 btrfs_commit_transaction(trans, root);
2564                 if (ret == -ENOENT)
2565                         return 0;
2566                 return ret;
2567         }
2568
2569         if (!di)
2570                 ret = btrfs_del_item(trans, root, &path);
2571         else
2572                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2573         BUG_ON(ret);
2574         btrfs_release_path(&path);
2575         btrfs_commit_transaction(trans, root);
2576         return ret;
2577 }
2578
2579 static int create_inode_item(struct btrfs_root *root,
2580                              struct inode_record *rec,
2581                              struct inode_backref *backref, int root_dir)
2582 {
2583         struct btrfs_trans_handle *trans;
2584         struct btrfs_inode_item inode_item;
2585         time_t now = time(NULL);
2586         int ret;
2587
2588         trans = btrfs_start_transaction(root, 1);
2589         if (IS_ERR(trans)) {
2590                 ret = PTR_ERR(trans);
2591                 return ret;
2592         }
2593
2594         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2595                 "be incomplete, please check permissions and content after "
2596                 "the fsck completes.\n", (unsigned long long)root->objectid,
2597                 (unsigned long long)rec->ino);
2598
2599         memset(&inode_item, 0, sizeof(inode_item));
2600         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2601         if (root_dir)
2602                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2603         else
2604                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2605         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2606         if (rec->found_dir_item) {
2607                 if (rec->found_file_extent)
2608                         fprintf(stderr, "root %llu inode %llu has both a dir "
2609                                 "item and extents, unsure if it is a dir or a "
2610                                 "regular file so setting it as a directory\n",
2611                                 (unsigned long long)root->objectid,
2612                                 (unsigned long long)rec->ino);
2613                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2614                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2615         } else if (!rec->found_dir_item) {
2616                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2617                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2618         }
2619         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2620         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2621         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2622         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2623         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2624         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2625         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2626         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2627
2628         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2629         BUG_ON(ret);
2630         btrfs_commit_transaction(trans, root);
2631         return 0;
2632 }
2633
2634 static int repair_inode_backrefs(struct btrfs_root *root,
2635                                  struct inode_record *rec,
2636                                  struct cache_tree *inode_cache,
2637                                  int delete)
2638 {
2639         struct inode_backref *tmp, *backref;
2640         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2641         int ret = 0;
2642         int repaired = 0;
2643
2644         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2645                 if (!delete && rec->ino == root_dirid) {
2646                         if (!rec->found_inode_item) {
2647                                 ret = create_inode_item(root, rec, backref, 1);
2648                                 if (ret)
2649                                         break;
2650                                 repaired++;
2651                         }
2652                 }
2653
2654                 /* Index 0 for root dir's are special, don't mess with it */
2655                 if (rec->ino == root_dirid && backref->index == 0)
2656                         continue;
2657
2658                 if (delete &&
2659                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2660                      (backref->found_dir_index && backref->found_inode_ref &&
2661                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2662                         ret = delete_dir_index(root, inode_cache, rec, backref);
2663                         if (ret)
2664                                 break;
2665                         repaired++;
2666                         list_del(&backref->list);
2667                         free(backref);
2668                 }
2669
2670                 if (!delete && !backref->found_dir_index &&
2671                     backref->found_dir_item && backref->found_inode_ref) {
2672                         ret = add_missing_dir_index(root, inode_cache, rec,
2673                                                     backref);
2674                         if (ret)
2675                                 break;
2676                         repaired++;
2677                         if (backref->found_dir_item &&
2678                             backref->found_dir_index &&
2679                             backref->found_dir_index) {
2680                                 if (!backref->errors &&
2681                                     backref->found_inode_ref) {
2682                                         list_del(&backref->list);
2683                                         free(backref);
2684                                 }
2685                         }
2686                 }
2687
2688                 if (!delete && (!backref->found_dir_index &&
2689                                 !backref->found_dir_item &&
2690                                 backref->found_inode_ref)) {
2691                         struct btrfs_trans_handle *trans;
2692                         struct btrfs_key location;
2693
2694                         ret = check_dir_conflict(root, backref->name,
2695                                                  backref->namelen,
2696                                                  backref->dir,
2697                                                  backref->index);
2698                         if (ret) {
2699                                 /*
2700                                  * let nlink fixing routine to handle it,
2701                                  * which can do it better.
2702                                  */
2703                                 ret = 0;
2704                                 break;
2705                         }
2706                         location.objectid = rec->ino;
2707                         location.type = BTRFS_INODE_ITEM_KEY;
2708                         location.offset = 0;
2709
2710                         trans = btrfs_start_transaction(root, 1);
2711                         if (IS_ERR(trans)) {
2712                                 ret = PTR_ERR(trans);
2713                                 break;
2714                         }
2715                         fprintf(stderr, "adding missing dir index/item pair "
2716                                 "for inode %llu\n",
2717                                 (unsigned long long)rec->ino);
2718                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2719                                                     backref->namelen,
2720                                                     backref->dir, &location,
2721                                                     imode_to_type(rec->imode),
2722                                                     backref->index);
2723                         BUG_ON(ret);
2724                         btrfs_commit_transaction(trans, root);
2725                         repaired++;
2726                 }
2727
2728                 if (!delete && (backref->found_inode_ref &&
2729                                 backref->found_dir_index &&
2730                                 backref->found_dir_item &&
2731                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2732                                 !rec->found_inode_item)) {
2733                         ret = create_inode_item(root, rec, backref, 0);
2734                         if (ret)
2735                                 break;
2736                         repaired++;
2737                 }
2738
2739         }
2740         return ret ? ret : repaired;
2741 }
2742
2743 /*
2744  * To determine the file type for nlink/inode_item repair
2745  *
2746  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2747  * Return -ENOENT if file type is not found.
2748  */
2749 static int find_file_type(struct inode_record *rec, u8 *type)
2750 {
2751         struct inode_backref *backref;
2752
2753         /* For inode item recovered case */
2754         if (rec->found_inode_item) {
2755                 *type = imode_to_type(rec->imode);
2756                 return 0;
2757         }
2758
2759         list_for_each_entry(backref, &rec->backrefs, list) {
2760                 if (backref->found_dir_index || backref->found_dir_item) {
2761                         *type = backref->filetype;
2762                         return 0;
2763                 }
2764         }
2765         return -ENOENT;
2766 }
2767
2768 /*
2769  * To determine the file name for nlink repair
2770  *
2771  * Return 0 if file name is found, set name and namelen.
2772  * Return -ENOENT if file name is not found.
2773  */
2774 static int find_file_name(struct inode_record *rec,
2775                           char *name, int *namelen)
2776 {
2777         struct inode_backref *backref;
2778
2779         list_for_each_entry(backref, &rec->backrefs, list) {
2780                 if (backref->found_dir_index || backref->found_dir_item ||
2781                     backref->found_inode_ref) {
2782                         memcpy(name, backref->name, backref->namelen);
2783                         *namelen = backref->namelen;
2784                         return 0;
2785                 }
2786         }
2787         return -ENOENT;
2788 }
2789
2790 /* Reset the nlink of the inode to the correct one */
2791 static int reset_nlink(struct btrfs_trans_handle *trans,
2792                        struct btrfs_root *root,
2793                        struct btrfs_path *path,
2794                        struct inode_record *rec)
2795 {
2796         struct inode_backref *backref;
2797         struct inode_backref *tmp;
2798         struct btrfs_key key;
2799         struct btrfs_inode_item *inode_item;
2800         int ret = 0;
2801
2802         /* We don't believe this either, reset it and iterate backref */
2803         rec->found_link = 0;
2804
2805         /* Remove all backref including the valid ones */
2806         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2807                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2808                                    backref->index, backref->name,
2809                                    backref->namelen, 0);
2810                 if (ret < 0)
2811                         goto out;
2812
2813                 /* remove invalid backref, so it won't be added back */
2814                 if (!(backref->found_dir_index &&
2815                       backref->found_dir_item &&
2816                       backref->found_inode_ref)) {
2817                         list_del(&backref->list);
2818                         free(backref);
2819                 } else {
2820                         rec->found_link++;
2821                 }
2822         }
2823
2824         /* Set nlink to 0 */
2825         key.objectid = rec->ino;
2826         key.type = BTRFS_INODE_ITEM_KEY;
2827         key.offset = 0;
2828         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2829         if (ret < 0)
2830                 goto out;
2831         if (ret > 0) {
2832                 ret = -ENOENT;
2833                 goto out;
2834         }
2835         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2836                                     struct btrfs_inode_item);
2837         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2838         btrfs_mark_buffer_dirty(path->nodes[0]);
2839         btrfs_release_path(path);
2840
2841         /*
2842          * Add back valid inode_ref/dir_item/dir_index,
2843          * add_link() will handle the nlink inc, so new nlink must be correct
2844          */
2845         list_for_each_entry(backref, &rec->backrefs, list) {
2846                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2847                                      backref->name, backref->namelen,
2848                                      backref->filetype, &backref->index, 1);
2849                 if (ret < 0)
2850                         goto out;
2851         }
2852 out:
2853         btrfs_release_path(path);
2854         return ret;
2855 }
2856
2857 static int get_highest_inode(struct btrfs_trans_handle *trans,
2858                                 struct btrfs_root *root,
2859                                 struct btrfs_path *path,
2860                                 u64 *highest_ino)
2861 {
2862         struct btrfs_key key, found_key;
2863         int ret;
2864
2865         btrfs_init_path(path);
2866         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2867         key.offset = -1;
2868         key.type = BTRFS_INODE_ITEM_KEY;
2869         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2870         if (ret == 1) {
2871                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2872                                 path->slots[0] - 1);
2873                 *highest_ino = found_key.objectid;
2874                 ret = 0;
2875         }
2876         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2877                 ret = -EOVERFLOW;
2878         btrfs_release_path(path);
2879         return ret;
2880 }
2881
2882 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2883                                struct btrfs_root *root,
2884                                struct btrfs_path *path,
2885                                struct inode_record *rec)
2886 {
2887         char *dir_name = "lost+found";
2888         char namebuf[BTRFS_NAME_LEN] = {0};
2889         u64 lost_found_ino;
2890         u32 mode = 0700;
2891         u8 type = 0;
2892         int namelen = 0;
2893         int name_recovered = 0;
2894         int type_recovered = 0;
2895         int ret = 0;
2896
2897         /*
2898          * Get file name and type first before these invalid inode ref
2899          * are deleted by remove_all_invalid_backref()
2900          */
2901         name_recovered = !find_file_name(rec, namebuf, &namelen);
2902         type_recovered = !find_file_type(rec, &type);
2903
2904         if (!name_recovered) {
2905                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2906                        rec->ino, rec->ino);
2907                 namelen = count_digits(rec->ino);
2908                 sprintf(namebuf, "%llu", rec->ino);
2909                 name_recovered = 1;
2910         }
2911         if (!type_recovered) {
2912                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2913                        rec->ino);
2914                 type = BTRFS_FT_REG_FILE;
2915                 type_recovered = 1;
2916         }
2917
2918         ret = reset_nlink(trans, root, path, rec);
2919         if (ret < 0) {
2920                 fprintf(stderr,
2921                         "Failed to reset nlink for inode %llu: %s\n",
2922                         rec->ino, strerror(-ret));
2923                 goto out;
2924         }
2925
2926         if (rec->found_link == 0) {
2927                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2928                 if (ret < 0)
2929                         goto out;
2930                 lost_found_ino++;
2931                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2932                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2933                                   mode);
2934                 if (ret < 0) {
2935                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2936                                 dir_name, strerror(-ret));
2937                         goto out;
2938                 }
2939                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2940                                      namebuf, namelen, type, NULL, 1);
2941                 /*
2942                  * Add ".INO" suffix several times to handle case where
2943                  * "FILENAME.INO" is already taken by another file.
2944                  */
2945                 while (ret == -EEXIST) {
2946                         /*
2947                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2948                          */
2949                         if (namelen + count_digits(rec->ino) + 1 >
2950                             BTRFS_NAME_LEN) {
2951                                 ret = -EFBIG;
2952                                 goto out;
2953                         }
2954                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2955                                  ".%llu", rec->ino);
2956                         namelen += count_digits(rec->ino) + 1;
2957                         ret = btrfs_add_link(trans, root, rec->ino,
2958                                              lost_found_ino, namebuf,
2959                                              namelen, type, NULL, 1);
2960                 }
2961                 if (ret < 0) {
2962                         fprintf(stderr,
2963                                 "Failed to link the inode %llu to %s dir: %s\n",
2964                                 rec->ino, dir_name, strerror(-ret));
2965                         goto out;
2966                 }
2967                 /*
2968                  * Just increase the found_link, don't actually add the
2969                  * backref. This will make things easier and this inode
2970                  * record will be freed after the repair is done.
2971                  * So fsck will not report problem about this inode.
2972                  */
2973                 rec->found_link++;
2974                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2975                        namelen, namebuf, dir_name);
2976         }
2977         printf("Fixed the nlink of inode %llu\n", rec->ino);
2978 out:
2979         /*
2980          * Clear the flag anyway, or we will loop forever for the same inode
2981          * as it will not be removed from the bad inode list and the dead loop
2982          * happens.
2983          */
2984         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2985         btrfs_release_path(path);
2986         return ret;
2987 }
2988
2989 /*
2990  * Check if there is any normal(reg or prealloc) file extent for given
2991  * ino.
2992  * This is used to determine the file type when neither its dir_index/item or
2993  * inode_item exists.
2994  *
2995  * This will *NOT* report error, if any error happens, just consider it does
2996  * not have any normal file extent.
2997  */
2998 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2999 {
3000         struct btrfs_path path;
3001         struct btrfs_key key;
3002         struct btrfs_key found_key;
3003         struct btrfs_file_extent_item *fi;
3004         u8 type;
3005         int ret = 0;
3006
3007         btrfs_init_path(&path);
3008         key.objectid = ino;
3009         key.type = BTRFS_EXTENT_DATA_KEY;
3010         key.offset = 0;
3011
3012         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3013         if (ret < 0) {
3014                 ret = 0;
3015                 goto out;
3016         }
3017         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3018                 ret = btrfs_next_leaf(root, &path);
3019                 if (ret) {
3020                         ret = 0;
3021                         goto out;
3022                 }
3023         }
3024         while (1) {
3025                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3026                                       path.slots[0]);
3027                 if (found_key.objectid != ino ||
3028                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3029                         break;
3030                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3031                                     struct btrfs_file_extent_item);
3032                 type = btrfs_file_extent_type(path.nodes[0], fi);
3033                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3034                         ret = 1;
3035                         goto out;
3036                 }
3037         }
3038 out:
3039         btrfs_release_path(&path);
3040         return ret;
3041 }
3042
3043 static u32 btrfs_type_to_imode(u8 type)
3044 {
3045         static u32 imode_by_btrfs_type[] = {
3046                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3047                 [BTRFS_FT_DIR]          = S_IFDIR,
3048                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3049                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3050                 [BTRFS_FT_FIFO]         = S_IFIFO,
3051                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3052                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3053         };
3054
3055         return imode_by_btrfs_type[(type)];
3056 }
3057
3058 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3059                                 struct btrfs_root *root,
3060                                 struct btrfs_path *path,
3061                                 struct inode_record *rec)
3062 {
3063         u8 filetype;
3064         u32 mode = 0700;
3065         int type_recovered = 0;
3066         int ret = 0;
3067
3068         printf("Trying to rebuild inode:%llu\n", rec->ino);
3069
3070         type_recovered = !find_file_type(rec, &filetype);
3071
3072         /*
3073          * Try to determine inode type if type not found.
3074          *
3075          * For found regular file extent, it must be FILE.
3076          * For found dir_item/index, it must be DIR.
3077          *
3078          * For undetermined one, use FILE as fallback.
3079          *
3080          * TODO:
3081          * 1. If found backref(inode_index/item is already handled) to it,
3082          *    it must be DIR.
3083          *    Need new inode-inode ref structure to allow search for that.
3084          */
3085         if (!type_recovered) {
3086                 if (rec->found_file_extent &&
3087                     find_normal_file_extent(root, rec->ino)) {
3088                         type_recovered = 1;
3089                         filetype = BTRFS_FT_REG_FILE;
3090                 } else if (rec->found_dir_item) {
3091                         type_recovered = 1;
3092                         filetype = BTRFS_FT_DIR;
3093                 } else if (!list_empty(&rec->orphan_extents)) {
3094                         type_recovered = 1;
3095                         filetype = BTRFS_FT_REG_FILE;
3096                 } else{
3097                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3098                                rec->ino);
3099                         type_recovered = 1;
3100                         filetype = BTRFS_FT_REG_FILE;
3101                 }
3102         }
3103
3104         ret = btrfs_new_inode(trans, root, rec->ino,
3105                               mode | btrfs_type_to_imode(filetype));
3106         if (ret < 0)
3107                 goto out;
3108
3109         /*
3110          * Here inode rebuild is done, we only rebuild the inode item,
3111          * don't repair the nlink(like move to lost+found).
3112          * That is the job of nlink repair.
3113          *
3114          * We just fill the record and return
3115          */
3116         rec->found_dir_item = 1;
3117         rec->imode = mode | btrfs_type_to_imode(filetype);
3118         rec->nlink = 0;
3119         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3120         /* Ensure the inode_nlinks repair function will be called */
3121         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3122 out:
3123         return ret;
3124 }
3125
3126 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3127                                       struct btrfs_root *root,
3128                                       struct btrfs_path *path,
3129                                       struct inode_record *rec)
3130 {
3131         struct orphan_data_extent *orphan;
3132         struct orphan_data_extent *tmp;
3133         int ret = 0;
3134
3135         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3136                 /*
3137                  * Check for conflicting file extents
3138                  *
3139                  * Here we don't know whether the extents is compressed or not,
3140                  * so we can only assume it not compressed nor data offset,
3141                  * and use its disk_len as extent length.
3142                  */
3143                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3144                                        orphan->offset, orphan->disk_len, 0);
3145                 btrfs_release_path(path);
3146                 if (ret < 0)
3147                         goto out;
3148                 if (!ret) {
3149                         fprintf(stderr,
3150                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3151                                 orphan->disk_bytenr, orphan->disk_len);
3152                         ret = btrfs_free_extent(trans,
3153                                         root->fs_info->extent_root,
3154                                         orphan->disk_bytenr, orphan->disk_len,
3155                                         0, root->objectid, orphan->objectid,
3156                                         orphan->offset);
3157                         if (ret < 0)
3158                                 goto out;
3159                 }
3160                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3161                                 orphan->offset, orphan->disk_bytenr,
3162                                 orphan->disk_len, orphan->disk_len);
3163                 if (ret < 0)
3164                         goto out;
3165
3166                 /* Update file size info */
3167                 rec->found_size += orphan->disk_len;
3168                 if (rec->found_size == rec->nbytes)
3169                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3170
3171                 /* Update the file extent hole info too */
3172                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3173                                            orphan->disk_len);
3174                 if (ret < 0)
3175                         goto out;
3176                 if (RB_EMPTY_ROOT(&rec->holes))
3177                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3178
3179                 list_del(&orphan->list);
3180                 free(orphan);
3181         }
3182         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3183 out:
3184         return ret;
3185 }
3186
3187 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3188                                         struct btrfs_root *root,
3189                                         struct btrfs_path *path,
3190                                         struct inode_record *rec)
3191 {
3192         struct rb_node *node;
3193         struct file_extent_hole *hole;
3194         int found = 0;
3195         int ret = 0;
3196
3197         node = rb_first(&rec->holes);
3198
3199         while (node) {
3200                 found = 1;
3201                 hole = rb_entry(node, struct file_extent_hole, node);
3202                 ret = btrfs_punch_hole(trans, root, rec->ino,
3203                                        hole->start, hole->len);
3204                 if (ret < 0)
3205                         goto out;
3206                 ret = del_file_extent_hole(&rec->holes, hole->start,
3207                                            hole->len);
3208                 if (ret < 0)
3209                         goto out;
3210                 if (RB_EMPTY_ROOT(&rec->holes))
3211                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3212                 node = rb_first(&rec->holes);
3213         }
3214         /* special case for a file losing all its file extent */
3215         if (!found) {
3216                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3217                                        round_up(rec->isize, root->sectorsize));
3218                 if (ret < 0)
3219                         goto out;
3220         }
3221         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3222                rec->ino, root->objectid);
3223 out:
3224         return ret;
3225 }
3226
3227 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3228 {
3229         struct btrfs_trans_handle *trans;
3230         struct btrfs_path path;
3231         int ret = 0;
3232
3233         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3234                              I_ERR_NO_ORPHAN_ITEM |
3235                              I_ERR_LINK_COUNT_WRONG |
3236                              I_ERR_NO_INODE_ITEM |
3237                              I_ERR_FILE_EXTENT_ORPHAN |
3238                              I_ERR_FILE_EXTENT_DISCOUNT|
3239                              I_ERR_FILE_NBYTES_WRONG)))
3240                 return rec->errors;
3241
3242         /*
3243          * For nlink repair, it may create a dir and add link, so
3244          * 2 for parent(256)'s dir_index and dir_item
3245          * 2 for lost+found dir's inode_item and inode_ref
3246          * 1 for the new inode_ref of the file
3247          * 2 for lost+found dir's dir_index and dir_item for the file
3248          */
3249         trans = btrfs_start_transaction(root, 7);
3250         if (IS_ERR(trans))
3251                 return PTR_ERR(trans);
3252
3253         btrfs_init_path(&path);
3254         if (rec->errors & I_ERR_NO_INODE_ITEM)
3255                 ret = repair_inode_no_item(trans, root, &path, rec);
3256         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3257                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3258         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3259                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3260         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3261                 ret = repair_inode_isize(trans, root, &path, rec);
3262         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3263                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3264         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3265                 ret = repair_inode_nlinks(trans, root, &path, rec);
3266         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3267                 ret = repair_inode_nbytes(trans, root, &path, rec);
3268         btrfs_commit_transaction(trans, root);
3269         btrfs_release_path(&path);
3270         return ret;
3271 }
3272
3273 static int check_inode_recs(struct btrfs_root *root,
3274                             struct cache_tree *inode_cache)
3275 {
3276         struct cache_extent *cache;
3277         struct ptr_node *node;
3278         struct inode_record *rec;
3279         struct inode_backref *backref;
3280         int stage = 0;
3281         int ret = 0;
3282         int err = 0;
3283         u64 error = 0;
3284         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3285
3286         if (btrfs_root_refs(&root->root_item) == 0) {
3287                 if (!cache_tree_empty(inode_cache))
3288                         fprintf(stderr, "warning line %d\n", __LINE__);
3289                 return 0;
3290         }
3291
3292         /*
3293          * We need to repair backrefs first because we could change some of the
3294          * errors in the inode recs.
3295          *
3296          * We also need to go through and delete invalid backrefs first and then
3297          * add the correct ones second.  We do this because we may get EEXIST
3298          * when adding back the correct index because we hadn't yet deleted the
3299          * invalid index.
3300          *
3301          * For example, if we were missing a dir index then the directories
3302          * isize would be wrong, so if we fixed the isize to what we thought it
3303          * would be and then fixed the backref we'd still have a invalid fs, so
3304          * we need to add back the dir index and then check to see if the isize
3305          * is still wrong.
3306          */
3307         while (stage < 3) {
3308                 stage++;
3309                 if (stage == 3 && !err)
3310                         break;
3311
3312                 cache = search_cache_extent(inode_cache, 0);
3313                 while (repair && cache) {
3314                         node = container_of(cache, struct ptr_node, cache);
3315                         rec = node->data;
3316                         cache = next_cache_extent(cache);
3317
3318                         /* Need to free everything up and rescan */
3319                         if (stage == 3) {
3320                                 remove_cache_extent(inode_cache, &node->cache);
3321                                 free(node);
3322                                 free_inode_rec(rec);
3323                                 continue;
3324                         }
3325
3326                         if (list_empty(&rec->backrefs))
3327                                 continue;
3328
3329                         ret = repair_inode_backrefs(root, rec, inode_cache,
3330                                                     stage == 1);
3331                         if (ret < 0) {
3332                                 err = ret;
3333                                 stage = 2;
3334                                 break;
3335                         } if (ret > 0) {
3336                                 err = -EAGAIN;
3337                         }
3338                 }
3339         }
3340         if (err)
3341                 return err;
3342
3343         rec = get_inode_rec(inode_cache, root_dirid, 0);
3344         BUG_ON(IS_ERR(rec));
3345         if (rec) {
3346                 ret = check_root_dir(rec);
3347                 if (ret) {
3348                         fprintf(stderr, "root %llu root dir %llu error\n",
3349                                 (unsigned long long)root->root_key.objectid,
3350                                 (unsigned long long)root_dirid);
3351                         print_inode_error(root, rec);
3352                         error++;
3353                 }
3354         } else {
3355                 if (repair) {
3356                         struct btrfs_trans_handle *trans;
3357
3358                         trans = btrfs_start_transaction(root, 1);
3359                         if (IS_ERR(trans)) {
3360                                 err = PTR_ERR(trans);
3361                                 return err;
3362                         }
3363
3364                         fprintf(stderr,
3365                                 "root %llu missing its root dir, recreating\n",
3366                                 (unsigned long long)root->objectid);
3367
3368                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3369                         BUG_ON(ret);
3370
3371                         btrfs_commit_transaction(trans, root);
3372                         return -EAGAIN;
3373                 }
3374
3375                 fprintf(stderr, "root %llu root dir %llu not found\n",
3376                         (unsigned long long)root->root_key.objectid,
3377                         (unsigned long long)root_dirid);
3378         }
3379
3380         while (1) {
3381                 cache = search_cache_extent(inode_cache, 0);
3382                 if (!cache)
3383                         break;
3384                 node = container_of(cache, struct ptr_node, cache);
3385                 rec = node->data;
3386                 remove_cache_extent(inode_cache, &node->cache);
3387                 free(node);
3388                 if (rec->ino == root_dirid ||
3389                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3390                         free_inode_rec(rec);
3391                         continue;
3392                 }
3393
3394                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3395                         ret = check_orphan_item(root, rec->ino);
3396                         if (ret == 0)
3397                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3398                         if (can_free_inode_rec(rec)) {
3399                                 free_inode_rec(rec);
3400                                 continue;
3401                         }
3402                 }
3403
3404                 if (!rec->found_inode_item)
3405                         rec->errors |= I_ERR_NO_INODE_ITEM;
3406                 if (rec->found_link != rec->nlink)
3407                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3408                 if (repair) {
3409                         ret = try_repair_inode(root, rec);
3410                         if (ret == 0 && can_free_inode_rec(rec)) {
3411                                 free_inode_rec(rec);
3412                                 continue;
3413                         }
3414                         ret = 0;
3415                 }
3416
3417                 if (!(repair && ret == 0))
3418                         error++;
3419                 print_inode_error(root, rec);
3420                 list_for_each_entry(backref, &rec->backrefs, list) {
3421                         if (!backref->found_dir_item)
3422                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3423                         if (!backref->found_dir_index)
3424                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3425                         if (!backref->found_inode_ref)
3426                                 backref->errors |= REF_ERR_NO_INODE_REF;
3427                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3428                                 " namelen %u name %s filetype %d errors %x",
3429                                 (unsigned long long)backref->dir,
3430                                 (unsigned long long)backref->index,
3431                                 backref->namelen, backref->name,
3432                                 backref->filetype, backref->errors);
3433                         print_ref_error(backref->errors);
3434                 }
3435                 free_inode_rec(rec);
3436         }
3437         return (error > 0) ? -1 : 0;
3438 }
3439
3440 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3441                                         u64 objectid)
3442 {
3443         struct cache_extent *cache;
3444         struct root_record *rec = NULL;
3445         int ret;
3446
3447         cache = lookup_cache_extent(root_cache, objectid, 1);
3448         if (cache) {
3449                 rec = container_of(cache, struct root_record, cache);
3450         } else {
3451                 rec = calloc(1, sizeof(*rec));
3452                 if (!rec)
3453                         return ERR_PTR(-ENOMEM);
3454                 rec->objectid = objectid;
3455                 INIT_LIST_HEAD(&rec->backrefs);
3456                 rec->cache.start = objectid;
3457                 rec->cache.size = 1;
3458
3459                 ret = insert_cache_extent(root_cache, &rec->cache);
3460                 if (ret)
3461                         return ERR_PTR(-EEXIST);
3462         }
3463         return rec;
3464 }
3465
3466 static struct root_backref *get_root_backref(struct root_record *rec,
3467                                              u64 ref_root, u64 dir, u64 index,
3468                                              const char *name, int namelen)
3469 {
3470         struct root_backref *backref;
3471
3472         list_for_each_entry(backref, &rec->backrefs, list) {
3473                 if (backref->ref_root != ref_root || backref->dir != dir ||
3474                     backref->namelen != namelen)
3475                         continue;
3476                 if (memcmp(name, backref->name, namelen))
3477                         continue;
3478                 return backref;
3479         }
3480
3481         backref = calloc(1, sizeof(*backref) + namelen + 1);
3482         if (!backref)
3483                 return NULL;
3484         backref->ref_root = ref_root;
3485         backref->dir = dir;
3486         backref->index = index;
3487         backref->namelen = namelen;
3488         memcpy(backref->name, name, namelen);
3489         backref->name[namelen] = '\0';
3490         list_add_tail(&backref->list, &rec->backrefs);
3491         return backref;
3492 }
3493
3494 static void free_root_record(struct cache_extent *cache)
3495 {
3496         struct root_record *rec;
3497         struct root_backref *backref;
3498
3499         rec = container_of(cache, struct root_record, cache);
3500         while (!list_empty(&rec->backrefs)) {
3501                 backref = to_root_backref(rec->backrefs.next);
3502                 list_del(&backref->list);
3503                 free(backref);
3504         }
3505
3506         free(rec);
3507 }
3508
3509 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3510
3511 static int add_root_backref(struct cache_tree *root_cache,
3512                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3513                             const char *name, int namelen,
3514                             int item_type, int errors)
3515 {
3516         struct root_record *rec;
3517         struct root_backref *backref;
3518
3519         rec = get_root_rec(root_cache, root_id);
3520         BUG_ON(IS_ERR(rec));
3521         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3522         BUG_ON(!backref);
3523
3524         backref->errors |= errors;
3525
3526         if (item_type != BTRFS_DIR_ITEM_KEY) {
3527                 if (backref->found_dir_index || backref->found_back_ref ||
3528                     backref->found_forward_ref) {
3529                         if (backref->index != index)
3530                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3531                 } else {
3532                         backref->index = index;
3533                 }
3534         }
3535
3536         if (item_type == BTRFS_DIR_ITEM_KEY) {
3537                 if (backref->found_forward_ref)
3538                         rec->found_ref++;
3539                 backref->found_dir_item = 1;
3540         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3541                 backref->found_dir_index = 1;
3542         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3543                 if (backref->found_forward_ref)
3544                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3545                 else if (backref->found_dir_item)
3546                         rec->found_ref++;
3547                 backref->found_forward_ref = 1;
3548         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3549                 if (backref->found_back_ref)
3550                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3551                 backref->found_back_ref = 1;
3552         } else {
3553                 BUG_ON(1);
3554         }
3555
3556         if (backref->found_forward_ref && backref->found_dir_item)
3557                 backref->reachable = 1;
3558         return 0;
3559 }
3560
3561 static int merge_root_recs(struct btrfs_root *root,
3562                            struct cache_tree *src_cache,
3563                            struct cache_tree *dst_cache)
3564 {
3565         struct cache_extent *cache;
3566         struct ptr_node *node;
3567         struct inode_record *rec;
3568         struct inode_backref *backref;
3569         int ret = 0;
3570
3571         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3572                 free_inode_recs_tree(src_cache);
3573                 return 0;
3574         }
3575
3576         while (1) {
3577                 cache = search_cache_extent(src_cache, 0);
3578                 if (!cache)
3579                         break;
3580                 node = container_of(cache, struct ptr_node, cache);
3581                 rec = node->data;
3582                 remove_cache_extent(src_cache, &node->cache);
3583                 free(node);
3584
3585                 ret = is_child_root(root, root->objectid, rec->ino);
3586                 if (ret < 0)
3587                         break;
3588                 else if (ret == 0)
3589                         goto skip;
3590
3591                 list_for_each_entry(backref, &rec->backrefs, list) {
3592                         BUG_ON(backref->found_inode_ref);
3593                         if (backref->found_dir_item)
3594                                 add_root_backref(dst_cache, rec->ino,
3595                                         root->root_key.objectid, backref->dir,
3596                                         backref->index, backref->name,
3597                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3598                                         backref->errors);
3599                         if (backref->found_dir_index)
3600                                 add_root_backref(dst_cache, rec->ino,
3601                                         root->root_key.objectid, backref->dir,
3602                                         backref->index, backref->name,
3603                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3604                                         backref->errors);
3605                 }
3606 skip:
3607                 free_inode_rec(rec);
3608         }
3609         if (ret < 0)
3610                 return ret;
3611         return 0;
3612 }
3613
3614 static int check_root_refs(struct btrfs_root *root,
3615                            struct cache_tree *root_cache)
3616 {
3617         struct root_record *rec;
3618         struct root_record *ref_root;
3619         struct root_backref *backref;
3620         struct cache_extent *cache;
3621         int loop = 1;
3622         int ret;
3623         int error;
3624         int errors = 0;
3625
3626         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3627         BUG_ON(IS_ERR(rec));
3628         rec->found_ref = 1;
3629
3630         /* fixme: this can not detect circular references */
3631         while (loop) {
3632                 loop = 0;
3633                 cache = search_cache_extent(root_cache, 0);
3634                 while (1) {
3635                         if (!cache)
3636                                 break;
3637                         rec = container_of(cache, struct root_record, cache);
3638                         cache = next_cache_extent(cache);
3639
3640                         if (rec->found_ref == 0)
3641                                 continue;
3642
3643                         list_for_each_entry(backref, &rec->backrefs, list) {
3644                                 if (!backref->reachable)
3645                                         continue;
3646
3647                                 ref_root = get_root_rec(root_cache,
3648                                                         backref->ref_root);
3649                                 BUG_ON(IS_ERR(ref_root));
3650                                 if (ref_root->found_ref > 0)
3651                                         continue;
3652
3653                                 backref->reachable = 0;
3654                                 rec->found_ref--;
3655                                 if (rec->found_ref == 0)
3656                                         loop = 1;
3657                         }
3658                 }
3659         }
3660
3661         cache = search_cache_extent(root_cache, 0);
3662         while (1) {
3663                 if (!cache)
3664                         break;
3665                 rec = container_of(cache, struct root_record, cache);
3666                 cache = next_cache_extent(cache);
3667
3668                 if (rec->found_ref == 0 &&
3669                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3670                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3671                         ret = check_orphan_item(root->fs_info->tree_root,
3672                                                 rec->objectid);
3673                         if (ret == 0)
3674                                 continue;
3675
3676                         /*
3677                          * If we don't have a root item then we likely just have
3678                          * a dir item in a snapshot for this root but no actual
3679                          * ref key or anything so it's meaningless.
3680                          */
3681                         if (!rec->found_root_item)
3682                                 continue;
3683                         errors++;
3684                         fprintf(stderr, "fs tree %llu not referenced\n",
3685                                 (unsigned long long)rec->objectid);
3686                 }
3687
3688                 error = 0;
3689                 if (rec->found_ref > 0 && !rec->found_root_item)
3690                         error = 1;
3691                 list_for_each_entry(backref, &rec->backrefs, list) {
3692                         if (!backref->found_dir_item)
3693                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3694                         if (!backref->found_dir_index)
3695                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3696                         if (!backref->found_back_ref)
3697                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3698                         if (!backref->found_forward_ref)
3699                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3700                         if (backref->reachable && backref->errors)
3701                                 error = 1;
3702                 }
3703                 if (!error)
3704                         continue;
3705
3706                 errors++;
3707                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3708                         (unsigned long long)rec->objectid, rec->found_ref,
3709                          rec->found_root_item ? "" : "not found");
3710
3711                 list_for_each_entry(backref, &rec->backrefs, list) {
3712                         if (!backref->reachable)
3713                                 continue;
3714                         if (!backref->errors && rec->found_root_item)
3715                                 continue;
3716                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3717                                 " index %llu namelen %u name %s errors %x\n",
3718                                 (unsigned long long)backref->ref_root,
3719                                 (unsigned long long)backref->dir,
3720                                 (unsigned long long)backref->index,
3721                                 backref->namelen, backref->name,
3722                                 backref->errors);
3723                         print_ref_error(backref->errors);
3724                 }
3725         }
3726         return errors > 0 ? 1 : 0;
3727 }
3728
3729 static int process_root_ref(struct extent_buffer *eb, int slot,
3730                             struct btrfs_key *key,
3731                             struct cache_tree *root_cache)
3732 {
3733         u64 dirid;
3734         u64 index;
3735         u32 len;
3736         u32 name_len;
3737         struct btrfs_root_ref *ref;
3738         char namebuf[BTRFS_NAME_LEN];
3739         int error;
3740
3741         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3742
3743         dirid = btrfs_root_ref_dirid(eb, ref);
3744         index = btrfs_root_ref_sequence(eb, ref);
3745         name_len = btrfs_root_ref_name_len(eb, ref);
3746
3747         if (name_len <= BTRFS_NAME_LEN) {
3748                 len = name_len;
3749                 error = 0;
3750         } else {
3751                 len = BTRFS_NAME_LEN;
3752                 error = REF_ERR_NAME_TOO_LONG;
3753         }
3754         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3755
3756         if (key->type == BTRFS_ROOT_REF_KEY) {
3757                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3758                                  index, namebuf, len, key->type, error);
3759         } else {
3760                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3761                                  index, namebuf, len, key->type, error);
3762         }
3763         return 0;
3764 }
3765
3766 static void free_corrupt_block(struct cache_extent *cache)
3767 {
3768         struct btrfs_corrupt_block *corrupt;
3769
3770         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3771         free(corrupt);
3772 }
3773
3774 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3775
3776 /*
3777  * Repair the btree of the given root.
3778  *
3779  * The fix is to remove the node key in corrupt_blocks cache_tree.
3780  * and rebalance the tree.
3781  * After the fix, the btree should be writeable.
3782  */
3783 static int repair_btree(struct btrfs_root *root,
3784                         struct cache_tree *corrupt_blocks)
3785 {
3786         struct btrfs_trans_handle *trans;
3787         struct btrfs_path path;
3788         struct btrfs_corrupt_block *corrupt;
3789         struct cache_extent *cache;
3790         struct btrfs_key key;
3791         u64 offset;
3792         int level;
3793         int ret = 0;
3794
3795         if (cache_tree_empty(corrupt_blocks))
3796                 return 0;
3797
3798         trans = btrfs_start_transaction(root, 1);
3799         if (IS_ERR(trans)) {
3800                 ret = PTR_ERR(trans);
3801                 fprintf(stderr, "Error starting transaction: %s\n",
3802                         strerror(-ret));
3803                 return ret;
3804         }
3805         btrfs_init_path(&path);
3806         cache = first_cache_extent(corrupt_blocks);
3807         while (cache) {
3808                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3809                                        cache);
3810                 level = corrupt->level;
3811                 path.lowest_level = level;
3812                 key.objectid = corrupt->key.objectid;
3813                 key.type = corrupt->key.type;
3814                 key.offset = corrupt->key.offset;
3815
3816                 /*
3817                  * Here we don't want to do any tree balance, since it may
3818                  * cause a balance with corrupted brother leaf/node,
3819                  * so ins_len set to 0 here.
3820                  * Balance will be done after all corrupt node/leaf is deleted.
3821                  */
3822                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3823                 if (ret < 0)
3824                         goto out;
3825                 offset = btrfs_node_blockptr(path.nodes[level],
3826                                              path.slots[level]);
3827
3828                 /* Remove the ptr */
3829                 ret = btrfs_del_ptr(trans, root, &path, level,
3830                                     path.slots[level]);
3831                 if (ret < 0)
3832                         goto out;
3833                 /*
3834                  * Remove the corresponding extent
3835                  * return value is not concerned.
3836                  */
3837                 btrfs_release_path(&path);
3838                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3839                                         0, root->root_key.objectid,
3840                                         level - 1, 0);
3841                 cache = next_cache_extent(cache);
3842         }
3843
3844         /* Balance the btree using btrfs_search_slot() */
3845         cache = first_cache_extent(corrupt_blocks);
3846         while (cache) {
3847                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3848                                        cache);
3849                 memcpy(&key, &corrupt->key, sizeof(key));
3850                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3851                 if (ret < 0)
3852                         goto out;
3853                 /* return will always >0 since it won't find the item */
3854                 ret = 0;
3855                 btrfs_release_path(&path);
3856                 cache = next_cache_extent(cache);
3857         }
3858 out:
3859         btrfs_commit_transaction(trans, root);
3860         btrfs_release_path(&path);
3861         return ret;
3862 }
3863
3864 static int check_fs_root(struct btrfs_root *root,
3865                          struct cache_tree *root_cache,
3866                          struct walk_control *wc)
3867 {
3868         int ret = 0;
3869         int err = 0;
3870         int wret;
3871         int level;
3872         struct btrfs_path path;
3873         struct shared_node root_node;
3874         struct root_record *rec;
3875         struct btrfs_root_item *root_item = &root->root_item;
3876         struct cache_tree corrupt_blocks;
3877         struct orphan_data_extent *orphan;
3878         struct orphan_data_extent *tmp;
3879         enum btrfs_tree_block_status status;
3880         struct node_refs nrefs;
3881
3882         /*
3883          * Reuse the corrupt_block cache tree to record corrupted tree block
3884          *
3885          * Unlike the usage in extent tree check, here we do it in a per
3886          * fs/subvol tree base.
3887          */
3888         cache_tree_init(&corrupt_blocks);
3889         root->fs_info->corrupt_blocks = &corrupt_blocks;
3890
3891         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3892                 rec = get_root_rec(root_cache, root->root_key.objectid);
3893                 BUG_ON(IS_ERR(rec));
3894                 if (btrfs_root_refs(root_item) > 0)
3895                         rec->found_root_item = 1;
3896         }
3897
3898         btrfs_init_path(&path);
3899         memset(&root_node, 0, sizeof(root_node));
3900         cache_tree_init(&root_node.root_cache);
3901         cache_tree_init(&root_node.inode_cache);
3902         memset(&nrefs, 0, sizeof(nrefs));
3903
3904         /* Move the orphan extent record to corresponding inode_record */
3905         list_for_each_entry_safe(orphan, tmp,
3906                                  &root->orphan_data_extents, list) {
3907                 struct inode_record *inode;
3908
3909                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3910                                       1);
3911                 BUG_ON(IS_ERR(inode));
3912                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3913                 list_move(&orphan->list, &inode->orphan_extents);
3914         }
3915
3916         level = btrfs_header_level(root->node);
3917         memset(wc->nodes, 0, sizeof(wc->nodes));
3918         wc->nodes[level] = &root_node;
3919         wc->active_node = level;
3920         wc->root_level = level;
3921
3922         /* We may not have checked the root block, lets do that now */
3923         if (btrfs_is_leaf(root->node))
3924                 status = btrfs_check_leaf(root, NULL, root->node);
3925         else
3926                 status = btrfs_check_node(root, NULL, root->node);
3927         if (status != BTRFS_TREE_BLOCK_CLEAN)
3928                 return -EIO;
3929
3930         if (btrfs_root_refs(root_item) > 0 ||
3931             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3932                 path.nodes[level] = root->node;
3933                 extent_buffer_get(root->node);
3934                 path.slots[level] = 0;
3935         } else {
3936                 struct btrfs_key key;
3937                 struct btrfs_disk_key found_key;
3938
3939                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3940                 level = root_item->drop_level;
3941                 path.lowest_level = level;
3942                 if (level > btrfs_header_level(root->node) ||
3943                     level >= BTRFS_MAX_LEVEL) {
3944                         error("ignoring invalid drop level: %u", level);
3945                         goto skip_walking;
3946                 }
3947                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3948                 if (wret < 0)
3949                         goto skip_walking;
3950                 btrfs_node_key(path.nodes[level], &found_key,
3951                                 path.slots[level]);
3952                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3953                                         sizeof(found_key)));
3954         }
3955
3956         while (1) {
3957                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3958                 if (wret < 0)
3959                         ret = wret;
3960                 if (wret != 0)
3961                         break;
3962
3963                 wret = walk_up_tree(root, &path, wc, &level);
3964                 if (wret < 0)
3965                         ret = wret;
3966                 if (wret != 0)
3967                         break;
3968         }
3969 skip_walking:
3970         btrfs_release_path(&path);
3971
3972         if (!cache_tree_empty(&corrupt_blocks)) {
3973                 struct cache_extent *cache;
3974                 struct btrfs_corrupt_block *corrupt;
3975
3976                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3977                        root->root_key.objectid);
3978                 cache = first_cache_extent(&corrupt_blocks);
3979                 while (cache) {
3980                         corrupt = container_of(cache,
3981                                                struct btrfs_corrupt_block,
3982                                                cache);
3983                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3984                                cache->start, corrupt->level,
3985                                corrupt->key.objectid, corrupt->key.type,
3986                                corrupt->key.offset);
3987                         cache = next_cache_extent(cache);
3988                 }
3989                 if (repair) {
3990                         printf("Try to repair the btree for root %llu\n",
3991                                root->root_key.objectid);
3992                         ret = repair_btree(root, &corrupt_blocks);
3993                         if (ret < 0)
3994                                 fprintf(stderr, "Failed to repair btree: %s\n",
3995                                         strerror(-ret));
3996                         if (!ret)
3997                                 printf("Btree for root %llu is fixed\n",
3998                                        root->root_key.objectid);
3999                 }
4000         }
4001
4002         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4003         if (err < 0)
4004                 ret = err;
4005
4006         if (root_node.current) {
4007                 root_node.current->checked = 1;
4008                 maybe_free_inode_rec(&root_node.inode_cache,
4009                                 root_node.current);
4010         }
4011
4012         err = check_inode_recs(root, &root_node.inode_cache);
4013         if (!ret)
4014                 ret = err;
4015
4016         free_corrupt_blocks_tree(&corrupt_blocks);
4017         root->fs_info->corrupt_blocks = NULL;
4018         free_orphan_data_extents(&root->orphan_data_extents);
4019         return ret;
4020 }
4021
4022 static int fs_root_objectid(u64 objectid)
4023 {
4024         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4025             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4026                 return 1;
4027         return is_fstree(objectid);
4028 }
4029
4030 static int check_fs_roots(struct btrfs_root *root,
4031                           struct cache_tree *root_cache)
4032 {
4033         struct btrfs_path path;
4034         struct btrfs_key key;
4035         struct walk_control wc;
4036         struct extent_buffer *leaf, *tree_node;
4037         struct btrfs_root *tmp_root;
4038         struct btrfs_root *tree_root = root->fs_info->tree_root;
4039         int ret;
4040         int err = 0;
4041
4042         if (ctx.progress_enabled) {
4043                 ctx.tp = TASK_FS_ROOTS;
4044                 task_start(ctx.info);
4045         }
4046
4047         /*
4048          * Just in case we made any changes to the extent tree that weren't
4049          * reflected into the free space cache yet.
4050          */
4051         if (repair)
4052                 reset_cached_block_groups(root->fs_info);
4053         memset(&wc, 0, sizeof(wc));
4054         cache_tree_init(&wc.shared);
4055         btrfs_init_path(&path);
4056
4057 again:
4058         key.offset = 0;
4059         key.objectid = 0;
4060         key.type = BTRFS_ROOT_ITEM_KEY;
4061         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4062         if (ret < 0) {
4063                 err = 1;
4064                 goto out;
4065         }
4066         tree_node = tree_root->node;
4067         while (1) {
4068                 if (tree_node != tree_root->node) {
4069                         free_root_recs_tree(root_cache);
4070                         btrfs_release_path(&path);
4071                         goto again;
4072                 }
4073                 leaf = path.nodes[0];
4074                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4075                         ret = btrfs_next_leaf(tree_root, &path);
4076                         if (ret) {
4077                                 if (ret < 0)
4078                                         err = 1;
4079                                 break;
4080                         }
4081                         leaf = path.nodes[0];
4082                 }
4083                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4084                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4085                     fs_root_objectid(key.objectid)) {
4086                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4087                                 tmp_root = btrfs_read_fs_root_no_cache(
4088                                                 root->fs_info, &key);
4089                         } else {
4090                                 key.offset = (u64)-1;
4091                                 tmp_root = btrfs_read_fs_root(
4092                                                 root->fs_info, &key);
4093                         }
4094                         if (IS_ERR(tmp_root)) {
4095                                 err = 1;
4096                                 goto next;
4097                         }
4098                         ret = check_fs_root(tmp_root, root_cache, &wc);
4099                         if (ret == -EAGAIN) {
4100                                 free_root_recs_tree(root_cache);
4101                                 btrfs_release_path(&path);
4102                                 goto again;
4103                         }
4104                         if (ret)
4105                                 err = 1;
4106                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4107                                 btrfs_free_fs_root(tmp_root);
4108                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4109                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4110                         process_root_ref(leaf, path.slots[0], &key,
4111                                          root_cache);
4112                 }
4113 next:
4114                 path.slots[0]++;
4115         }
4116 out:
4117         btrfs_release_path(&path);
4118         if (err)
4119                 free_extent_cache_tree(&wc.shared);
4120         if (!cache_tree_empty(&wc.shared))
4121                 fprintf(stderr, "warning line %d\n", __LINE__);
4122
4123         task_stop(ctx.info);
4124
4125         return err;
4126 }
4127
4128 /*
4129  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4130  * INODE_REF/INODE_EXTREF match.
4131  *
4132  * @root:       the root of the fs/file tree
4133  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4134  * @key:        the key of the DIR_ITEM/DIR_INDEX
4135  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4136  *              distinguish root_dir between normal dir/file
4137  * @name:       the name in the INODE_REF/INODE_EXTREF
4138  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4139  * @mode:       the st_mode of INODE_ITEM
4140  *
4141  * Return 0 if no error occurred.
4142  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4143  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4144  * dir/file.
4145  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4146  * not match for normal dir/file.
4147  */
4148 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4149                          struct btrfs_key *key, u64 index, char *name,
4150                          u32 namelen, u32 mode)
4151 {
4152         struct btrfs_path path;
4153         struct extent_buffer *node;
4154         struct btrfs_dir_item *di;
4155         struct btrfs_key location;
4156         char namebuf[BTRFS_NAME_LEN] = {0};
4157         u32 total;
4158         u32 cur = 0;
4159         u32 len;
4160         u32 name_len;
4161         u32 data_len;
4162         u8 filetype;
4163         int slot;
4164         int ret;
4165
4166         btrfs_init_path(&path);
4167         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4168         if (ret < 0) {
4169                 ret = DIR_ITEM_MISSING;
4170                 goto out;
4171         }
4172
4173         /* Process root dir and goto out*/
4174         if (index == 0) {
4175                 if (ret == 0) {
4176                         ret = ROOT_DIR_ERROR;
4177                         error(
4178                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4179                                 root->objectid,
4180                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4181                                         "REF" : "EXTREF",
4182                                 ref_key->objectid, ref_key->offset,
4183                                 key->type == BTRFS_DIR_ITEM_KEY ?
4184                                         "DIR_ITEM" : "DIR_INDEX");
4185                 } else {
4186                         ret = 0;
4187                 }
4188
4189                 goto out;
4190         }
4191
4192         /* Process normal file/dir */
4193         if (ret > 0) {
4194                 ret = DIR_ITEM_MISSING;
4195                 error(
4196                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4197                         root->objectid,
4198                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4199                         ref_key->objectid, ref_key->offset,
4200                         key->type == BTRFS_DIR_ITEM_KEY ?
4201                                 "DIR_ITEM" : "DIR_INDEX",
4202                         key->objectid, key->offset, namelen, name,
4203                         imode_to_type(mode));
4204                 goto out;
4205         }
4206
4207         /* Check whether inode_id/filetype/name match */
4208         node = path.nodes[0];
4209         slot = path.slots[0];
4210         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4211         total = btrfs_item_size_nr(node, slot);
4212         while (cur < total) {
4213                 ret = DIR_ITEM_MISMATCH;
4214                 name_len = btrfs_dir_name_len(node, di);
4215                 data_len = btrfs_dir_data_len(node, di);
4216
4217                 btrfs_dir_item_key_to_cpu(node, di, &location);
4218                 if (location.objectid != ref_key->objectid ||
4219                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4220                     location.offset != 0)
4221                         goto next;
4222
4223                 filetype = btrfs_dir_type(node, di);
4224                 if (imode_to_type(mode) != filetype)
4225                         goto next;
4226
4227                 if (name_len <= BTRFS_NAME_LEN) {
4228                         len = name_len;
4229                 } else {
4230                         len = BTRFS_NAME_LEN;
4231                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4232                         root->objectid,
4233                         key->type == BTRFS_DIR_ITEM_KEY ?
4234                         "DIR_ITEM" : "DIR_INDEX",
4235                         key->objectid, key->offset, name_len);
4236                 }
4237                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4238                 if (len != namelen || strncmp(namebuf, name, len))
4239                         goto next;
4240
4241                 ret = 0;
4242                 goto out;
4243 next:
4244                 len = sizeof(*di) + name_len + data_len;
4245                 di = (struct btrfs_dir_item *)((char *)di + len);
4246                 cur += len;
4247         }
4248         if (ret == DIR_ITEM_MISMATCH)
4249                 error(
4250                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4251                         root->objectid,
4252                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4253                         ref_key->objectid, ref_key->offset,
4254                         key->type == BTRFS_DIR_ITEM_KEY ?
4255                                 "DIR_ITEM" : "DIR_INDEX",
4256                         key->objectid, key->offset, namelen, name,
4257                         imode_to_type(mode));
4258 out:
4259         btrfs_release_path(&path);
4260         return ret;
4261 }
4262
4263 /*
4264  * Traverse the given INODE_REF and call find_dir_item() to find related
4265  * DIR_ITEM/DIR_INDEX.
4266  *
4267  * @root:       the root of the fs/file tree
4268  * @ref_key:    the key of the INODE_REF
4269  * @refs:       the count of INODE_REF
4270  * @mode:       the st_mode of INODE_ITEM
4271  *
4272  * Return 0 if no error occurred.
4273  */
4274 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4275                            struct extent_buffer *node, int slot, u64 *refs,
4276                            int mode)
4277 {
4278         struct btrfs_key key;
4279         struct btrfs_inode_ref *ref;
4280         char namebuf[BTRFS_NAME_LEN] = {0};
4281         u32 total;
4282         u32 cur = 0;
4283         u32 len;
4284         u32 name_len;
4285         u64 index;
4286         int ret, err = 0;
4287
4288         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4289         total = btrfs_item_size_nr(node, slot);
4290
4291 next:
4292         /* Update inode ref count */
4293         (*refs)++;
4294
4295         index = btrfs_inode_ref_index(node, ref);
4296         name_len = btrfs_inode_ref_name_len(node, ref);
4297         if (name_len <= BTRFS_NAME_LEN) {
4298                 len = name_len;
4299         } else {
4300                 len = BTRFS_NAME_LEN;
4301                 warning("root %llu INODE_REF[%llu %llu] name too long",
4302                         root->objectid, ref_key->objectid, ref_key->offset);
4303         }
4304
4305         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4306
4307         /* Check root dir ref name */
4308         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4309                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4310                       root->objectid, ref_key->objectid, ref_key->offset,
4311                       namebuf);
4312                 err |= ROOT_DIR_ERROR;
4313         }
4314
4315         /* Find related DIR_INDEX */
4316         key.objectid = ref_key->offset;
4317         key.type = BTRFS_DIR_INDEX_KEY;
4318         key.offset = index;
4319         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4320         err |= ret;
4321
4322         /* Find related dir_item */
4323         key.objectid = ref_key->offset;
4324         key.type = BTRFS_DIR_ITEM_KEY;
4325         key.offset = btrfs_name_hash(namebuf, len);
4326         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4327         err |= ret;
4328
4329         len = sizeof(*ref) + name_len;
4330         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4331         cur += len;
4332         if (cur < total)
4333                 goto next;
4334
4335         return err;
4336 }
4337
4338 /*
4339  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4340  * DIR_ITEM/DIR_INDEX.
4341  *
4342  * @root:       the root of the fs/file tree
4343  * @ref_key:    the key of the INODE_EXTREF
4344  * @refs:       the count of INODE_EXTREF
4345  * @mode:       the st_mode of INODE_ITEM
4346  *
4347  * Return 0 if no error occurred.
4348  */
4349 static int check_inode_extref(struct btrfs_root *root,
4350                               struct btrfs_key *ref_key,
4351                               struct extent_buffer *node, int slot, u64 *refs,
4352                               int mode)
4353 {
4354         struct btrfs_key key;
4355         struct btrfs_inode_extref *extref;
4356         char namebuf[BTRFS_NAME_LEN] = {0};
4357         u32 total;
4358         u32 cur = 0;
4359         u32 len;
4360         u32 name_len;
4361         u64 index;
4362         u64 parent;
4363         int ret;
4364         int err = 0;
4365
4366         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4367         total = btrfs_item_size_nr(node, slot);
4368
4369 next:
4370         /* update inode ref count */
4371         (*refs)++;
4372         name_len = btrfs_inode_extref_name_len(node, extref);
4373         index = btrfs_inode_extref_index(node, extref);
4374         parent = btrfs_inode_extref_parent(node, extref);
4375         if (name_len <= BTRFS_NAME_LEN) {
4376                 len = name_len;
4377         } else {
4378                 len = BTRFS_NAME_LEN;
4379                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4380                         root->objectid, ref_key->objectid, ref_key->offset);
4381         }
4382         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4383
4384         /* Check root dir ref name */
4385         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4386                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4387                       root->objectid, ref_key->objectid, ref_key->offset,
4388                       namebuf);
4389                 err |= ROOT_DIR_ERROR;
4390         }
4391
4392         /* find related dir_index */
4393         key.objectid = parent;
4394         key.type = BTRFS_DIR_INDEX_KEY;
4395         key.offset = index;
4396         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4397         err |= ret;
4398
4399         /* find related dir_item */
4400         key.objectid = parent;
4401         key.type = BTRFS_DIR_ITEM_KEY;
4402         key.offset = btrfs_name_hash(namebuf, len);
4403         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4404         err |= ret;
4405
4406         len = sizeof(*extref) + name_len;
4407         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4408         cur += len;
4409
4410         if (cur < total)
4411                 goto next;
4412
4413         return err;
4414 }
4415
4416 /*
4417  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4418  * DIR_ITEM/DIR_INDEX match.
4419  *
4420  * @root:       the root of the fs/file tree
4421  * @key:        the key of the INODE_REF/INODE_EXTREF
4422  * @name:       the name in the INODE_REF/INODE_EXTREF
4423  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4424  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4425  * to (u64)-1
4426  * @ext_ref:    the EXTENDED_IREF feature
4427  *
4428  * Return 0 if no error occurred.
4429  * Return >0 for error bitmap
4430  */
4431 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4432                           char *name, int namelen, u64 index,
4433                           unsigned int ext_ref)
4434 {
4435         struct btrfs_path path;
4436         struct btrfs_inode_ref *ref;
4437         struct btrfs_inode_extref *extref;
4438         struct extent_buffer *node;
4439         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4440         u32 total;
4441         u32 cur = 0;
4442         u32 len;
4443         u32 ref_namelen;
4444         u64 ref_index;
4445         u64 parent;
4446         u64 dir_id;
4447         int slot;
4448         int ret;
4449
4450         btrfs_init_path(&path);
4451         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4452         if (ret) {
4453                 ret = INODE_REF_MISSING;
4454                 goto extref;
4455         }
4456
4457         node = path.nodes[0];
4458         slot = path.slots[0];
4459
4460         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4461         total = btrfs_item_size_nr(node, slot);
4462
4463         /* Iterate all entry of INODE_REF */
4464         while (cur < total) {
4465                 ret = INODE_REF_MISSING;
4466
4467                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4468                 ref_index = btrfs_inode_ref_index(node, ref);
4469                 if (index != (u64)-1 && index != ref_index)
4470                         goto next_ref;
4471
4472                 if (ref_namelen <= BTRFS_NAME_LEN) {
4473                         len = ref_namelen;
4474                 } else {
4475                         len = BTRFS_NAME_LEN;
4476                         warning("root %llu INODE %s[%llu %llu] name too long",
4477                                 root->objectid,
4478                                 key->type == BTRFS_INODE_REF_KEY ?
4479                                         "REF" : "EXTREF",
4480                                 key->objectid, key->offset);
4481                 }
4482                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4483                                    len);
4484
4485                 if (len != namelen || strncmp(ref_namebuf, name, len))
4486                         goto next_ref;
4487
4488                 ret = 0;
4489                 goto out;
4490 next_ref:
4491                 len = sizeof(*ref) + ref_namelen;
4492                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4493                 cur += len;
4494         }
4495
4496 extref:
4497         /* Skip if not support EXTENDED_IREF feature */
4498         if (!ext_ref)
4499                 goto out;
4500
4501         btrfs_release_path(&path);
4502         btrfs_init_path(&path);
4503
4504         dir_id = key->offset;
4505         key->type = BTRFS_INODE_EXTREF_KEY;
4506         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4507
4508         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4509         if (ret) {
4510                 ret = INODE_REF_MISSING;
4511                 goto out;
4512         }
4513
4514         node = path.nodes[0];
4515         slot = path.slots[0];
4516
4517         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4518         cur = 0;
4519         total = btrfs_item_size_nr(node, slot);
4520
4521         /* Iterate all entry of INODE_EXTREF */
4522         while (cur < total) {
4523                 ret = INODE_REF_MISSING;
4524
4525                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4526                 ref_index = btrfs_inode_extref_index(node, extref);
4527                 parent = btrfs_inode_extref_parent(node, extref);
4528                 if (index != (u64)-1 && index != ref_index)
4529                         goto next_extref;
4530
4531                 if (parent != dir_id)
4532                         goto next_extref;
4533
4534                 if (ref_namelen <= BTRFS_NAME_LEN) {
4535                         len = ref_namelen;
4536                 } else {
4537                         len = BTRFS_NAME_LEN;
4538                         warning("root %llu INODE %s[%llu %llu] name too long",
4539                                 root->objectid,
4540                                 key->type == BTRFS_INODE_REF_KEY ?
4541                                         "REF" : "EXTREF",
4542                                 key->objectid, key->offset);
4543                 }
4544                 read_extent_buffer(node, ref_namebuf,
4545                                    (unsigned long)(extref + 1), len);
4546
4547                 if (len != namelen || strncmp(ref_namebuf, name, len))
4548                         goto next_extref;
4549
4550                 ret = 0;
4551                 goto out;
4552
4553 next_extref:
4554                 len = sizeof(*extref) + ref_namelen;
4555                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4556                 cur += len;
4557
4558         }
4559 out:
4560         btrfs_release_path(&path);
4561         return ret;
4562 }
4563
4564 /*
4565  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4566  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4567  *
4568  * @root:       the root of the fs/file tree
4569  * @key:        the key of the INODE_REF/INODE_EXTREF
4570  * @size:       the st_size of the INODE_ITEM
4571  * @ext_ref:    the EXTENDED_IREF feature
4572  *
4573  * Return 0 if no error occurred.
4574  */
4575 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4576                           struct extent_buffer *node, int slot, u64 *size,
4577                           unsigned int ext_ref)
4578 {
4579         struct btrfs_dir_item *di;
4580         struct btrfs_inode_item *ii;
4581         struct btrfs_path path;
4582         struct btrfs_key location;
4583         char namebuf[BTRFS_NAME_LEN] = {0};
4584         u32 total;
4585         u32 cur = 0;
4586         u32 len;
4587         u32 name_len;
4588         u32 data_len;
4589         u8 filetype;
4590         u32 mode;
4591         u64 index;
4592         int ret;
4593         int err = 0;
4594
4595         /*
4596          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4597          * ignore index check.
4598          */
4599         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4600
4601         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4602         total = btrfs_item_size_nr(node, slot);
4603
4604         while (cur < total) {
4605                 data_len = btrfs_dir_data_len(node, di);
4606                 if (data_len)
4607                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4608                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4609                               "DIR_ITEM" : "DIR_INDEX",
4610                               key->objectid, key->offset, data_len);
4611
4612                 name_len = btrfs_dir_name_len(node, di);
4613                 if (name_len <= BTRFS_NAME_LEN) {
4614                         len = name_len;
4615                 } else {
4616                         len = BTRFS_NAME_LEN;
4617                         warning("root %llu %s[%llu %llu] name too long",
4618                                 root->objectid,
4619                                 key->type == BTRFS_DIR_ITEM_KEY ?
4620                                 "DIR_ITEM" : "DIR_INDEX",
4621                                 key->objectid, key->offset);
4622                 }
4623                 (*size) += name_len;
4624
4625                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4626                 filetype = btrfs_dir_type(node, di);
4627
4628                 btrfs_init_path(&path);
4629                 btrfs_dir_item_key_to_cpu(node, di, &location);
4630
4631                 /* Ignore related ROOT_ITEM check */
4632                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4633                         goto next;
4634
4635                 /* Check relative INODE_ITEM(existence/filetype) */
4636                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4637                 if (ret) {
4638                         err |= INODE_ITEM_MISSING;
4639                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4640                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4641                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4642                               key->offset, location.objectid, name_len,
4643                               namebuf, filetype);
4644                         goto next;
4645                 }
4646
4647                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4648                                     struct btrfs_inode_item);
4649                 mode = btrfs_inode_mode(path.nodes[0], ii);
4650
4651                 if (imode_to_type(mode) != filetype) {
4652                         err |= INODE_ITEM_MISMATCH;
4653                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4654                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4655                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4656                               key->offset, name_len, namebuf, filetype);
4657                 }
4658
4659                 /* Check relative INODE_REF/INODE_EXTREF */
4660                 location.type = BTRFS_INODE_REF_KEY;
4661                 location.offset = key->objectid;
4662                 ret = find_inode_ref(root, &location, namebuf, len,
4663                                        index, ext_ref);
4664                 err |= ret;
4665                 if (ret & INODE_REF_MISSING)
4666                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4667                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4668                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4669                               key->offset, name_len, namebuf, filetype);
4670
4671 next:
4672                 btrfs_release_path(&path);
4673                 len = sizeof(*di) + name_len + data_len;
4674                 di = (struct btrfs_dir_item *)((char *)di + len);
4675                 cur += len;
4676
4677                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4678                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4679                               root->objectid, key->objectid, key->offset);
4680                         break;
4681                 }
4682         }
4683
4684         return err;
4685 }
4686
4687 /*
4688  * Check file extent datasum/hole, update the size of the file extents,
4689  * check and update the last offset of the file extent.
4690  *
4691  * @root:       the root of fs/file tree.
4692  * @fkey:       the key of the file extent.
4693  * @nodatasum:  INODE_NODATASUM feature.
4694  * @size:       the sum of all EXTENT_DATA items size for this inode.
4695  * @end:        the offset of the last extent.
4696  *
4697  * Return 0 if no error occurred.
4698  */
4699 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4700                              struct extent_buffer *node, int slot,
4701                              unsigned int nodatasum, u64 *size, u64 *end)
4702 {
4703         struct btrfs_file_extent_item *fi;
4704         u64 disk_bytenr;
4705         u64 disk_num_bytes;
4706         u64 extent_num_bytes;
4707         u64 found;
4708         unsigned int extent_type;
4709         unsigned int is_hole;
4710         int ret;
4711         int err = 0;
4712
4713         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4714
4715         extent_type = btrfs_file_extent_type(node, fi);
4716         /* Skip if file extent is inline */
4717         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4718                 struct btrfs_item *e = btrfs_item_nr(slot);
4719                 u32 item_inline_len;
4720
4721                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4722                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4723                 if (extent_num_bytes == 0 ||
4724                     extent_num_bytes != item_inline_len)
4725                         err |= FILE_EXTENT_ERROR;
4726                 *size += extent_num_bytes;
4727                 return err;
4728         }
4729
4730         /* Check extent type */
4731         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4732                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4733                 err |= FILE_EXTENT_ERROR;
4734                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4735                       root->objectid, fkey->objectid, fkey->offset);
4736                 return err;
4737         }
4738
4739         /* Check REG_EXTENT/PREALLOC_EXTENT */
4740         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4741         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4742         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4743         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4744
4745         /* Check EXTENT_DATA datasum */
4746         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4747         if (found > 0 && nodatasum) {
4748                 err |= ODD_CSUM_ITEM;
4749                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4750                       root->objectid, fkey->objectid, fkey->offset);
4751         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4752                    !is_hole &&
4753                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4754                 err |= CSUM_ITEM_MISSING;
4755                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4756                       root->objectid, fkey->objectid, fkey->offset);
4757         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4758                 err |= ODD_CSUM_ITEM;
4759                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4760                       root->objectid, fkey->objectid, fkey->offset);
4761         }
4762
4763         /* Check EXTENT_DATA hole */
4764         if (no_holes && is_hole) {
4765                 err |= FILE_EXTENT_ERROR;
4766                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4767                       root->objectid, fkey->objectid, fkey->offset);
4768         } else if (!no_holes && *end != fkey->offset) {
4769                 err |= FILE_EXTENT_ERROR;
4770                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4771                       root->objectid, fkey->objectid, fkey->offset);
4772         }
4773
4774         *end += extent_num_bytes;
4775         if (!is_hole)
4776                 *size += extent_num_bytes;
4777
4778         return err;
4779 }
4780
4781 /*
4782  * Check INODE_ITEM and related ITEMs (the same inode number)
4783  * 1. check link count
4784  * 2. check inode ref/extref
4785  * 3. check dir item/index
4786  *
4787  * @ext_ref:    the EXTENDED_IREF feature
4788  *
4789  * Return 0 if no error occurred.
4790  * Return >0 for error or hit the traversal is done(by error bitmap)
4791  */
4792 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4793                             unsigned int ext_ref)
4794 {
4795         struct extent_buffer *node;
4796         struct btrfs_inode_item *ii;
4797         struct btrfs_key key;
4798         u64 inode_id;
4799         u32 mode;
4800         u64 nlink;
4801         u64 nbytes;
4802         u64 isize;
4803         u64 size = 0;
4804         u64 refs = 0;
4805         u64 extent_end = 0;
4806         u64 extent_size = 0;
4807         unsigned int dir;
4808         unsigned int nodatasum;
4809         int slot;
4810         int ret;
4811         int err = 0;
4812
4813         node = path->nodes[0];
4814         slot = path->slots[0];
4815
4816         btrfs_item_key_to_cpu(node, &key, slot);
4817         inode_id = key.objectid;
4818
4819         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4820                 ret = btrfs_next_item(root, path);
4821                 if (ret > 0)
4822                         err |= LAST_ITEM;
4823                 return err;
4824         }
4825
4826         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4827         isize = btrfs_inode_size(node, ii);
4828         nbytes = btrfs_inode_nbytes(node, ii);
4829         mode = btrfs_inode_mode(node, ii);
4830         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4831         nlink = btrfs_inode_nlink(node, ii);
4832         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4833
4834         while (1) {
4835                 ret = btrfs_next_item(root, path);
4836                 if (ret < 0) {
4837                         /* out will fill 'err' rusing current statistics */
4838                         goto out;
4839                 } else if (ret > 0) {
4840                         err |= LAST_ITEM;
4841                         goto out;
4842                 }
4843
4844                 node = path->nodes[0];
4845                 slot = path->slots[0];
4846                 btrfs_item_key_to_cpu(node, &key, slot);
4847                 if (key.objectid != inode_id)
4848                         goto out;
4849
4850                 switch (key.type) {
4851                 case BTRFS_INODE_REF_KEY:
4852                         ret = check_inode_ref(root, &key, node, slot, &refs,
4853                                               mode);
4854                         err |= ret;
4855                         break;
4856                 case BTRFS_INODE_EXTREF_KEY:
4857                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4858                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4859                                         root->objectid, key.objectid,
4860                                         key.offset);
4861                         ret = check_inode_extref(root, &key, node, slot, &refs,
4862                                                  mode);
4863                         err |= ret;
4864                         break;
4865                 case BTRFS_DIR_ITEM_KEY:
4866                 case BTRFS_DIR_INDEX_KEY:
4867                         if (!dir) {
4868                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4869                                         root->objectid, inode_id,
4870                                         imode_to_type(mode), key.objectid,
4871                                         key.offset);
4872                         }
4873                         ret = check_dir_item(root, &key, node, slot, &size,
4874                                              ext_ref);
4875                         err |= ret;
4876                         break;
4877                 case BTRFS_EXTENT_DATA_KEY:
4878                         if (dir) {
4879                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4880                                         root->objectid, inode_id, key.objectid,
4881                                         key.offset);
4882                         }
4883                         ret = check_file_extent(root, &key, node, slot,
4884                                                 nodatasum, &extent_size,
4885                                                 &extent_end);
4886                         err |= ret;
4887                         break;
4888                 case BTRFS_XATTR_ITEM_KEY:
4889                         break;
4890                 default:
4891                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4892                               key.objectid, key.type, key.offset);
4893                 }
4894         }
4895
4896 out:
4897         /* verify INODE_ITEM nlink/isize/nbytes */
4898         if (dir) {
4899                 if (nlink != 1) {
4900                         err |= LINK_COUNT_ERROR;
4901                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4902                               root->objectid, inode_id, nlink);
4903                 }
4904
4905                 /*
4906                  * Just a warning, as dir inode nbytes is just an
4907                  * instructive value.
4908                  */
4909                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4910                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4911                                 root->objectid, inode_id, root->nodesize);
4912                 }
4913
4914                 if (isize != size) {
4915                         err |= ISIZE_ERROR;
4916                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4917                               root->objectid, inode_id, isize, size);
4918                 }
4919         } else {
4920                 if (nlink != refs) {
4921                         err |= LINK_COUNT_ERROR;
4922                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4923                               root->objectid, inode_id, nlink, refs);
4924                 } else if (!nlink) {
4925                         err |= ORPHAN_ITEM;
4926                 }
4927
4928                 if (!nbytes && !no_holes && extent_end < isize) {
4929                         err |= NBYTES_ERROR;
4930                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4931                               root->objectid, inode_id, isize);
4932                 }
4933
4934                 if (nbytes != extent_size) {
4935                         err |= NBYTES_ERROR;
4936                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4937                               root->objectid, inode_id, nbytes, extent_size);
4938                 }
4939         }
4940
4941         return err;
4942 }
4943
4944 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4945 {
4946         struct btrfs_path path;
4947         struct btrfs_key key;
4948         int err = 0;
4949         int ret;
4950
4951         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4952         key.type = BTRFS_INODE_ITEM_KEY;
4953         key.offset = 0;
4954
4955         /* For root being dropped, we don't need to check first inode */
4956         if (btrfs_root_refs(&root->root_item) == 0 &&
4957             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4958             key.objectid)
4959                 return 0;
4960
4961         btrfs_init_path(&path);
4962
4963         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4964         if (ret < 0)
4965                 goto out;
4966         if (ret > 0) {
4967                 ret = 0;
4968                 err |= INODE_ITEM_MISSING;
4969         }
4970
4971         err |= check_inode_item(root, &path, ext_ref);
4972         err &= ~LAST_ITEM;
4973         if (err && !ret)
4974                 ret = -EIO;
4975 out:
4976         btrfs_release_path(&path);
4977         return ret;
4978 }
4979
4980 /*
4981  * Iterate all item on the tree and call check_inode_item() to check.
4982  *
4983  * @root:       the root of the tree to be checked.
4984  * @ext_ref:    the EXTENDED_IREF feature
4985  *
4986  * Return 0 if no error found.
4987  * Return <0 for error.
4988  */
4989 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4990 {
4991         struct btrfs_path path;
4992         struct node_refs nrefs;
4993         struct btrfs_root_item *root_item = &root->root_item;
4994         int ret, wret;
4995         int level;
4996
4997         /*
4998          * We need to manually check the first inode item(256)
4999          * As the following traversal function will only start from
5000          * the first inode item in the leaf, if inode item(256) is missing
5001          * we will just skip it forever.
5002          */
5003         ret = check_fs_first_inode(root, ext_ref);
5004         if (ret < 0)
5005                 return ret;
5006
5007         memset(&nrefs, 0, sizeof(nrefs));
5008         level = btrfs_header_level(root->node);
5009         btrfs_init_path(&path);
5010
5011         if (btrfs_root_refs(root_item) > 0 ||
5012             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5013                 path.nodes[level] = root->node;
5014                 path.slots[level] = 0;
5015                 extent_buffer_get(root->node);
5016         } else {
5017                 struct btrfs_key key;
5018
5019                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5020                 level = root_item->drop_level;
5021                 path.lowest_level = level;
5022                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5023                 if (ret < 0)
5024                         goto out;
5025                 ret = 0;
5026         }
5027
5028         while (1) {
5029                 wret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5030                 if (wret < 0)
5031                         ret = wret;
5032                 if (wret != 0)
5033                         break;
5034
5035                 wret = walk_up_tree_v2(root, &path, &level);
5036                 if (wret < 0)
5037                         ret = wret;
5038                 if (wret != 0)
5039                         break;
5040         }
5041
5042 out:
5043         btrfs_release_path(&path);
5044         return ret;
5045 }
5046
5047 /*
5048  * Find the relative ref for root_ref and root_backref.
5049  *
5050  * @root:       the root of the root tree.
5051  * @ref_key:    the key of the root ref.
5052  *
5053  * Return 0 if no error occurred.
5054  */
5055 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5056                           struct extent_buffer *node, int slot)
5057 {
5058         struct btrfs_path path;
5059         struct btrfs_key key;
5060         struct btrfs_root_ref *ref;
5061         struct btrfs_root_ref *backref;
5062         char ref_name[BTRFS_NAME_LEN] = {0};
5063         char backref_name[BTRFS_NAME_LEN] = {0};
5064         u64 ref_dirid;
5065         u64 ref_seq;
5066         u32 ref_namelen;
5067         u64 backref_dirid;
5068         u64 backref_seq;
5069         u32 backref_namelen;
5070         u32 len;
5071         int ret;
5072         int err = 0;
5073
5074         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5075         ref_dirid = btrfs_root_ref_dirid(node, ref);
5076         ref_seq = btrfs_root_ref_sequence(node, ref);
5077         ref_namelen = btrfs_root_ref_name_len(node, ref);
5078
5079         if (ref_namelen <= BTRFS_NAME_LEN) {
5080                 len = ref_namelen;
5081         } else {
5082                 len = BTRFS_NAME_LEN;
5083                 warning("%s[%llu %llu] ref_name too long",
5084                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5085                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5086                         ref_key->offset);
5087         }
5088         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5089
5090         /* Find relative root_ref */
5091         key.objectid = ref_key->offset;
5092         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5093         key.offset = ref_key->objectid;
5094
5095         btrfs_init_path(&path);
5096         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5097         if (ret) {
5098                 err |= ROOT_REF_MISSING;
5099                 error("%s[%llu %llu] couldn't find relative ref",
5100                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5101                       "ROOT_REF" : "ROOT_BACKREF",
5102                       ref_key->objectid, ref_key->offset);
5103                 goto out;
5104         }
5105
5106         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5107                                  struct btrfs_root_ref);
5108         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5109         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5110         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5111
5112         if (backref_namelen <= BTRFS_NAME_LEN) {
5113                 len = backref_namelen;
5114         } else {
5115                 len = BTRFS_NAME_LEN;
5116                 warning("%s[%llu %llu] ref_name too long",
5117                         key.type == BTRFS_ROOT_REF_KEY ?
5118                         "ROOT_REF" : "ROOT_BACKREF",
5119                         key.objectid, key.offset);
5120         }
5121         read_extent_buffer(path.nodes[0], backref_name,
5122                            (unsigned long)(backref + 1), len);
5123
5124         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5125             ref_namelen != backref_namelen ||
5126             strncmp(ref_name, backref_name, len)) {
5127                 err |= ROOT_REF_MISMATCH;
5128                 error("%s[%llu %llu] mismatch relative ref",
5129                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5130                       "ROOT_REF" : "ROOT_BACKREF",
5131                       ref_key->objectid, ref_key->offset);
5132         }
5133 out:
5134         btrfs_release_path(&path);
5135         return err;
5136 }
5137
5138 /*
5139  * Check all fs/file tree in low_memory mode.
5140  *
5141  * 1. for fs tree root item, call check_fs_root_v2()
5142  * 2. for fs tree root ref/backref, call check_root_ref()
5143  *
5144  * Return 0 if no error occurred.
5145  */
5146 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5147 {
5148         struct btrfs_root *tree_root = fs_info->tree_root;
5149         struct btrfs_root *cur_root = NULL;
5150         struct btrfs_path path;
5151         struct btrfs_key key;
5152         struct extent_buffer *node;
5153         unsigned int ext_ref;
5154         int slot;
5155         int ret;
5156         int err = 0;
5157
5158         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5159
5160         btrfs_init_path(&path);
5161         key.objectid = BTRFS_FS_TREE_OBJECTID;
5162         key.offset = 0;
5163         key.type = BTRFS_ROOT_ITEM_KEY;
5164
5165         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5166         if (ret < 0) {
5167                 err = ret;
5168                 goto out;
5169         } else if (ret > 0) {
5170                 err = -ENOENT;
5171                 goto out;
5172         }
5173
5174         while (1) {
5175                 node = path.nodes[0];
5176                 slot = path.slots[0];
5177                 btrfs_item_key_to_cpu(node, &key, slot);
5178                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5179                         goto out;
5180                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5181                     fs_root_objectid(key.objectid)) {
5182                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5183                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5184                                                                        &key);
5185                         } else {
5186                                 key.offset = (u64)-1;
5187                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5188                         }
5189
5190                         if (IS_ERR(cur_root)) {
5191                                 error("Fail to read fs/subvol tree: %lld",
5192                                       key.objectid);
5193                                 err = -EIO;
5194                                 goto next;
5195                         }
5196
5197                         ret = check_fs_root_v2(cur_root, ext_ref);
5198                         err |= ret;
5199
5200                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5201                                 btrfs_free_fs_root(cur_root);
5202                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5203                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5204                         ret = check_root_ref(tree_root, &key, node, slot);
5205                         err |= ret;
5206                 }
5207 next:
5208                 ret = btrfs_next_item(tree_root, &path);
5209                 if (ret > 0)
5210                         goto out;
5211                 if (ret < 0) {
5212                         err = ret;
5213                         goto out;
5214                 }
5215         }
5216
5217 out:
5218         btrfs_release_path(&path);
5219         return err;
5220 }
5221
5222 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5223 {
5224         struct list_head *cur = rec->backrefs.next;
5225         struct extent_backref *back;
5226         struct tree_backref *tback;
5227         struct data_backref *dback;
5228         u64 found = 0;
5229         int err = 0;
5230
5231         while(cur != &rec->backrefs) {
5232                 back = to_extent_backref(cur);
5233                 cur = cur->next;
5234                 if (!back->found_extent_tree) {
5235                         err = 1;
5236                         if (!print_errs)
5237                                 goto out;
5238                         if (back->is_data) {
5239                                 dback = to_data_backref(back);
5240                                 fprintf(stderr, "Backref %llu %s %llu"
5241                                         " owner %llu offset %llu num_refs %lu"
5242                                         " not found in extent tree\n",
5243                                         (unsigned long long)rec->start,
5244                                         back->full_backref ?
5245                                         "parent" : "root",
5246                                         back->full_backref ?
5247                                         (unsigned long long)dback->parent:
5248                                         (unsigned long long)dback->root,
5249                                         (unsigned long long)dback->owner,
5250                                         (unsigned long long)dback->offset,
5251                                         (unsigned long)dback->num_refs);
5252                         } else {
5253                                 tback = to_tree_backref(back);
5254                                 fprintf(stderr, "Backref %llu parent %llu"
5255                                         " root %llu not found in extent tree\n",
5256                                         (unsigned long long)rec->start,
5257                                         (unsigned long long)tback->parent,
5258                                         (unsigned long long)tback->root);
5259                         }
5260                 }
5261                 if (!back->is_data && !back->found_ref) {
5262                         err = 1;
5263                         if (!print_errs)
5264                                 goto out;
5265                         tback = to_tree_backref(back);
5266                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5267                                 (unsigned long long)rec->start,
5268                                 back->full_backref ? "parent" : "root",
5269                                 back->full_backref ?
5270                                 (unsigned long long)tback->parent :
5271                                 (unsigned long long)tback->root, back);
5272                 }
5273                 if (back->is_data) {
5274                         dback = to_data_backref(back);
5275                         if (dback->found_ref != dback->num_refs) {
5276                                 err = 1;
5277                                 if (!print_errs)
5278                                         goto out;
5279                                 fprintf(stderr, "Incorrect local backref count"
5280                                         " on %llu %s %llu owner %llu"
5281                                         " offset %llu found %u wanted %u back %p\n",
5282                                         (unsigned long long)rec->start,
5283                                         back->full_backref ?
5284                                         "parent" : "root",
5285                                         back->full_backref ?
5286                                         (unsigned long long)dback->parent:
5287                                         (unsigned long long)dback->root,
5288                                         (unsigned long long)dback->owner,
5289                                         (unsigned long long)dback->offset,
5290                                         dback->found_ref, dback->num_refs, back);
5291                         }
5292                         if (dback->disk_bytenr != rec->start) {
5293                                 err = 1;
5294                                 if (!print_errs)
5295                                         goto out;
5296                                 fprintf(stderr, "Backref disk bytenr does not"
5297                                         " match extent record, bytenr=%llu, "
5298                                         "ref bytenr=%llu\n",
5299                                         (unsigned long long)rec->start,
5300                                         (unsigned long long)dback->disk_bytenr);
5301                         }
5302
5303                         if (dback->bytes != rec->nr) {
5304                                 err = 1;
5305                                 if (!print_errs)
5306                                         goto out;
5307                                 fprintf(stderr, "Backref bytes do not match "
5308                                         "extent backref, bytenr=%llu, ref "
5309                                         "bytes=%llu, backref bytes=%llu\n",
5310                                         (unsigned long long)rec->start,
5311                                         (unsigned long long)rec->nr,
5312                                         (unsigned long long)dback->bytes);
5313                         }
5314                 }
5315                 if (!back->is_data) {
5316                         found += 1;
5317                 } else {
5318                         dback = to_data_backref(back);
5319                         found += dback->found_ref;
5320                 }
5321         }
5322         if (found != rec->refs) {
5323                 err = 1;
5324                 if (!print_errs)
5325                         goto out;
5326                 fprintf(stderr, "Incorrect global backref count "
5327                         "on %llu found %llu wanted %llu\n",
5328                         (unsigned long long)rec->start,
5329                         (unsigned long long)found,
5330                         (unsigned long long)rec->refs);
5331         }
5332 out:
5333         return err;
5334 }
5335
5336 static int free_all_extent_backrefs(struct extent_record *rec)
5337 {
5338         struct extent_backref *back;
5339         struct list_head *cur;
5340         while (!list_empty(&rec->backrefs)) {
5341                 cur = rec->backrefs.next;
5342                 back = to_extent_backref(cur);
5343                 list_del(cur);
5344                 free(back);
5345         }
5346         return 0;
5347 }
5348
5349 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5350                                      struct cache_tree *extent_cache)
5351 {
5352         struct cache_extent *cache;
5353         struct extent_record *rec;
5354
5355         while (1) {
5356                 cache = first_cache_extent(extent_cache);
5357                 if (!cache)
5358                         break;
5359                 rec = container_of(cache, struct extent_record, cache);
5360                 remove_cache_extent(extent_cache, cache);
5361                 free_all_extent_backrefs(rec);
5362                 free(rec);
5363         }
5364 }
5365
5366 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5367                                  struct extent_record *rec)
5368 {
5369         if (rec->content_checked && rec->owner_ref_checked &&
5370             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5371             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5372             !rec->bad_full_backref && !rec->crossing_stripes &&
5373             !rec->wrong_chunk_type) {
5374                 remove_cache_extent(extent_cache, &rec->cache);
5375                 free_all_extent_backrefs(rec);
5376                 list_del_init(&rec->list);
5377                 free(rec);
5378         }
5379         return 0;
5380 }
5381
5382 static int check_owner_ref(struct btrfs_root *root,
5383                             struct extent_record *rec,
5384                             struct extent_buffer *buf)
5385 {
5386         struct extent_backref *node;
5387         struct tree_backref *back;
5388         struct btrfs_root *ref_root;
5389         struct btrfs_key key;
5390         struct btrfs_path path;
5391         struct extent_buffer *parent;
5392         int level;
5393         int found = 0;
5394         int ret;
5395
5396         list_for_each_entry(node, &rec->backrefs, list) {
5397                 if (node->is_data)
5398                         continue;
5399                 if (!node->found_ref)
5400                         continue;
5401                 if (node->full_backref)
5402                         continue;
5403                 back = to_tree_backref(node);
5404                 if (btrfs_header_owner(buf) == back->root)
5405                         return 0;
5406         }
5407         BUG_ON(rec->is_root);
5408
5409         /* try to find the block by search corresponding fs tree */
5410         key.objectid = btrfs_header_owner(buf);
5411         key.type = BTRFS_ROOT_ITEM_KEY;
5412         key.offset = (u64)-1;
5413
5414         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5415         if (IS_ERR(ref_root))
5416                 return 1;
5417
5418         level = btrfs_header_level(buf);
5419         if (level == 0)
5420                 btrfs_item_key_to_cpu(buf, &key, 0);
5421         else
5422                 btrfs_node_key_to_cpu(buf, &key, 0);
5423
5424         btrfs_init_path(&path);
5425         path.lowest_level = level + 1;
5426         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5427         if (ret < 0)
5428                 return 0;
5429
5430         parent = path.nodes[level + 1];
5431         if (parent && buf->start == btrfs_node_blockptr(parent,
5432                                                         path.slots[level + 1]))
5433                 found = 1;
5434
5435         btrfs_release_path(&path);
5436         return found ? 0 : 1;
5437 }
5438
5439 static int is_extent_tree_record(struct extent_record *rec)
5440 {
5441         struct list_head *cur = rec->backrefs.next;
5442         struct extent_backref *node;
5443         struct tree_backref *back;
5444         int is_extent = 0;
5445
5446         while(cur != &rec->backrefs) {
5447                 node = to_extent_backref(cur);
5448                 cur = cur->next;
5449                 if (node->is_data)
5450                         return 0;
5451                 back = to_tree_backref(node);
5452                 if (node->full_backref)
5453                         return 0;
5454                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5455                         is_extent = 1;
5456         }
5457         return is_extent;
5458 }
5459
5460
5461 static int record_bad_block_io(struct btrfs_fs_info *info,
5462                                struct cache_tree *extent_cache,
5463                                u64 start, u64 len)
5464 {
5465         struct extent_record *rec;
5466         struct cache_extent *cache;
5467         struct btrfs_key key;
5468
5469         cache = lookup_cache_extent(extent_cache, start, len);
5470         if (!cache)
5471                 return 0;
5472
5473         rec = container_of(cache, struct extent_record, cache);
5474         if (!is_extent_tree_record(rec))
5475                 return 0;
5476
5477         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5478         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5479 }
5480
5481 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5482                        struct extent_buffer *buf, int slot)
5483 {
5484         if (btrfs_header_level(buf)) {
5485                 struct btrfs_key_ptr ptr1, ptr2;
5486
5487                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5488                                    sizeof(struct btrfs_key_ptr));
5489                 read_extent_buffer(buf, &ptr2,
5490                                    btrfs_node_key_ptr_offset(slot + 1),
5491                                    sizeof(struct btrfs_key_ptr));
5492                 write_extent_buffer(buf, &ptr1,
5493                                     btrfs_node_key_ptr_offset(slot + 1),
5494                                     sizeof(struct btrfs_key_ptr));
5495                 write_extent_buffer(buf, &ptr2,
5496                                     btrfs_node_key_ptr_offset(slot),
5497                                     sizeof(struct btrfs_key_ptr));
5498                 if (slot == 0) {
5499                         struct btrfs_disk_key key;
5500                         btrfs_node_key(buf, &key, 0);
5501                         btrfs_fixup_low_keys(root, path, &key,
5502                                              btrfs_header_level(buf) + 1);
5503                 }
5504         } else {
5505                 struct btrfs_item *item1, *item2;
5506                 struct btrfs_key k1, k2;
5507                 char *item1_data, *item2_data;
5508                 u32 item1_offset, item2_offset, item1_size, item2_size;
5509
5510                 item1 = btrfs_item_nr(slot);
5511                 item2 = btrfs_item_nr(slot + 1);
5512                 btrfs_item_key_to_cpu(buf, &k1, slot);
5513                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5514                 item1_offset = btrfs_item_offset(buf, item1);
5515                 item2_offset = btrfs_item_offset(buf, item2);
5516                 item1_size = btrfs_item_size(buf, item1);
5517                 item2_size = btrfs_item_size(buf, item2);
5518
5519                 item1_data = malloc(item1_size);
5520                 if (!item1_data)
5521                         return -ENOMEM;
5522                 item2_data = malloc(item2_size);
5523                 if (!item2_data) {
5524                         free(item1_data);
5525                         return -ENOMEM;
5526                 }
5527
5528                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5529                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5530
5531                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5532                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5533                 free(item1_data);
5534                 free(item2_data);
5535
5536                 btrfs_set_item_offset(buf, item1, item2_offset);
5537                 btrfs_set_item_offset(buf, item2, item1_offset);
5538                 btrfs_set_item_size(buf, item1, item2_size);
5539                 btrfs_set_item_size(buf, item2, item1_size);
5540
5541                 path->slots[0] = slot;
5542                 btrfs_set_item_key_unsafe(root, path, &k2);
5543                 path->slots[0] = slot + 1;
5544                 btrfs_set_item_key_unsafe(root, path, &k1);
5545         }
5546         return 0;
5547 }
5548
5549 static int fix_key_order(struct btrfs_trans_handle *trans,
5550                          struct btrfs_root *root,
5551                          struct btrfs_path *path)
5552 {
5553         struct extent_buffer *buf;
5554         struct btrfs_key k1, k2;
5555         int i;
5556         int level = path->lowest_level;
5557         int ret = -EIO;
5558
5559         buf = path->nodes[level];
5560         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5561                 if (level) {
5562                         btrfs_node_key_to_cpu(buf, &k1, i);
5563                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5564                 } else {
5565                         btrfs_item_key_to_cpu(buf, &k1, i);
5566                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5567                 }
5568                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5569                         continue;
5570                 ret = swap_values(root, path, buf, i);
5571                 if (ret)
5572                         break;
5573                 btrfs_mark_buffer_dirty(buf);
5574                 i = 0;
5575         }
5576         return ret;
5577 }
5578
5579 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5580                              struct btrfs_root *root,
5581                              struct btrfs_path *path,
5582                              struct extent_buffer *buf, int slot)
5583 {
5584         struct btrfs_key key;
5585         int nritems = btrfs_header_nritems(buf);
5586
5587         btrfs_item_key_to_cpu(buf, &key, slot);
5588
5589         /* These are all the keys we can deal with missing. */
5590         if (key.type != BTRFS_DIR_INDEX_KEY &&
5591             key.type != BTRFS_EXTENT_ITEM_KEY &&
5592             key.type != BTRFS_METADATA_ITEM_KEY &&
5593             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5594             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5595                 return -1;
5596
5597         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5598                (unsigned long long)key.objectid, key.type,
5599                (unsigned long long)key.offset, slot, buf->start);
5600         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5601                               btrfs_item_nr_offset(slot + 1),
5602                               sizeof(struct btrfs_item) *
5603                               (nritems - slot - 1));
5604         btrfs_set_header_nritems(buf, nritems - 1);
5605         if (slot == 0) {
5606                 struct btrfs_disk_key disk_key;
5607
5608                 btrfs_item_key(buf, &disk_key, 0);
5609                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5610         }
5611         btrfs_mark_buffer_dirty(buf);
5612         return 0;
5613 }
5614
5615 static int fix_item_offset(struct btrfs_trans_handle *trans,
5616                            struct btrfs_root *root,
5617                            struct btrfs_path *path)
5618 {
5619         struct extent_buffer *buf;
5620         int i;
5621         int ret = 0;
5622
5623         /* We should only get this for leaves */
5624         BUG_ON(path->lowest_level);
5625         buf = path->nodes[0];
5626 again:
5627         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5628                 unsigned int shift = 0, offset;
5629
5630                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5631                     BTRFS_LEAF_DATA_SIZE(root)) {
5632                         if (btrfs_item_end_nr(buf, i) >
5633                             BTRFS_LEAF_DATA_SIZE(root)) {
5634                                 ret = delete_bogus_item(trans, root, path,
5635                                                         buf, i);
5636                                 if (!ret)
5637                                         goto again;
5638                                 fprintf(stderr, "item is off the end of the "
5639                                         "leaf, can't fix\n");
5640                                 ret = -EIO;
5641                                 break;
5642                         }
5643                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5644                                 btrfs_item_end_nr(buf, i);
5645                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5646                            btrfs_item_offset_nr(buf, i - 1)) {
5647                         if (btrfs_item_end_nr(buf, i) >
5648                             btrfs_item_offset_nr(buf, i - 1)) {
5649                                 ret = delete_bogus_item(trans, root, path,
5650                                                         buf, i);
5651                                 if (!ret)
5652                                         goto again;
5653                                 fprintf(stderr, "items overlap, can't fix\n");
5654                                 ret = -EIO;
5655                                 break;
5656                         }
5657                         shift = btrfs_item_offset_nr(buf, i - 1) -
5658                                 btrfs_item_end_nr(buf, i);
5659                 }
5660                 if (!shift)
5661                         continue;
5662
5663                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5664                        i, shift, (unsigned long long)buf->start);
5665                 offset = btrfs_item_offset_nr(buf, i);
5666                 memmove_extent_buffer(buf,
5667                                       btrfs_leaf_data(buf) + offset + shift,
5668                                       btrfs_leaf_data(buf) + offset,
5669                                       btrfs_item_size_nr(buf, i));
5670                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5671                                       offset + shift);
5672                 btrfs_mark_buffer_dirty(buf);
5673         }
5674
5675         /*
5676          * We may have moved things, in which case we want to exit so we don't
5677          * write those changes out.  Once we have proper abort functionality in
5678          * progs this can be changed to something nicer.
5679          */
5680         BUG_ON(ret);
5681         return ret;
5682 }
5683
5684 /*
5685  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5686  * then just return -EIO.
5687  */
5688 static int try_to_fix_bad_block(struct btrfs_root *root,
5689                                 struct extent_buffer *buf,
5690                                 enum btrfs_tree_block_status status)
5691 {
5692         struct btrfs_trans_handle *trans;
5693         struct ulist *roots;
5694         struct ulist_node *node;
5695         struct btrfs_root *search_root;
5696         struct btrfs_path path;
5697         struct ulist_iterator iter;
5698         struct btrfs_key root_key, key;
5699         int ret;
5700
5701         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5702             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5703                 return -EIO;
5704
5705         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5706         if (ret)
5707                 return -EIO;
5708
5709         btrfs_init_path(&path);
5710         ULIST_ITER_INIT(&iter);
5711         while ((node = ulist_next(roots, &iter))) {
5712                 root_key.objectid = node->val;
5713                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5714                 root_key.offset = (u64)-1;
5715
5716                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5717                 if (IS_ERR(root)) {
5718                         ret = -EIO;
5719                         break;
5720                 }
5721
5722
5723                 trans = btrfs_start_transaction(search_root, 0);
5724                 if (IS_ERR(trans)) {
5725                         ret = PTR_ERR(trans);
5726                         break;
5727                 }
5728
5729                 path.lowest_level = btrfs_header_level(buf);
5730                 path.skip_check_block = 1;
5731                 if (path.lowest_level)
5732                         btrfs_node_key_to_cpu(buf, &key, 0);
5733                 else
5734                         btrfs_item_key_to_cpu(buf, &key, 0);
5735                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5736                 if (ret) {
5737                         ret = -EIO;
5738                         btrfs_commit_transaction(trans, search_root);
5739                         break;
5740                 }
5741                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5742                         ret = fix_key_order(trans, search_root, &path);
5743                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5744                         ret = fix_item_offset(trans, search_root, &path);
5745                 if (ret) {
5746                         btrfs_commit_transaction(trans, search_root);
5747                         break;
5748                 }
5749                 btrfs_release_path(&path);
5750                 btrfs_commit_transaction(trans, search_root);
5751         }
5752         ulist_free(roots);
5753         btrfs_release_path(&path);
5754         return ret;
5755 }
5756
5757 static int check_block(struct btrfs_root *root,
5758                        struct cache_tree *extent_cache,
5759                        struct extent_buffer *buf, u64 flags)
5760 {
5761         struct extent_record *rec;
5762         struct cache_extent *cache;
5763         struct btrfs_key key;
5764         enum btrfs_tree_block_status status;
5765         int ret = 0;
5766         int level;
5767
5768         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5769         if (!cache)
5770                 return 1;
5771         rec = container_of(cache, struct extent_record, cache);
5772         rec->generation = btrfs_header_generation(buf);
5773
5774         level = btrfs_header_level(buf);
5775         if (btrfs_header_nritems(buf) > 0) {
5776
5777                 if (level == 0)
5778                         btrfs_item_key_to_cpu(buf, &key, 0);
5779                 else
5780                         btrfs_node_key_to_cpu(buf, &key, 0);
5781
5782                 rec->info_objectid = key.objectid;
5783         }
5784         rec->info_level = level;
5785
5786         if (btrfs_is_leaf(buf))
5787                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5788         else
5789                 status = btrfs_check_node(root, &rec->parent_key, buf);
5790
5791         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5792                 if (repair)
5793                         status = try_to_fix_bad_block(root, buf, status);
5794                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5795                         ret = -EIO;
5796                         fprintf(stderr, "bad block %llu\n",
5797                                 (unsigned long long)buf->start);
5798                 } else {
5799                         /*
5800                          * Signal to callers we need to start the scan over
5801                          * again since we'll have cowed blocks.
5802                          */
5803                         ret = -EAGAIN;
5804                 }
5805         } else {
5806                 rec->content_checked = 1;
5807                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5808                         rec->owner_ref_checked = 1;
5809                 else {
5810                         ret = check_owner_ref(root, rec, buf);
5811                         if (!ret)
5812                                 rec->owner_ref_checked = 1;
5813                 }
5814         }
5815         if (!ret)
5816                 maybe_free_extent_rec(extent_cache, rec);
5817         return ret;
5818 }
5819
5820 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5821                                                 u64 parent, u64 root)
5822 {
5823         struct list_head *cur = rec->backrefs.next;
5824         struct extent_backref *node;
5825         struct tree_backref *back;
5826
5827         while(cur != &rec->backrefs) {
5828                 node = to_extent_backref(cur);
5829                 cur = cur->next;
5830                 if (node->is_data)
5831                         continue;
5832                 back = to_tree_backref(node);
5833                 if (parent > 0) {
5834                         if (!node->full_backref)
5835                                 continue;
5836                         if (parent == back->parent)
5837                                 return back;
5838                 } else {
5839                         if (node->full_backref)
5840                                 continue;
5841                         if (back->root == root)
5842                                 return back;
5843                 }
5844         }
5845         return NULL;
5846 }
5847
5848 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5849                                                 u64 parent, u64 root)
5850 {
5851         struct tree_backref *ref = malloc(sizeof(*ref));
5852
5853         if (!ref)
5854                 return NULL;
5855         memset(&ref->node, 0, sizeof(ref->node));
5856         if (parent > 0) {
5857                 ref->parent = parent;
5858                 ref->node.full_backref = 1;
5859         } else {
5860                 ref->root = root;
5861                 ref->node.full_backref = 0;
5862         }
5863         list_add_tail(&ref->node.list, &rec->backrefs);
5864
5865         return ref;
5866 }
5867
5868 static struct data_backref *find_data_backref(struct extent_record *rec,
5869                                                 u64 parent, u64 root,
5870                                                 u64 owner, u64 offset,
5871                                                 int found_ref,
5872                                                 u64 disk_bytenr, u64 bytes)
5873 {
5874         struct list_head *cur = rec->backrefs.next;
5875         struct extent_backref *node;
5876         struct data_backref *back;
5877
5878         while(cur != &rec->backrefs) {
5879                 node = to_extent_backref(cur);
5880                 cur = cur->next;
5881                 if (!node->is_data)
5882                         continue;
5883                 back = to_data_backref(node);
5884                 if (parent > 0) {
5885                         if (!node->full_backref)
5886                                 continue;
5887                         if (parent == back->parent)
5888                                 return back;
5889                 } else {
5890                         if (node->full_backref)
5891                                 continue;
5892                         if (back->root == root && back->owner == owner &&
5893                             back->offset == offset) {
5894                                 if (found_ref && node->found_ref &&
5895                                     (back->bytes != bytes ||
5896                                     back->disk_bytenr != disk_bytenr))
5897                                         continue;
5898                                 return back;
5899                         }
5900                 }
5901         }
5902         return NULL;
5903 }
5904
5905 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5906                                                 u64 parent, u64 root,
5907                                                 u64 owner, u64 offset,
5908                                                 u64 max_size)
5909 {
5910         struct data_backref *ref = malloc(sizeof(*ref));
5911
5912         if (!ref)
5913                 return NULL;
5914         memset(&ref->node, 0, sizeof(ref->node));
5915         ref->node.is_data = 1;
5916
5917         if (parent > 0) {
5918                 ref->parent = parent;
5919                 ref->owner = 0;
5920                 ref->offset = 0;
5921                 ref->node.full_backref = 1;
5922         } else {
5923                 ref->root = root;
5924                 ref->owner = owner;
5925                 ref->offset = offset;
5926                 ref->node.full_backref = 0;
5927         }
5928         ref->bytes = max_size;
5929         ref->found_ref = 0;
5930         ref->num_refs = 0;
5931         list_add_tail(&ref->node.list, &rec->backrefs);
5932         if (max_size > rec->max_size)
5933                 rec->max_size = max_size;
5934         return ref;
5935 }
5936
5937 /* Check if the type of extent matches with its chunk */
5938 static void check_extent_type(struct extent_record *rec)
5939 {
5940         struct btrfs_block_group_cache *bg_cache;
5941
5942         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5943         if (!bg_cache)
5944                 return;
5945
5946         /* data extent, check chunk directly*/
5947         if (!rec->metadata) {
5948                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5949                         rec->wrong_chunk_type = 1;
5950                 return;
5951         }
5952
5953         /* metadata extent, check the obvious case first */
5954         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5955                                  BTRFS_BLOCK_GROUP_METADATA))) {
5956                 rec->wrong_chunk_type = 1;
5957                 return;
5958         }
5959
5960         /*
5961          * Check SYSTEM extent, as it's also marked as metadata, we can only
5962          * make sure it's a SYSTEM extent by its backref
5963          */
5964         if (!list_empty(&rec->backrefs)) {
5965                 struct extent_backref *node;
5966                 struct tree_backref *tback;
5967                 u64 bg_type;
5968
5969                 node = to_extent_backref(rec->backrefs.next);
5970                 if (node->is_data) {
5971                         /* tree block shouldn't have data backref */
5972                         rec->wrong_chunk_type = 1;
5973                         return;
5974                 }
5975                 tback = container_of(node, struct tree_backref, node);
5976
5977                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5978                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5979                 else
5980                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5981                 if (!(bg_cache->flags & bg_type))
5982                         rec->wrong_chunk_type = 1;
5983         }
5984 }
5985
5986 /*
5987  * Allocate a new extent record, fill default values from @tmpl and insert int
5988  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5989  * the cache, otherwise it fails.
5990  */
5991 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5992                 struct extent_record *tmpl)
5993 {
5994         struct extent_record *rec;
5995         int ret = 0;
5996
5997         rec = malloc(sizeof(*rec));
5998         if (!rec)
5999                 return -ENOMEM;
6000         rec->start = tmpl->start;
6001         rec->max_size = tmpl->max_size;
6002         rec->nr = max(tmpl->nr, tmpl->max_size);
6003         rec->found_rec = tmpl->found_rec;
6004         rec->content_checked = tmpl->content_checked;
6005         rec->owner_ref_checked = tmpl->owner_ref_checked;
6006         rec->num_duplicates = 0;
6007         rec->metadata = tmpl->metadata;
6008         rec->flag_block_full_backref = FLAG_UNSET;
6009         rec->bad_full_backref = 0;
6010         rec->crossing_stripes = 0;
6011         rec->wrong_chunk_type = 0;
6012         rec->is_root = tmpl->is_root;
6013         rec->refs = tmpl->refs;
6014         rec->extent_item_refs = tmpl->extent_item_refs;
6015         rec->parent_generation = tmpl->parent_generation;
6016         INIT_LIST_HEAD(&rec->backrefs);
6017         INIT_LIST_HEAD(&rec->dups);
6018         INIT_LIST_HEAD(&rec->list);
6019         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6020         rec->cache.start = tmpl->start;
6021         rec->cache.size = tmpl->nr;
6022         ret = insert_cache_extent(extent_cache, &rec->cache);
6023         if (ret) {
6024                 free(rec);
6025                 return ret;
6026         }
6027         bytes_used += rec->nr;
6028
6029         if (tmpl->metadata)
6030                 rec->crossing_stripes = check_crossing_stripes(global_info,
6031                                 rec->start, global_info->tree_root->nodesize);
6032         check_extent_type(rec);
6033         return ret;
6034 }
6035
6036 /*
6037  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6038  * some are hints:
6039  * - refs              - if found, increase refs
6040  * - is_root           - if found, set
6041  * - content_checked   - if found, set
6042  * - owner_ref_checked - if found, set
6043  *
6044  * If not found, create a new one, initialize and insert.
6045  */
6046 static int add_extent_rec(struct cache_tree *extent_cache,
6047                 struct extent_record *tmpl)
6048 {
6049         struct extent_record *rec;
6050         struct cache_extent *cache;
6051         int ret = 0;
6052         int dup = 0;
6053
6054         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6055         if (cache) {
6056                 rec = container_of(cache, struct extent_record, cache);
6057                 if (tmpl->refs)
6058                         rec->refs++;
6059                 if (rec->nr == 1)
6060                         rec->nr = max(tmpl->nr, tmpl->max_size);
6061
6062                 /*
6063                  * We need to make sure to reset nr to whatever the extent
6064                  * record says was the real size, this way we can compare it to
6065                  * the backrefs.
6066                  */
6067                 if (tmpl->found_rec) {
6068                         if (tmpl->start != rec->start || rec->found_rec) {
6069                                 struct extent_record *tmp;
6070
6071                                 dup = 1;
6072                                 if (list_empty(&rec->list))
6073                                         list_add_tail(&rec->list,
6074                                                       &duplicate_extents);
6075
6076                                 /*
6077                                  * We have to do this song and dance in case we
6078                                  * find an extent record that falls inside of
6079                                  * our current extent record but does not have
6080                                  * the same objectid.
6081                                  */
6082                                 tmp = malloc(sizeof(*tmp));
6083                                 if (!tmp)
6084                                         return -ENOMEM;
6085                                 tmp->start = tmpl->start;
6086                                 tmp->max_size = tmpl->max_size;
6087                                 tmp->nr = tmpl->nr;
6088                                 tmp->found_rec = 1;
6089                                 tmp->metadata = tmpl->metadata;
6090                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6091                                 INIT_LIST_HEAD(&tmp->list);
6092                                 list_add_tail(&tmp->list, &rec->dups);
6093                                 rec->num_duplicates++;
6094                         } else {
6095                                 rec->nr = tmpl->nr;
6096                                 rec->found_rec = 1;
6097                         }
6098                 }
6099
6100                 if (tmpl->extent_item_refs && !dup) {
6101                         if (rec->extent_item_refs) {
6102                                 fprintf(stderr, "block %llu rec "
6103                                         "extent_item_refs %llu, passed %llu\n",
6104                                         (unsigned long long)tmpl->start,
6105                                         (unsigned long long)
6106                                                         rec->extent_item_refs,
6107                                         (unsigned long long)tmpl->extent_item_refs);
6108                         }
6109                         rec->extent_item_refs = tmpl->extent_item_refs;
6110                 }
6111                 if (tmpl->is_root)
6112                         rec->is_root = 1;
6113                 if (tmpl->content_checked)
6114                         rec->content_checked = 1;
6115                 if (tmpl->owner_ref_checked)
6116                         rec->owner_ref_checked = 1;
6117                 memcpy(&rec->parent_key, &tmpl->parent_key,
6118                                 sizeof(tmpl->parent_key));
6119                 if (tmpl->parent_generation)
6120                         rec->parent_generation = tmpl->parent_generation;
6121                 if (rec->max_size < tmpl->max_size)
6122                         rec->max_size = tmpl->max_size;
6123
6124                 /*
6125                  * A metadata extent can't cross stripe_len boundary, otherwise
6126                  * kernel scrub won't be able to handle it.
6127                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6128                  * it.
6129                  */
6130                 if (tmpl->metadata)
6131                         rec->crossing_stripes = check_crossing_stripes(
6132                                         global_info, rec->start,
6133                                         global_info->tree_root->nodesize);
6134                 check_extent_type(rec);
6135                 maybe_free_extent_rec(extent_cache, rec);
6136                 return ret;
6137         }
6138
6139         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6140
6141         return ret;
6142 }
6143
6144 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6145                             u64 parent, u64 root, int found_ref)
6146 {
6147         struct extent_record *rec;
6148         struct tree_backref *back;
6149         struct cache_extent *cache;
6150         int ret;
6151
6152         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6153         if (!cache) {
6154                 struct extent_record tmpl;
6155
6156                 memset(&tmpl, 0, sizeof(tmpl));
6157                 tmpl.start = bytenr;
6158                 tmpl.nr = 1;
6159                 tmpl.metadata = 1;
6160
6161                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6162                 if (ret)
6163                         return ret;
6164
6165                 /* really a bug in cache_extent implement now */
6166                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6167                 if (!cache)
6168                         return -ENOENT;
6169         }
6170
6171         rec = container_of(cache, struct extent_record, cache);
6172         if (rec->start != bytenr) {
6173                 /*
6174                  * Several cause, from unaligned bytenr to over lapping extents
6175                  */
6176                 return -EEXIST;
6177         }
6178
6179         back = find_tree_backref(rec, parent, root);
6180         if (!back) {
6181                 back = alloc_tree_backref(rec, parent, root);
6182                 if (!back)
6183                         return -ENOMEM;
6184         }
6185
6186         if (found_ref) {
6187                 if (back->node.found_ref) {
6188                         fprintf(stderr, "Extent back ref already exists "
6189                                 "for %llu parent %llu root %llu \n",
6190                                 (unsigned long long)bytenr,
6191                                 (unsigned long long)parent,
6192                                 (unsigned long long)root);
6193                 }
6194                 back->node.found_ref = 1;
6195         } else {
6196                 if (back->node.found_extent_tree) {
6197                         fprintf(stderr, "Extent back ref already exists "
6198                                 "for %llu parent %llu root %llu \n",
6199                                 (unsigned long long)bytenr,
6200                                 (unsigned long long)parent,
6201                                 (unsigned long long)root);
6202                 }
6203                 back->node.found_extent_tree = 1;
6204         }
6205         check_extent_type(rec);
6206         maybe_free_extent_rec(extent_cache, rec);
6207         return 0;
6208 }
6209
6210 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6211                             u64 parent, u64 root, u64 owner, u64 offset,
6212                             u32 num_refs, int found_ref, u64 max_size)
6213 {
6214         struct extent_record *rec;
6215         struct data_backref *back;
6216         struct cache_extent *cache;
6217         int ret;
6218
6219         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6220         if (!cache) {
6221                 struct extent_record tmpl;
6222
6223                 memset(&tmpl, 0, sizeof(tmpl));
6224                 tmpl.start = bytenr;
6225                 tmpl.nr = 1;
6226                 tmpl.max_size = max_size;
6227
6228                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6229                 if (ret)
6230                         return ret;
6231
6232                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6233                 if (!cache)
6234                         abort();
6235         }
6236
6237         rec = container_of(cache, struct extent_record, cache);
6238         if (rec->max_size < max_size)
6239                 rec->max_size = max_size;
6240
6241         /*
6242          * If found_ref is set then max_size is the real size and must match the
6243          * existing refs.  So if we have already found a ref then we need to
6244          * make sure that this ref matches the existing one, otherwise we need
6245          * to add a new backref so we can notice that the backrefs don't match
6246          * and we need to figure out who is telling the truth.  This is to
6247          * account for that awful fsync bug I introduced where we'd end up with
6248          * a btrfs_file_extent_item that would have its length include multiple
6249          * prealloc extents or point inside of a prealloc extent.
6250          */
6251         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6252                                  bytenr, max_size);
6253         if (!back) {
6254                 back = alloc_data_backref(rec, parent, root, owner, offset,
6255                                           max_size);
6256                 BUG_ON(!back);
6257         }
6258
6259         if (found_ref) {
6260                 BUG_ON(num_refs != 1);
6261                 if (back->node.found_ref)
6262                         BUG_ON(back->bytes != max_size);
6263                 back->node.found_ref = 1;
6264                 back->found_ref += 1;
6265                 back->bytes = max_size;
6266                 back->disk_bytenr = bytenr;
6267                 rec->refs += 1;
6268                 rec->content_checked = 1;
6269                 rec->owner_ref_checked = 1;
6270         } else {
6271                 if (back->node.found_extent_tree) {
6272                         fprintf(stderr, "Extent back ref already exists "
6273                                 "for %llu parent %llu root %llu "
6274                                 "owner %llu offset %llu num_refs %lu\n",
6275                                 (unsigned long long)bytenr,
6276                                 (unsigned long long)parent,
6277                                 (unsigned long long)root,
6278                                 (unsigned long long)owner,
6279                                 (unsigned long long)offset,
6280                                 (unsigned long)num_refs);
6281                 }
6282                 back->num_refs = num_refs;
6283                 back->node.found_extent_tree = 1;
6284         }
6285         maybe_free_extent_rec(extent_cache, rec);
6286         return 0;
6287 }
6288
6289 static int add_pending(struct cache_tree *pending,
6290                        struct cache_tree *seen, u64 bytenr, u32 size)
6291 {
6292         int ret;
6293         ret = add_cache_extent(seen, bytenr, size);
6294         if (ret)
6295                 return ret;
6296         add_cache_extent(pending, bytenr, size);
6297         return 0;
6298 }
6299
6300 static int pick_next_pending(struct cache_tree *pending,
6301                         struct cache_tree *reada,
6302                         struct cache_tree *nodes,
6303                         u64 last, struct block_info *bits, int bits_nr,
6304                         int *reada_bits)
6305 {
6306         unsigned long node_start = last;
6307         struct cache_extent *cache;
6308         int ret;
6309
6310         cache = search_cache_extent(reada, 0);
6311         if (cache) {
6312                 bits[0].start = cache->start;
6313                 bits[0].size = cache->size;
6314                 *reada_bits = 1;
6315                 return 1;
6316         }
6317         *reada_bits = 0;
6318         if (node_start > 32768)
6319                 node_start -= 32768;
6320
6321         cache = search_cache_extent(nodes, node_start);
6322         if (!cache)
6323                 cache = search_cache_extent(nodes, 0);
6324
6325         if (!cache) {
6326                  cache = search_cache_extent(pending, 0);
6327                  if (!cache)
6328                          return 0;
6329                  ret = 0;
6330                  do {
6331                          bits[ret].start = cache->start;
6332                          bits[ret].size = cache->size;
6333                          cache = next_cache_extent(cache);
6334                          ret++;
6335                  } while (cache && ret < bits_nr);
6336                  return ret;
6337         }
6338
6339         ret = 0;
6340         do {
6341                 bits[ret].start = cache->start;
6342                 bits[ret].size = cache->size;
6343                 cache = next_cache_extent(cache);
6344                 ret++;
6345         } while (cache && ret < bits_nr);
6346
6347         if (bits_nr - ret > 8) {
6348                 u64 lookup = bits[0].start + bits[0].size;
6349                 struct cache_extent *next;
6350                 next = search_cache_extent(pending, lookup);
6351                 while(next) {
6352                         if (next->start - lookup > 32768)
6353                                 break;
6354                         bits[ret].start = next->start;
6355                         bits[ret].size = next->size;
6356                         lookup = next->start + next->size;
6357                         ret++;
6358                         if (ret == bits_nr)
6359                                 break;
6360                         next = next_cache_extent(next);
6361                         if (!next)
6362                                 break;
6363                 }
6364         }
6365         return ret;
6366 }
6367
6368 static void free_chunk_record(struct cache_extent *cache)
6369 {
6370         struct chunk_record *rec;
6371
6372         rec = container_of(cache, struct chunk_record, cache);
6373         list_del_init(&rec->list);
6374         list_del_init(&rec->dextents);
6375         free(rec);
6376 }
6377
6378 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6379 {
6380         cache_tree_free_extents(chunk_cache, free_chunk_record);
6381 }
6382
6383 static void free_device_record(struct rb_node *node)
6384 {
6385         struct device_record *rec;
6386
6387         rec = container_of(node, struct device_record, node);
6388         free(rec);
6389 }
6390
6391 FREE_RB_BASED_TREE(device_cache, free_device_record);
6392
6393 int insert_block_group_record(struct block_group_tree *tree,
6394                               struct block_group_record *bg_rec)
6395 {
6396         int ret;
6397
6398         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6399         if (ret)
6400                 return ret;
6401
6402         list_add_tail(&bg_rec->list, &tree->block_groups);
6403         return 0;
6404 }
6405
6406 static void free_block_group_record(struct cache_extent *cache)
6407 {
6408         struct block_group_record *rec;
6409
6410         rec = container_of(cache, struct block_group_record, cache);
6411         list_del_init(&rec->list);
6412         free(rec);
6413 }
6414
6415 void free_block_group_tree(struct block_group_tree *tree)
6416 {
6417         cache_tree_free_extents(&tree->tree, free_block_group_record);
6418 }
6419
6420 int insert_device_extent_record(struct device_extent_tree *tree,
6421                                 struct device_extent_record *de_rec)
6422 {
6423         int ret;
6424
6425         /*
6426          * Device extent is a bit different from the other extents, because
6427          * the extents which belong to the different devices may have the
6428          * same start and size, so we need use the special extent cache
6429          * search/insert functions.
6430          */
6431         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6432         if (ret)
6433                 return ret;
6434
6435         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6436         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6437         return 0;
6438 }
6439
6440 static void free_device_extent_record(struct cache_extent *cache)
6441 {
6442         struct device_extent_record *rec;
6443
6444         rec = container_of(cache, struct device_extent_record, cache);
6445         if (!list_empty(&rec->chunk_list))
6446                 list_del_init(&rec->chunk_list);
6447         if (!list_empty(&rec->device_list))
6448                 list_del_init(&rec->device_list);
6449         free(rec);
6450 }
6451
6452 void free_device_extent_tree(struct device_extent_tree *tree)
6453 {
6454         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6455 }
6456
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf.
 *
 * The ref's objectid decides its kind: below BTRFS_FIRST_FREE_OBJECTID it is
 * recorded as a tree backref, otherwise as a data backref carrying the ref
 * count stored in the item.  In both cases key.objectid is the extent bytenr
 * and key.offset is passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
6477
6478 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6479                                             struct btrfs_key *key,
6480                                             int slot)
6481 {
6482         struct btrfs_chunk *ptr;
6483         struct chunk_record *rec;
6484         int num_stripes, i;
6485
6486         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6487         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6488
6489         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6490         if (!rec) {
6491                 fprintf(stderr, "memory allocation failed\n");
6492                 exit(-1);
6493         }
6494
6495         INIT_LIST_HEAD(&rec->list);
6496         INIT_LIST_HEAD(&rec->dextents);
6497         rec->bg_rec = NULL;
6498
6499         rec->cache.start = key->offset;
6500         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6501
6502         rec->generation = btrfs_header_generation(leaf);
6503
6504         rec->objectid = key->objectid;
6505         rec->type = key->type;
6506         rec->offset = key->offset;
6507
6508         rec->length = rec->cache.size;
6509         rec->owner = btrfs_chunk_owner(leaf, ptr);
6510         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6511         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6512         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6513         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6514         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6515         rec->num_stripes = num_stripes;
6516         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6517
6518         for (i = 0; i < rec->num_stripes; ++i) {
6519                 rec->stripes[i].devid =
6520                         btrfs_stripe_devid_nr(leaf, ptr, i);
6521                 rec->stripes[i].offset =
6522                         btrfs_stripe_offset_nr(leaf, ptr, i);
6523                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6524                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6525                                 BTRFS_UUID_SIZE);
6526         }
6527
6528         return rec;
6529 }
6530
6531 static int process_chunk_item(struct cache_tree *chunk_cache,
6532                               struct btrfs_key *key, struct extent_buffer *eb,
6533                               int slot)
6534 {
6535         struct chunk_record *rec;
6536         struct btrfs_chunk *chunk;
6537         int ret = 0;
6538
6539         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6540         /*
6541          * Do extra check for this chunk item,
6542          *
6543          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6544          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6545          * and owner<->key_type check.
6546          */
6547         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6548                                       key->offset);
6549         if (ret < 0) {
6550                 error("chunk(%llu, %llu) is not valid, ignore it",
6551                       key->offset, btrfs_chunk_length(eb, chunk));
6552                 return 0;
6553         }
6554         rec = btrfs_new_chunk_record(eb, key, slot);
6555         ret = insert_cache_extent(chunk_cache, &rec->cache);
6556         if (ret) {
6557                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6558                         rec->offset, rec->length);
6559                 free(rec);
6560         }
6561
6562         return ret;
6563 }
6564
6565 static int process_device_item(struct rb_root *dev_cache,
6566                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6567 {
6568         struct btrfs_dev_item *ptr;
6569         struct device_record *rec;
6570         int ret = 0;
6571
6572         ptr = btrfs_item_ptr(eb,
6573                 slot, struct btrfs_dev_item);
6574
6575         rec = malloc(sizeof(*rec));
6576         if (!rec) {
6577                 fprintf(stderr, "memory allocation failed\n");
6578                 return -ENOMEM;
6579         }
6580
6581         rec->devid = key->offset;
6582         rec->generation = btrfs_header_generation(eb);
6583
6584         rec->objectid = key->objectid;
6585         rec->type = key->type;
6586         rec->offset = key->offset;
6587
6588         rec->devid = btrfs_device_id(eb, ptr);
6589         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6590         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6591
6592         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6593         if (ret) {
6594                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6595                 free(rec);
6596         }
6597
6598         return ret;
6599 }
6600
6601 struct block_group_record *
6602 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6603                              int slot)
6604 {
6605         struct btrfs_block_group_item *ptr;
6606         struct block_group_record *rec;
6607
6608         rec = calloc(1, sizeof(*rec));
6609         if (!rec) {
6610                 fprintf(stderr, "memory allocation failed\n");
6611                 exit(-1);
6612         }
6613
6614         rec->cache.start = key->objectid;
6615         rec->cache.size = key->offset;
6616
6617         rec->generation = btrfs_header_generation(leaf);
6618
6619         rec->objectid = key->objectid;
6620         rec->type = key->type;
6621         rec->offset = key->offset;
6622
6623         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6624         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6625
6626         INIT_LIST_HEAD(&rec->list);
6627
6628         return rec;
6629 }
6630
6631 static int process_block_group_item(struct block_group_tree *block_group_cache,
6632                                     struct btrfs_key *key,
6633                                     struct extent_buffer *eb, int slot)
6634 {
6635         struct block_group_record *rec;
6636         int ret = 0;
6637
6638         rec = btrfs_new_block_group_record(eb, key, slot);
6639         ret = insert_block_group_record(block_group_cache, rec);
6640         if (ret) {
6641                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6642                         rec->objectid, rec->offset);
6643                 free(rec);
6644         }
6645
6646         return ret;
6647 }
6648
6649 struct device_extent_record *
6650 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6651                                struct btrfs_key *key, int slot)
6652 {
6653         struct device_extent_record *rec;
6654         struct btrfs_dev_extent *ptr;
6655
6656         rec = calloc(1, sizeof(*rec));
6657         if (!rec) {
6658                 fprintf(stderr, "memory allocation failed\n");
6659                 exit(-1);
6660         }
6661
6662         rec->cache.objectid = key->objectid;
6663         rec->cache.start = key->offset;
6664
6665         rec->generation = btrfs_header_generation(leaf);
6666
6667         rec->objectid = key->objectid;
6668         rec->type = key->type;
6669         rec->offset = key->offset;
6670
6671         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6672         rec->chunk_objecteid =
6673                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6674         rec->chunk_offset =
6675                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6676         rec->length = btrfs_dev_extent_length(leaf, ptr);
6677         rec->cache.size = rec->length;
6678
6679         INIT_LIST_HEAD(&rec->chunk_list);
6680         INIT_LIST_HEAD(&rec->device_list);
6681
6682         return rec;
6683 }
6684
6685 static int
6686 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6687                            struct btrfs_key *key, struct extent_buffer *eb,
6688                            int slot)
6689 {
6690         struct device_extent_record *rec;
6691         int ret;
6692
6693         rec = btrfs_new_device_extent_record(eb, key, slot);
6694         ret = insert_device_extent_record(dev_extent_cache, rec);
6695         if (ret) {
6696                 fprintf(stderr,
6697                         "Device extent[%llu, %llu, %llu] existed.\n",
6698                         rec->objectid, rec->offset, rec->length);
6699                 free(rec);
6700         }
6701
6702         return ret;
6703 }
6704
6705 static int process_extent_item(struct btrfs_root *root,
6706                                struct cache_tree *extent_cache,
6707                                struct extent_buffer *eb, int slot)
6708 {
6709         struct btrfs_extent_item *ei;
6710         struct btrfs_extent_inline_ref *iref;
6711         struct btrfs_extent_data_ref *dref;
6712         struct btrfs_shared_data_ref *sref;
6713         struct btrfs_key key;
6714         struct extent_record tmpl;
6715         unsigned long end;
6716         unsigned long ptr;
6717         int ret;
6718         int type;
6719         u32 item_size = btrfs_item_size_nr(eb, slot);
6720         u64 refs = 0;
6721         u64 offset;
6722         u64 num_bytes;
6723         int metadata = 0;
6724
6725         btrfs_item_key_to_cpu(eb, &key, slot);
6726
6727         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6728                 metadata = 1;
6729                 num_bytes = root->nodesize;
6730         } else {
6731                 num_bytes = key.offset;
6732         }
6733
6734         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6735                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6736                       key.objectid, root->sectorsize);
6737                 return -EIO;
6738         }
6739         if (item_size < sizeof(*ei)) {
6740 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6741                 struct btrfs_extent_item_v0 *ei0;
6742                 BUG_ON(item_size != sizeof(*ei0));
6743                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6744                 refs = btrfs_extent_refs_v0(eb, ei0);
6745 #else
6746                 BUG();
6747 #endif
6748                 memset(&tmpl, 0, sizeof(tmpl));
6749                 tmpl.start = key.objectid;
6750                 tmpl.nr = num_bytes;
6751                 tmpl.extent_item_refs = refs;
6752                 tmpl.metadata = metadata;
6753                 tmpl.found_rec = 1;
6754                 tmpl.max_size = num_bytes;
6755
6756                 return add_extent_rec(extent_cache, &tmpl);
6757         }
6758
6759         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6760         refs = btrfs_extent_refs(eb, ei);
6761         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6762                 metadata = 1;
6763         else
6764                 metadata = 0;
6765         if (metadata && num_bytes != root->nodesize) {
6766                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6767                       num_bytes, root->nodesize);
6768                 return -EIO;
6769         }
6770         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6771                 error("ignore invalid data extent, length %llu is not aligned to %u",
6772                       num_bytes, root->sectorsize);
6773                 return -EIO;
6774         }
6775
6776         memset(&tmpl, 0, sizeof(tmpl));
6777         tmpl.start = key.objectid;
6778         tmpl.nr = num_bytes;
6779         tmpl.extent_item_refs = refs;
6780         tmpl.metadata = metadata;
6781         tmpl.found_rec = 1;
6782         tmpl.max_size = num_bytes;
6783         add_extent_rec(extent_cache, &tmpl);
6784
6785         ptr = (unsigned long)(ei + 1);
6786         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6787             key.type == BTRFS_EXTENT_ITEM_KEY)
6788                 ptr += sizeof(struct btrfs_tree_block_info);
6789
6790         end = (unsigned long)ei + item_size;
6791         while (ptr < end) {
6792                 iref = (struct btrfs_extent_inline_ref *)ptr;
6793                 type = btrfs_extent_inline_ref_type(eb, iref);
6794                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6795                 switch (type) {
6796                 case BTRFS_TREE_BLOCK_REF_KEY:
6797                         ret = add_tree_backref(extent_cache, key.objectid,
6798                                         0, offset, 0);
6799                         if (ret < 0)
6800                                 error("add_tree_backref failed: %s",
6801                                       strerror(-ret));
6802                         break;
6803                 case BTRFS_SHARED_BLOCK_REF_KEY:
6804                         ret = add_tree_backref(extent_cache, key.objectid,
6805                                         offset, 0, 0);
6806                         if (ret < 0)
6807                                 error("add_tree_backref failed: %s",
6808                                       strerror(-ret));
6809                         break;
6810                 case BTRFS_EXTENT_DATA_REF_KEY:
6811                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6812                         add_data_backref(extent_cache, key.objectid, 0,
6813                                         btrfs_extent_data_ref_root(eb, dref),
6814                                         btrfs_extent_data_ref_objectid(eb,
6815                                                                        dref),
6816                                         btrfs_extent_data_ref_offset(eb, dref),
6817                                         btrfs_extent_data_ref_count(eb, dref),
6818                                         0, num_bytes);
6819                         break;
6820                 case BTRFS_SHARED_DATA_REF_KEY:
6821                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6822                         add_data_backref(extent_cache, key.objectid, offset,
6823                                         0, 0, 0,
6824                                         btrfs_shared_data_ref_count(eb, sref),
6825                                         0, num_bytes);
6826                         break;
6827                 default:
6828                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6829                                 key.objectid, key.type, num_bytes);
6830                         goto out;
6831                 }
6832                 ptr += btrfs_extent_inline_ref_size(type);
6833         }
6834         WARN_ON(ptr > end);
6835 out:
6836         return 0;
6837 }
6838
6839 static int check_cache_range(struct btrfs_root *root,
6840                              struct btrfs_block_group_cache *cache,
6841                              u64 offset, u64 bytes)
6842 {
6843         struct btrfs_free_space *entry;
6844         u64 *logical;
6845         u64 bytenr;
6846         int stripe_len;
6847         int i, nr, ret;
6848
6849         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6850                 bytenr = btrfs_sb_offset(i);
6851                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6852                                        cache->key.objectid, bytenr, 0,
6853                                        &logical, &nr, &stripe_len);
6854                 if (ret)
6855                         return ret;
6856
6857                 while (nr--) {
6858                         if (logical[nr] + stripe_len <= offset)
6859                                 continue;
6860                         if (offset + bytes <= logical[nr])
6861                                 continue;
6862                         if (logical[nr] == offset) {
6863                                 if (stripe_len >= bytes) {
6864                                         free(logical);
6865                                         return 0;
6866                                 }
6867                                 bytes -= stripe_len;
6868                                 offset += stripe_len;
6869                         } else if (logical[nr] < offset) {
6870                                 if (logical[nr] + stripe_len >=
6871                                     offset + bytes) {
6872                                         free(logical);
6873                                         return 0;
6874                                 }
6875                                 bytes = (offset + bytes) -
6876                                         (logical[nr] + stripe_len);
6877                                 offset = logical[nr] + stripe_len;
6878                         } else {
6879                                 /*
6880                                  * Could be tricky, the super may land in the
6881                                  * middle of the area we're checking.  First
6882                                  * check the easiest case, it's at the end.
6883                                  */
6884                                 if (logical[nr] + stripe_len >=
6885                                     bytes + offset) {
6886                                         bytes = logical[nr] - offset;
6887                                         continue;
6888                                 }
6889
6890                                 /* Check the left side */
6891                                 ret = check_cache_range(root, cache,
6892                                                         offset,
6893                                                         logical[nr] - offset);
6894                                 if (ret) {
6895                                         free(logical);
6896                                         return ret;
6897                                 }
6898
6899                                 /* Now we continue with the right side */
6900                                 bytes = (offset + bytes) -
6901                                         (logical[nr] + stripe_len);
6902                                 offset = logical[nr] + stripe_len;
6903                         }
6904                 }
6905
6906                 free(logical);
6907         }
6908
6909         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6910         if (!entry) {
6911                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6912                         offset, offset+bytes);
6913                 return -EINVAL;
6914         }
6915
6916         if (entry->offset != offset) {
6917                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6918                         entry->offset);
6919                 return -EINVAL;
6920         }
6921
6922         if (entry->bytes != bytes) {
6923                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6924                         bytes, entry->bytes, offset);
6925                 return -EINVAL;
6926         }
6927
6928         unlink_free_space(cache->free_space_ctl, entry);
6929         free(entry);
6930         return 0;
6931 }
6932
6933 static int verify_space_cache(struct btrfs_root *root,
6934                               struct btrfs_block_group_cache *cache)
6935 {
6936         struct btrfs_path path;
6937         struct extent_buffer *leaf;
6938         struct btrfs_key key;
6939         u64 last;
6940         int ret = 0;
6941
6942         root = root->fs_info->extent_root;
6943
6944         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6945
6946         btrfs_init_path(&path);
6947         key.objectid = last;
6948         key.offset = 0;
6949         key.type = BTRFS_EXTENT_ITEM_KEY;
6950         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6951         if (ret < 0)
6952                 goto out;
6953         ret = 0;
6954         while (1) {
6955                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6956                         ret = btrfs_next_leaf(root, &path);
6957                         if (ret < 0)
6958                                 goto out;
6959                         if (ret > 0) {
6960                                 ret = 0;
6961                                 break;
6962                         }
6963                 }
6964                 leaf = path.nodes[0];
6965                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6966                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6967                         break;
6968                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6969                     key.type != BTRFS_METADATA_ITEM_KEY) {
6970                         path.slots[0]++;
6971                         continue;
6972                 }
6973
6974                 if (last == key.objectid) {
6975                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6976                                 last = key.objectid + key.offset;
6977                         else
6978                                 last = key.objectid + root->nodesize;
6979                         path.slots[0]++;
6980                         continue;
6981                 }
6982
6983                 ret = check_cache_range(root, cache, last,
6984                                         key.objectid - last);
6985                 if (ret)
6986                         break;
6987                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6988                         last = key.objectid + key.offset;
6989                 else
6990                         last = key.objectid + root->nodesize;
6991                 path.slots[0]++;
6992         }
6993
6994         if (last < cache->key.objectid + cache->key.offset)
6995                 ret = check_cache_range(root, cache, last,
6996                                         cache->key.objectid +
6997                                         cache->key.offset - last);
6998
6999 out:
7000         btrfs_release_path(&path);
7001
7002         if (!ret &&
7003             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7004                 fprintf(stderr, "There are still entries left in the space "
7005                         "cache\n");
7006                 ret = -EINVAL;
7007         }
7008
7009         return ret;
7010 }
7011
7012 static int check_space_cache(struct btrfs_root *root)
7013 {
7014         struct btrfs_block_group_cache *cache;
7015         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7016         int ret;
7017         int error = 0;
7018
7019         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7020             btrfs_super_generation(root->fs_info->super_copy) !=
7021             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7022                 printf("cache and super generation don't match, space cache "
7023                        "will be invalidated\n");
7024                 return 0;
7025         }
7026
7027         if (ctx.progress_enabled) {
7028                 ctx.tp = TASK_FREE_SPACE;
7029                 task_start(ctx.info);
7030         }
7031
7032         while (1) {
7033                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7034                 if (!cache)
7035                         break;
7036
7037                 start = cache->key.objectid + cache->key.offset;
7038                 if (!cache->free_space_ctl) {
7039                         if (btrfs_init_free_space_ctl(cache,
7040                                                       root->sectorsize)) {
7041                                 ret = -ENOMEM;
7042                                 break;
7043                         }
7044                 } else {
7045                         btrfs_remove_free_space_cache(cache);
7046                 }
7047
7048                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7049                         ret = exclude_super_stripes(root, cache);
7050                         if (ret) {
7051                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7052                                         strerror(-ret));
7053                                 error++;
7054                                 continue;
7055                         }
7056                         ret = load_free_space_tree(root->fs_info, cache);
7057                         free_excluded_extents(root, cache);
7058                         if (ret < 0) {
7059                                 fprintf(stderr, "could not load free space tree: %s\n",
7060                                         strerror(-ret));
7061                                 error++;
7062                                 continue;
7063                         }
7064                         error += ret;
7065                 } else {
7066                         ret = load_free_space_cache(root->fs_info, cache);
7067                         if (!ret)
7068                                 continue;
7069                 }
7070
7071                 ret = verify_space_cache(root, cache);
7072                 if (ret) {
7073                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7074                                 cache->key.objectid);
7075                         error++;
7076                 }
7077         }
7078
7079         task_stop(ctx.info);
7080
7081         return error ? -EINVAL : 0;
7082 }
7083
7084 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7085                         u64 num_bytes, unsigned long leaf_offset,
7086                         struct extent_buffer *eb) {
7087
7088         u64 offset = 0;
7089         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7090         char *data;
7091         unsigned long csum_offset;
7092         u32 csum;
7093         u32 csum_expected;
7094         u64 read_len;
7095         u64 data_checked = 0;
7096         u64 tmp;
7097         int ret = 0;
7098         int mirror;
7099         int num_copies;
7100
7101         if (num_bytes % root->sectorsize)
7102                 return -EINVAL;
7103
7104         data = malloc(num_bytes);
7105         if (!data)
7106                 return -ENOMEM;
7107
7108         while (offset < num_bytes) {
7109                 mirror = 0;
7110 again:
7111                 read_len = num_bytes - offset;
7112                 /* read as much space once a time */
7113                 ret = read_extent_data(root, data + offset,
7114                                 bytenr + offset, &read_len, mirror);
7115                 if (ret)
7116                         goto out;
7117                 data_checked = 0;
7118                 /* verify every 4k data's checksum */
7119                 while (data_checked < read_len) {
7120                         csum = ~(u32)0;
7121                         tmp = offset + data_checked;
7122
7123                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
7124                                                csum, root->sectorsize);
7125                         btrfs_csum_final(csum, (u8 *)&csum);
7126
7127                         csum_offset = leaf_offset +
7128                                  tmp / root->sectorsize * csum_size;
7129                         read_extent_buffer(eb, (char *)&csum_expected,
7130                                            csum_offset, csum_size);
7131                         /* try another mirror */
7132                         if (csum != csum_expected) {
7133                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7134                                                 mirror, bytenr + tmp,
7135                                                 csum, csum_expected);
7136                                 num_copies = btrfs_num_copies(
7137                                                 &root->fs_info->mapping_tree,
7138                                                 bytenr, num_bytes);
7139                                 if (mirror < num_copies - 1) {
7140                                         mirror += 1;
7141                                         goto again;
7142                                 }
7143                         }
7144                         data_checked += root->sectorsize;
7145                 }
7146                 offset += read_len;
7147         }
7148 out:
7149         free(data);
7150         return ret;
7151 }
7152
7153 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7154                                u64 num_bytes)
7155 {
7156         struct btrfs_path path;
7157         struct extent_buffer *leaf;
7158         struct btrfs_key key;
7159         int ret;
7160
7161         btrfs_init_path(&path);
7162         key.objectid = bytenr;
7163         key.type = BTRFS_EXTENT_ITEM_KEY;
7164         key.offset = (u64)-1;
7165
7166 again:
7167         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7168                                 0, 0);
7169         if (ret < 0) {
7170                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7171                 btrfs_release_path(&path);
7172                 return ret;
7173         } else if (ret) {
7174                 if (path.slots[0] > 0) {
7175                         path.slots[0]--;
7176                 } else {
7177                         ret = btrfs_prev_leaf(root, &path);
7178                         if (ret < 0) {
7179                                 goto out;
7180                         } else if (ret > 0) {
7181                                 ret = 0;
7182                                 goto out;
7183                         }
7184                 }
7185         }
7186
7187         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7188
7189         /*
7190          * Block group items come before extent items if they have the same
7191          * bytenr, so walk back one more just in case.  Dear future traveller,
7192          * first congrats on mastering time travel.  Now if it's not too much
7193          * trouble could you go back to 2006 and tell Chris to make the
7194          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7195          * EXTENT_ITEM_KEY please?
7196          */
7197         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7198                 if (path.slots[0] > 0) {
7199                         path.slots[0]--;
7200                 } else {
7201                         ret = btrfs_prev_leaf(root, &path);
7202                         if (ret < 0) {
7203                                 goto out;
7204                         } else if (ret > 0) {
7205                                 ret = 0;
7206                                 goto out;
7207                         }
7208                 }
7209                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7210         }
7211
7212         while (num_bytes) {
7213                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7214                         ret = btrfs_next_leaf(root, &path);
7215                         if (ret < 0) {
7216                                 fprintf(stderr, "Error going to next leaf "
7217                                         "%d\n", ret);
7218                                 btrfs_release_path(&path);
7219                                 return ret;
7220                         } else if (ret) {
7221                                 break;
7222                         }
7223                 }
7224                 leaf = path.nodes[0];
7225                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7226                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7227                         path.slots[0]++;
7228                         continue;
7229                 }
7230                 if (key.objectid + key.offset < bytenr) {
7231                         path.slots[0]++;
7232                         continue;
7233                 }
7234                 if (key.objectid > bytenr + num_bytes)
7235                         break;
7236
7237                 if (key.objectid == bytenr) {
7238                         if (key.offset >= num_bytes) {
7239                                 num_bytes = 0;
7240                                 break;
7241                         }
7242                         num_bytes -= key.offset;
7243                         bytenr += key.offset;
7244                 } else if (key.objectid < bytenr) {
7245                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7246                                 num_bytes = 0;
7247                                 break;
7248                         }
7249                         num_bytes = (bytenr + num_bytes) -
7250                                 (key.objectid + key.offset);
7251                         bytenr = key.objectid + key.offset;
7252                 } else {
7253                         if (key.objectid + key.offset < bytenr + num_bytes) {
7254                                 u64 new_start = key.objectid + key.offset;
7255                                 u64 new_bytes = bytenr + num_bytes - new_start;
7256
7257                                 /*
7258                                  * Weird case, the extent is in the middle of
7259                                  * our range, we'll have to search one side
7260                                  * and then the other.  Not sure if this happens
7261                                  * in real life, but no harm in coding it up
7262                                  * anyway just in case.
7263                                  */
7264                                 btrfs_release_path(&path);
7265                                 ret = check_extent_exists(root, new_start,
7266                                                           new_bytes);
7267                                 if (ret) {
7268                                         fprintf(stderr, "Right section didn't "
7269                                                 "have a record\n");
7270                                         break;
7271                                 }
7272                                 num_bytes = key.objectid - bytenr;
7273                                 goto again;
7274                         }
7275                         num_bytes = key.objectid - bytenr;
7276                 }
7277                 path.slots[0]++;
7278         }
7279         ret = 0;
7280
7281 out:
7282         if (num_bytes && !ret) {
7283                 fprintf(stderr, "There are no extents for csum range "
7284                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7285                 ret = 1;
7286         }
7287
7288         btrfs_release_path(&path);
7289         return ret;
7290 }
7291
7292 static int check_csums(struct btrfs_root *root)
7293 {
7294         struct btrfs_path path;
7295         struct extent_buffer *leaf;
7296         struct btrfs_key key;
7297         u64 offset = 0, num_bytes = 0;
7298         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7299         int errors = 0;
7300         int ret;
7301         u64 data_len;
7302         unsigned long leaf_offset;
7303
7304         root = root->fs_info->csum_root;
7305         if (!extent_buffer_uptodate(root->node)) {
7306                 fprintf(stderr, "No valid csum tree found\n");
7307                 return -ENOENT;
7308         }
7309
7310         btrfs_init_path(&path);
7311         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7312         key.type = BTRFS_EXTENT_CSUM_KEY;
7313         key.offset = 0;
7314         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7315         if (ret < 0) {
7316                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7317                 btrfs_release_path(&path);
7318                 return ret;
7319         }
7320
7321         if (ret > 0 && path.slots[0])
7322                 path.slots[0]--;
7323         ret = 0;
7324
7325         while (1) {
7326                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7327                         ret = btrfs_next_leaf(root, &path);
7328                         if (ret < 0) {
7329                                 fprintf(stderr, "Error going to next leaf "
7330                                         "%d\n", ret);
7331                                 break;
7332                         }
7333                         if (ret)
7334                                 break;
7335                 }
7336                 leaf = path.nodes[0];
7337
7338                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7339                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7340                         path.slots[0]++;
7341                         continue;
7342                 }
7343
7344                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7345                               csum_size) * root->sectorsize;
7346                 if (!check_data_csum)
7347                         goto skip_csum_check;
7348                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7349                 ret = check_extent_csums(root, key.offset, data_len,
7350                                          leaf_offset, leaf);
7351                 if (ret)
7352                         break;
7353 skip_csum_check:
7354                 if (!num_bytes) {
7355                         offset = key.offset;
7356                 } else if (key.offset != offset + num_bytes) {
7357                         ret = check_extent_exists(root, offset, num_bytes);
7358                         if (ret) {
7359                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7360                                         "there is no extent record\n",
7361                                         offset, offset+num_bytes);
7362                                 errors++;
7363                         }
7364                         offset = key.offset;
7365                         num_bytes = 0;
7366                 }
7367                 num_bytes += data_len;
7368                 path.slots[0]++;
7369         }
7370
7371         btrfs_release_path(&path);
7372         return errors;
7373 }
7374
7375 static int is_dropped_key(struct btrfs_key *key,
7376                           struct btrfs_key *drop_key) {
7377         if (key->objectid < drop_key->objectid)
7378                 return 1;
7379         else if (key->objectid == drop_key->objectid) {
7380                 if (key->type < drop_key->type)
7381                         return 1;
7382                 else if (key->type == drop_key->type) {
7383                         if (key->offset < drop_key->offset)
7384                                 return 1;
7385                 }
7386         }
7387         return 0;
7388 }
7389
7390 /*
7391  * Here are the rules for FULL_BACKREF.
7392  *
7393  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7394  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7395  *      FULL_BACKREF set.
7396  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7397  *    if it happened after the relocation occurred since we'll have dropped the
7398  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7399  *    have no real way to know for sure.
7400  *
7401  * We process the blocks one root at a time, and we start from the lowest root
7402  * objectid and go to the highest.  So we can just lookup the owner backref for
7403  * the record and if we don't find it then we know it doesn't exist and we have
7404  * a FULL BACKREF.
7405  *
7406  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7407  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7408  * be set or not and then we can check later once we've gathered all the refs.
7409  */
7410 static int calc_extent_flag(struct btrfs_root *root,
7411                            struct cache_tree *extent_cache,
7412                            struct extent_buffer *buf,
7413                            struct root_item_record *ri,
7414                            u64 *flags)
7415 {
7416         struct extent_record *rec;
7417         struct cache_extent *cache;
7418         struct tree_backref *tback;
7419         u64 owner = 0;
7420
7421         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7422         /* we have added this extent before */
7423         if (!cache)
7424                 return -ENOENT;
7425
7426         rec = container_of(cache, struct extent_record, cache);
7427
7428         /*
7429          * Except file/reloc tree, we can not have
7430          * FULL BACKREF MODE
7431          */
7432         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7433                 goto normal;
7434         /*
7435          * root node
7436          */
7437         if (buf->start == ri->bytenr)
7438                 goto normal;
7439
7440         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7441                 goto full_backref;
7442
7443         owner = btrfs_header_owner(buf);
7444         if (owner == ri->objectid)
7445                 goto normal;
7446
7447         tback = find_tree_backref(rec, 0, owner);
7448         if (!tback)
7449                 goto full_backref;
7450 normal:
7451         *flags = 0;
7452         if (rec->flag_block_full_backref != FLAG_UNSET &&
7453             rec->flag_block_full_backref != 0)
7454                 rec->bad_full_backref = 1;
7455         return 0;
7456 full_backref:
7457         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7458         if (rec->flag_block_full_backref != FLAG_UNSET &&
7459             rec->flag_block_full_backref != 1)
7460                 rec->bad_full_backref = 1;
7461         return 0;
7462 }
7463
7464 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7465 {
7466         fprintf(stderr, "Invalid key type(");
7467         print_key_type(stderr, 0, key_type);
7468         fprintf(stderr, ") found in root(");
7469         print_objectid(stderr, rootid, 0);
7470         fprintf(stderr, ")\n");
7471 }
7472
7473 /*
7474  * Check if the key is valid with its extent buffer.
7475  *
7476  * This is a early check in case invalid key exists in a extent buffer
7477  * This is not comprehensive yet, but should prevent wrong key/item passed
7478  * further
7479  */
7480 static int check_type_with_root(u64 rootid, u8 key_type)
7481 {
7482         switch (key_type) {
7483         /* Only valid in chunk tree */
7484         case BTRFS_DEV_ITEM_KEY:
7485         case BTRFS_CHUNK_ITEM_KEY:
7486                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7487                         goto err;
7488                 break;
7489         /* valid in csum and log tree */
7490         case BTRFS_CSUM_TREE_OBJECTID:
7491                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7492                       is_fstree(rootid)))
7493                         goto err;
7494                 break;
7495         case BTRFS_EXTENT_ITEM_KEY:
7496         case BTRFS_METADATA_ITEM_KEY:
7497         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7498                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7499                         goto err;
7500                 break;
7501         case BTRFS_ROOT_ITEM_KEY:
7502                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7503                         goto err;
7504                 break;
7505         case BTRFS_DEV_EXTENT_KEY:
7506                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7507                         goto err;
7508                 break;
7509         }
7510         return 0;
7511 err:
7512         report_mismatch_key_root(key_type, rootid);
7513         return -EINVAL;
7514 }
7515
7516 static int run_next_block(struct btrfs_root *root,
7517                           struct block_info *bits,
7518                           int bits_nr,
7519                           u64 *last,
7520                           struct cache_tree *pending,
7521                           struct cache_tree *seen,
7522                           struct cache_tree *reada,
7523                           struct cache_tree *nodes,
7524                           struct cache_tree *extent_cache,
7525                           struct cache_tree *chunk_cache,
7526                           struct rb_root *dev_cache,
7527                           struct block_group_tree *block_group_cache,
7528                           struct device_extent_tree *dev_extent_cache,
7529                           struct root_item_record *ri)
7530 {
7531         struct extent_buffer *buf;
7532         struct extent_record *rec = NULL;
7533         u64 bytenr;
7534         u32 size;
7535         u64 parent;
7536         u64 owner;
7537         u64 flags;
7538         u64 ptr;
7539         u64 gen = 0;
7540         int ret = 0;
7541         int i;
7542         int nritems;
7543         struct btrfs_key key;
7544         struct cache_extent *cache;
7545         int reada_bits;
7546
7547         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7548                                     bits_nr, &reada_bits);
7549         if (nritems == 0)
7550                 return 1;
7551
7552         if (!reada_bits) {
7553                 for(i = 0; i < nritems; i++) {
7554                         ret = add_cache_extent(reada, bits[i].start,
7555                                                bits[i].size);
7556                         if (ret == -EEXIST)
7557                                 continue;
7558
7559                         /* fixme, get the parent transid */
7560                         readahead_tree_block(root, bits[i].start,
7561                                              bits[i].size, 0);
7562                 }
7563         }
7564         *last = bits[0].start;
7565         bytenr = bits[0].start;
7566         size = bits[0].size;
7567
7568         cache = lookup_cache_extent(pending, bytenr, size);
7569         if (cache) {
7570                 remove_cache_extent(pending, cache);
7571                 free(cache);
7572         }
7573         cache = lookup_cache_extent(reada, bytenr, size);
7574         if (cache) {
7575                 remove_cache_extent(reada, cache);
7576                 free(cache);
7577         }
7578         cache = lookup_cache_extent(nodes, bytenr, size);
7579         if (cache) {
7580                 remove_cache_extent(nodes, cache);
7581                 free(cache);
7582         }
7583         cache = lookup_cache_extent(extent_cache, bytenr, size);
7584         if (cache) {
7585                 rec = container_of(cache, struct extent_record, cache);
7586                 gen = rec->parent_generation;
7587         }
7588
7589         /* fixme, get the real parent transid */
7590         buf = read_tree_block(root, bytenr, size, gen);
7591         if (!extent_buffer_uptodate(buf)) {
7592                 record_bad_block_io(root->fs_info,
7593                                     extent_cache, bytenr, size);
7594                 goto out;
7595         }
7596
7597         nritems = btrfs_header_nritems(buf);
7598
7599         flags = 0;
7600         if (!init_extent_tree) {
7601                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7602                                        btrfs_header_level(buf), 1, NULL,
7603                                        &flags);
7604                 if (ret < 0) {
7605                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7606                         if (ret < 0) {
7607                                 fprintf(stderr, "Couldn't calc extent flags\n");
7608                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7609                         }
7610                 }
7611         } else {
7612                 flags = 0;
7613                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7614                 if (ret < 0) {
7615                         fprintf(stderr, "Couldn't calc extent flags\n");
7616                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7617                 }
7618         }
7619
7620         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7621                 if (ri != NULL &&
7622                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7623                     ri->objectid == btrfs_header_owner(buf)) {
7624                         /*
7625                          * Ok we got to this block from it's original owner and
7626                          * we have FULL_BACKREF set.  Relocation can leave
7627                          * converted blocks over so this is altogether possible,
7628                          * however it's not possible if the generation > the
7629                          * last snapshot, so check for this case.
7630                          */
7631                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7632                             btrfs_header_generation(buf) > ri->last_snapshot) {
7633                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7634                                 rec->bad_full_backref = 1;
7635                         }
7636                 }
7637         } else {
7638                 if (ri != NULL &&
7639                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7640                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7641                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7642                         rec->bad_full_backref = 1;
7643                 }
7644         }
7645
7646         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7647                 rec->flag_block_full_backref = 1;
7648                 parent = bytenr;
7649                 owner = 0;
7650         } else {
7651                 rec->flag_block_full_backref = 0;
7652                 parent = 0;
7653                 owner = btrfs_header_owner(buf);
7654         }
7655
7656         ret = check_block(root, extent_cache, buf, flags);
7657         if (ret)
7658                 goto out;
7659
7660         if (btrfs_is_leaf(buf)) {
7661                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7662                 for (i = 0; i < nritems; i++) {
7663                         struct btrfs_file_extent_item *fi;
7664                         btrfs_item_key_to_cpu(buf, &key, i);
7665                         /*
7666                          * Check key type against the leaf owner.
7667                          * Could filter quite a lot of early error if
7668                          * owner is correct
7669                          */
7670                         if (check_type_with_root(btrfs_header_owner(buf),
7671                                                  key.type)) {
7672                                 fprintf(stderr, "ignoring invalid key\n");
7673                                 continue;
7674                         }
7675                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7676                                 process_extent_item(root, extent_cache, buf,
7677                                                     i);
7678                                 continue;
7679                         }
7680                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7681                                 process_extent_item(root, extent_cache, buf,
7682                                                     i);
7683                                 continue;
7684                         }
7685                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7686                                 total_csum_bytes +=
7687                                         btrfs_item_size_nr(buf, i);
7688                                 continue;
7689                         }
7690                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7691                                 process_chunk_item(chunk_cache, &key, buf, i);
7692                                 continue;
7693                         }
7694                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7695                                 process_device_item(dev_cache, &key, buf, i);
7696                                 continue;
7697                         }
7698                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7699                                 process_block_group_item(block_group_cache,
7700                                         &key, buf, i);
7701                                 continue;
7702                         }
7703                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7704                                 process_device_extent_item(dev_extent_cache,
7705                                         &key, buf, i);
7706                                 continue;
7707
7708                         }
7709                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7710 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7711                                 process_extent_ref_v0(extent_cache, buf, i);
7712 #else
7713                                 BUG();
7714 #endif
7715                                 continue;
7716                         }
7717
7718                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7719                                 ret = add_tree_backref(extent_cache,
7720                                                 key.objectid, 0, key.offset, 0);
7721                                 if (ret < 0)
7722                                         error("add_tree_backref failed: %s",
7723                                               strerror(-ret));
7724                                 continue;
7725                         }
7726                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7727                                 ret = add_tree_backref(extent_cache,
7728                                                 key.objectid, key.offset, 0, 0);
7729                                 if (ret < 0)
7730                                         error("add_tree_backref failed: %s",
7731                                               strerror(-ret));
7732                                 continue;
7733                         }
7734                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7735                                 struct btrfs_extent_data_ref *ref;
7736                                 ref = btrfs_item_ptr(buf, i,
7737                                                 struct btrfs_extent_data_ref);
7738                                 add_data_backref(extent_cache,
7739                                         key.objectid, 0,
7740                                         btrfs_extent_data_ref_root(buf, ref),
7741                                         btrfs_extent_data_ref_objectid(buf,
7742                                                                        ref),
7743                                         btrfs_extent_data_ref_offset(buf, ref),
7744                                         btrfs_extent_data_ref_count(buf, ref),
7745                                         0, root->sectorsize);
7746                                 continue;
7747                         }
7748                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7749                                 struct btrfs_shared_data_ref *ref;
7750                                 ref = btrfs_item_ptr(buf, i,
7751                                                 struct btrfs_shared_data_ref);
7752                                 add_data_backref(extent_cache,
7753                                         key.objectid, key.offset, 0, 0, 0,
7754                                         btrfs_shared_data_ref_count(buf, ref),
7755                                         0, root->sectorsize);
7756                                 continue;
7757                         }
7758                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7759                                 struct bad_item *bad;
7760
7761                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7762                                         continue;
7763                                 if (!owner)
7764                                         continue;
7765                                 bad = malloc(sizeof(struct bad_item));
7766                                 if (!bad)
7767                                         continue;
7768                                 INIT_LIST_HEAD(&bad->list);
7769                                 memcpy(&bad->key, &key,
7770                                        sizeof(struct btrfs_key));
7771                                 bad->root_id = owner;
7772                                 list_add_tail(&bad->list, &delete_items);
7773                                 continue;
7774                         }
7775                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7776                                 continue;
7777                         fi = btrfs_item_ptr(buf, i,
7778                                             struct btrfs_file_extent_item);
7779                         if (btrfs_file_extent_type(buf, fi) ==
7780                             BTRFS_FILE_EXTENT_INLINE)
7781                                 continue;
7782                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7783                                 continue;
7784
7785                         data_bytes_allocated +=
7786                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7787                         if (data_bytes_allocated < root->sectorsize) {
7788                                 abort();
7789                         }
7790                         data_bytes_referenced +=
7791                                 btrfs_file_extent_num_bytes(buf, fi);
7792                         add_data_backref(extent_cache,
7793                                 btrfs_file_extent_disk_bytenr(buf, fi),
7794                                 parent, owner, key.objectid, key.offset -
7795                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7796                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7797                 }
7798         } else {
7799                 int level;
7800                 struct btrfs_key first_key;
7801
7802                 first_key.objectid = 0;
7803
7804                 if (nritems > 0)
7805                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7806                 level = btrfs_header_level(buf);
7807                 for (i = 0; i < nritems; i++) {
7808                         struct extent_record tmpl;
7809
7810                         ptr = btrfs_node_blockptr(buf, i);
7811                         size = root->nodesize;
7812                         btrfs_node_key_to_cpu(buf, &key, i);
7813                         if (ri != NULL) {
7814                                 if ((level == ri->drop_level)
7815                                     && is_dropped_key(&key, &ri->drop_key)) {
7816                                         continue;
7817                                 }
7818                         }
7819
7820                         memset(&tmpl, 0, sizeof(tmpl));
7821                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7822                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7823                         tmpl.start = ptr;
7824                         tmpl.nr = size;
7825                         tmpl.refs = 1;
7826                         tmpl.metadata = 1;
7827                         tmpl.max_size = size;
7828                         ret = add_extent_rec(extent_cache, &tmpl);
7829                         if (ret < 0)
7830                                 goto out;
7831
7832                         ret = add_tree_backref(extent_cache, ptr, parent,
7833                                         owner, 1);
7834                         if (ret < 0) {
7835                                 error("add_tree_backref failed: %s",
7836                                       strerror(-ret));
7837                                 continue;
7838                         }
7839
7840                         if (level > 1) {
7841                                 add_pending(nodes, seen, ptr, size);
7842                         } else {
7843                                 add_pending(pending, seen, ptr, size);
7844                         }
7845                 }
7846                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7847                                       nritems) * sizeof(struct btrfs_key_ptr);
7848         }
7849         total_btree_bytes += buf->len;
7850         if (fs_root_objectid(btrfs_header_owner(buf)))
7851                 total_fs_tree_bytes += buf->len;
7852         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7853                 total_extent_tree_bytes += buf->len;
7854         if (!found_old_backref &&
7855             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7856             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7857             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7858                 found_old_backref = 1;
7859 out:
7860         free_extent_buffer(buf);
7861         return ret;
7862 }
7863
7864 static int add_root_to_pending(struct extent_buffer *buf,
7865                                struct cache_tree *extent_cache,
7866                                struct cache_tree *pending,
7867                                struct cache_tree *seen,
7868                                struct cache_tree *nodes,
7869                                u64 objectid)
7870 {
7871         struct extent_record tmpl;
7872         int ret;
7873
7874         if (btrfs_header_level(buf) > 0)
7875                 add_pending(nodes, seen, buf->start, buf->len);
7876         else
7877                 add_pending(pending, seen, buf->start, buf->len);
7878
7879         memset(&tmpl, 0, sizeof(tmpl));
7880         tmpl.start = buf->start;
7881         tmpl.nr = buf->len;
7882         tmpl.is_root = 1;
7883         tmpl.refs = 1;
7884         tmpl.metadata = 1;
7885         tmpl.max_size = buf->len;
7886         add_extent_rec(extent_cache, &tmpl);
7887
7888         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7889             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7890                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7891                                 0, 1);
7892         else
7893                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7894                                 1);
7895         return ret;
7896 }
7897
7898 /* as we fix the tree, we might be deleting blocks that
7899  * we're tracking for repair.  This hook makes sure we
7900  * remove any backrefs for blocks as we are fixing them.
7901  */
7902 static int free_extent_hook(struct btrfs_trans_handle *trans,
7903                             struct btrfs_root *root,
7904                             u64 bytenr, u64 num_bytes, u64 parent,
7905                             u64 root_objectid, u64 owner, u64 offset,
7906                             int refs_to_drop)
7907 {
7908         struct extent_record *rec;
7909         struct cache_extent *cache;
7910         int is_data;
7911         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7912
7913         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7914         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7915         if (!cache)
7916                 return 0;
7917
7918         rec = container_of(cache, struct extent_record, cache);
7919         if (is_data) {
7920                 struct data_backref *back;
7921                 back = find_data_backref(rec, parent, root_objectid, owner,
7922                                          offset, 1, bytenr, num_bytes);
7923                 if (!back)
7924                         goto out;
7925                 if (back->node.found_ref) {
7926                         back->found_ref -= refs_to_drop;
7927                         if (rec->refs)
7928                                 rec->refs -= refs_to_drop;
7929                 }
7930                 if (back->node.found_extent_tree) {
7931                         back->num_refs -= refs_to_drop;
7932                         if (rec->extent_item_refs)
7933                                 rec->extent_item_refs -= refs_to_drop;
7934                 }
7935                 if (back->found_ref == 0)
7936                         back->node.found_ref = 0;
7937                 if (back->num_refs == 0)
7938                         back->node.found_extent_tree = 0;
7939
7940                 if (!back->node.found_extent_tree && back->node.found_ref) {
7941                         list_del(&back->node.list);
7942                         free(back);
7943                 }
7944         } else {
7945                 struct tree_backref *back;
7946                 back = find_tree_backref(rec, parent, root_objectid);
7947                 if (!back)
7948                         goto out;
7949                 if (back->node.found_ref) {
7950                         if (rec->refs)
7951                                 rec->refs--;
7952                         back->node.found_ref = 0;
7953                 }
7954                 if (back->node.found_extent_tree) {
7955                         if (rec->extent_item_refs)
7956                                 rec->extent_item_refs--;
7957                         back->node.found_extent_tree = 0;
7958                 }
7959                 if (!back->node.found_extent_tree && back->node.found_ref) {
7960                         list_del(&back->node.list);
7961                         free(back);
7962                 }
7963         }
7964         maybe_free_extent_rec(extent_cache, rec);
7965 out:
7966         return 0;
7967 }
7968
7969 static int delete_extent_records(struct btrfs_trans_handle *trans,
7970                                  struct btrfs_root *root,
7971                                  struct btrfs_path *path,
7972                                  u64 bytenr, u64 new_len)
7973 {
7974         struct btrfs_key key;
7975         struct btrfs_key found_key;
7976         struct extent_buffer *leaf;
7977         int ret;
7978         int slot;
7979
7980
7981         key.objectid = bytenr;
7982         key.type = (u8)-1;
7983         key.offset = (u64)-1;
7984
7985         while(1) {
7986                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7987                                         &key, path, 0, 1);
7988                 if (ret < 0)
7989                         break;
7990
7991                 if (ret > 0) {
7992                         ret = 0;
7993                         if (path->slots[0] == 0)
7994                                 break;
7995                         path->slots[0]--;
7996                 }
7997                 ret = 0;
7998
7999                 leaf = path->nodes[0];
8000                 slot = path->slots[0];
8001
8002                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8003                 if (found_key.objectid != bytenr)
8004                         break;
8005
8006                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8007                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8008                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8009                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8010                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8011                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8012                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8013                         btrfs_release_path(path);
8014                         if (found_key.type == 0) {
8015                                 if (found_key.offset == 0)
8016                                         break;
8017                                 key.offset = found_key.offset - 1;
8018                                 key.type = found_key.type;
8019                         }
8020                         key.type = found_key.type - 1;
8021                         key.offset = (u64)-1;
8022                         continue;
8023                 }
8024
8025                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8026                         found_key.objectid, found_key.type, found_key.offset);
8027
8028                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8029                 if (ret)
8030                         break;
8031                 btrfs_release_path(path);
8032
8033                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8034                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8035                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8036                                 found_key.offset : root->nodesize;
8037
8038                         ret = btrfs_update_block_group(trans, root, bytenr,
8039                                                        bytes, 0, 0);
8040                         if (ret)
8041                                 break;
8042                 }
8043         }
8044
8045         btrfs_release_path(path);
8046         return ret;
8047 }
8048
8049 /*
8050  * for a single backref, this will allocate a new extent
8051  * and add the backref to it.
8052  */
8053 static int record_extent(struct btrfs_trans_handle *trans,
8054                          struct btrfs_fs_info *info,
8055                          struct btrfs_path *path,
8056                          struct extent_record *rec,
8057                          struct extent_backref *back,
8058                          int allocated, u64 flags)
8059 {
8060         int ret = 0;
8061         struct btrfs_root *extent_root = info->extent_root;
8062         struct extent_buffer *leaf;
8063         struct btrfs_key ins_key;
8064         struct btrfs_extent_item *ei;
8065         struct data_backref *dback;
8066         struct btrfs_tree_block_info *bi;
8067
8068         if (!back->is_data)
8069                 rec->max_size = max_t(u64, rec->max_size,
8070                                     info->extent_root->nodesize);
8071
8072         if (!allocated) {
8073                 u32 item_size = sizeof(*ei);
8074
8075                 if (!back->is_data)
8076                         item_size += sizeof(*bi);
8077
8078                 ins_key.objectid = rec->start;
8079                 ins_key.offset = rec->max_size;
8080                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8081
8082                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8083                                         &ins_key, item_size);
8084                 if (ret)
8085                         goto fail;
8086
8087                 leaf = path->nodes[0];
8088                 ei = btrfs_item_ptr(leaf, path->slots[0],
8089                                     struct btrfs_extent_item);
8090
8091                 btrfs_set_extent_refs(leaf, ei, 0);
8092                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8093
8094                 if (back->is_data) {
8095                         btrfs_set_extent_flags(leaf, ei,
8096                                                BTRFS_EXTENT_FLAG_DATA);
8097                 } else {
8098                         struct btrfs_disk_key copy_key;;
8099
8100                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8101                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8102                                              sizeof(*bi));
8103
8104                         btrfs_set_disk_key_objectid(&copy_key,
8105                                                     rec->info_objectid);
8106                         btrfs_set_disk_key_type(&copy_key, 0);
8107                         btrfs_set_disk_key_offset(&copy_key, 0);
8108
8109                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8110                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8111
8112                         btrfs_set_extent_flags(leaf, ei,
8113                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8114                 }
8115
8116                 btrfs_mark_buffer_dirty(leaf);
8117                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8118                                                rec->max_size, 1, 0);
8119                 if (ret)
8120                         goto fail;
8121                 btrfs_release_path(path);
8122         }
8123
8124         if (back->is_data) {
8125                 u64 parent;
8126                 int i;
8127
8128                 dback = to_data_backref(back);
8129                 if (back->full_backref)
8130                         parent = dback->parent;
8131                 else
8132                         parent = 0;
8133
8134                 for (i = 0; i < dback->found_ref; i++) {
8135                         /* if parent != 0, we're doing a full backref
8136                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8137                          * just makes the backref allocator create a data
8138                          * backref
8139                          */
8140                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8141                                                    rec->start, rec->max_size,
8142                                                    parent,
8143                                                    dback->root,
8144                                                    parent ?
8145                                                    BTRFS_FIRST_FREE_OBJECTID :
8146                                                    dback->owner,
8147                                                    dback->offset);
8148                         if (ret)
8149                                 break;
8150                 }
8151                 fprintf(stderr, "adding new data backref"
8152                                 " on %llu %s %llu owner %llu"
8153                                 " offset %llu found %d\n",
8154                                 (unsigned long long)rec->start,
8155                                 back->full_backref ?
8156                                 "parent" : "root",
8157                                 back->full_backref ?
8158                                 (unsigned long long)parent :
8159                                 (unsigned long long)dback->root,
8160                                 (unsigned long long)dback->owner,
8161                                 (unsigned long long)dback->offset,
8162                                 dback->found_ref);
8163         } else {
8164                 u64 parent;
8165                 struct tree_backref *tback;
8166
8167                 tback = to_tree_backref(back);
8168                 if (back->full_backref)
8169                         parent = tback->parent;
8170                 else
8171                         parent = 0;
8172
8173                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8174                                            rec->start, rec->max_size,
8175                                            parent, tback->root, 0, 0);
8176                 fprintf(stderr, "adding new tree backref on "
8177                         "start %llu len %llu parent %llu root %llu\n",
8178                         rec->start, rec->max_size, parent, tback->root);
8179         }
8180 fail:
8181         btrfs_release_path(path);
8182         return ret;
8183 }
8184
8185 static struct extent_entry *find_entry(struct list_head *entries,
8186                                        u64 bytenr, u64 bytes)
8187 {
8188         struct extent_entry *entry = NULL;
8189
8190         list_for_each_entry(entry, entries, list) {
8191                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8192                         return entry;
8193         }
8194
8195         return NULL;
8196 }
8197
8198 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8199 {
8200         struct extent_entry *entry, *best = NULL, *prev = NULL;
8201
8202         list_for_each_entry(entry, entries, list) {
8203                 /*
8204                  * If there are as many broken entries as entries then we know
8205                  * not to trust this particular entry.
8206                  */
8207                 if (entry->broken == entry->count)
8208                         continue;
8209
8210                 /*
8211                  * Special case, when there are only two entries and 'best' is
8212                  * the first one
8213                  */
8214                 if (!prev) {
8215                         best = entry;
8216                         prev = entry;
8217                         continue;
8218                 }
8219
8220                 /*
8221                  * If our current entry == best then we can't be sure our best
8222                  * is really the best, so we need to keep searching.
8223                  */
8224                 if (best && best->count == entry->count) {
8225                         prev = entry;
8226                         best = NULL;
8227                         continue;
8228                 }
8229
8230                 /* Prev == entry, not good enough, have to keep searching */
8231                 if (!prev->broken && prev->count == entry->count)
8232                         continue;
8233
8234                 if (!best)
8235                         best = (prev->count > entry->count) ? prev : entry;
8236                 else if (best->count < entry->count)
8237                         best = entry;
8238                 prev = entry;
8239         }
8240
8241         return best;
8242 }
8243
8244 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8245                       struct data_backref *dback, struct extent_entry *entry)
8246 {
8247         struct btrfs_trans_handle *trans;
8248         struct btrfs_root *root;
8249         struct btrfs_file_extent_item *fi;
8250         struct extent_buffer *leaf;
8251         struct btrfs_key key;
8252         u64 bytenr, bytes;
8253         int ret, err;
8254
8255         key.objectid = dback->root;
8256         key.type = BTRFS_ROOT_ITEM_KEY;
8257         key.offset = (u64)-1;
8258         root = btrfs_read_fs_root(info, &key);
8259         if (IS_ERR(root)) {
8260                 fprintf(stderr, "Couldn't find root for our ref\n");
8261                 return -EINVAL;
8262         }
8263
8264         /*
8265          * The backref points to the original offset of the extent if it was
8266          * split, so we need to search down to the offset we have and then walk
8267          * forward until we find the backref we're looking for.
8268          */
8269         key.objectid = dback->owner;
8270         key.type = BTRFS_EXTENT_DATA_KEY;
8271         key.offset = dback->offset;
8272         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8273         if (ret < 0) {
8274                 fprintf(stderr, "Error looking up ref %d\n", ret);
8275                 return ret;
8276         }
8277
8278         while (1) {
8279                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8280                         ret = btrfs_next_leaf(root, path);
8281                         if (ret) {
8282                                 fprintf(stderr, "Couldn't find our ref, next\n");
8283                                 return -EINVAL;
8284                         }
8285                 }
8286                 leaf = path->nodes[0];
8287                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8288                 if (key.objectid != dback->owner ||
8289                     key.type != BTRFS_EXTENT_DATA_KEY) {
8290                         fprintf(stderr, "Couldn't find our ref, search\n");
8291                         return -EINVAL;
8292                 }
8293                 fi = btrfs_item_ptr(leaf, path->slots[0],
8294                                     struct btrfs_file_extent_item);
8295                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8296                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8297
8298                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8299                         break;
8300                 path->slots[0]++;
8301         }
8302
8303         btrfs_release_path(path);
8304
8305         trans = btrfs_start_transaction(root, 1);
8306         if (IS_ERR(trans))
8307                 return PTR_ERR(trans);
8308
8309         /*
8310          * Ok we have the key of the file extent we want to fix, now we can cow
8311          * down to the thing and fix it.
8312          */
8313         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8314         if (ret < 0) {
8315                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8316                         key.objectid, key.type, key.offset, ret);
8317                 goto out;
8318         }
8319         if (ret > 0) {
8320                 fprintf(stderr, "Well that's odd, we just found this key "
8321                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8322                         key.offset);
8323                 ret = -EINVAL;
8324                 goto out;
8325         }
8326         leaf = path->nodes[0];
8327         fi = btrfs_item_ptr(leaf, path->slots[0],
8328                             struct btrfs_file_extent_item);
8329
8330         if (btrfs_file_extent_compression(leaf, fi) &&
8331             dback->disk_bytenr != entry->bytenr) {
8332                 fprintf(stderr, "Ref doesn't match the record start and is "
8333                         "compressed, please take a btrfs-image of this file "
8334                         "system and send it to a btrfs developer so they can "
8335                         "complete this functionality for bytenr %Lu\n",
8336                         dback->disk_bytenr);
8337                 ret = -EINVAL;
8338                 goto out;
8339         }
8340
8341         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8342                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8343         } else if (dback->disk_bytenr > entry->bytenr) {
8344                 u64 off_diff, offset;
8345
8346                 off_diff = dback->disk_bytenr - entry->bytenr;
8347                 offset = btrfs_file_extent_offset(leaf, fi);
8348                 if (dback->disk_bytenr + offset +
8349                     btrfs_file_extent_num_bytes(leaf, fi) >
8350                     entry->bytenr + entry->bytes) {
8351                         fprintf(stderr, "Ref is past the entry end, please "
8352                                 "take a btrfs-image of this file system and "
8353                                 "send it to a btrfs developer, ref %Lu\n",
8354                                 dback->disk_bytenr);
8355                         ret = -EINVAL;
8356                         goto out;
8357                 }
8358                 offset += off_diff;
8359                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8360                 btrfs_set_file_extent_offset(leaf, fi, offset);
8361         } else if (dback->disk_bytenr < entry->bytenr) {
8362                 u64 offset;
8363
8364                 offset = btrfs_file_extent_offset(leaf, fi);
8365                 if (dback->disk_bytenr + offset < entry->bytenr) {
8366                         fprintf(stderr, "Ref is before the entry start, please"
8367                                 " take a btrfs-image of this file system and "
8368                                 "send it to a btrfs developer, ref %Lu\n",
8369                                 dback->disk_bytenr);
8370                         ret = -EINVAL;
8371                         goto out;
8372                 }
8373
8374                 offset += dback->disk_bytenr;
8375                 offset -= entry->bytenr;
8376                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8377                 btrfs_set_file_extent_offset(leaf, fi, offset);
8378         }
8379
8380         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8381
8382         /*
8383          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8384          * only do this if we aren't using compression, otherwise it's a
8385          * trickier case.
8386          */
8387         if (!btrfs_file_extent_compression(leaf, fi))
8388                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8389         else
8390                 printf("ram bytes may be wrong?\n");
8391         btrfs_mark_buffer_dirty(leaf);
8392 out:
8393         err = btrfs_commit_transaction(trans, root);
8394         btrfs_release_path(path);
8395         return ret ? ret : err;
8396 }
8397
8398 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8399                            struct extent_record *rec)
8400 {
8401         struct extent_backref *back;
8402         struct data_backref *dback;
8403         struct extent_entry *entry, *best = NULL;
8404         LIST_HEAD(entries);
8405         int nr_entries = 0;
8406         int broken_entries = 0;
8407         int ret = 0;
8408         short mismatch = 0;
8409
8410         /*
8411          * Metadata is easy and the backrefs should always agree on bytenr and
8412          * size, if not we've got bigger issues.
8413          */
8414         if (rec->metadata)
8415                 return 0;
8416
8417         list_for_each_entry(back, &rec->backrefs, list) {
8418                 if (back->full_backref || !back->is_data)
8419                         continue;
8420
8421                 dback = to_data_backref(back);
8422
8423                 /*
8424                  * We only pay attention to backrefs that we found a real
8425                  * backref for.
8426                  */
8427                 if (dback->found_ref == 0)
8428                         continue;
8429
8430                 /*
8431                  * For now we only catch when the bytes don't match, not the
8432                  * bytenr.  We can easily do this at the same time, but I want
8433                  * to have a fs image to test on before we just add repair
8434                  * functionality willy-nilly so we know we won't screw up the
8435                  * repair.
8436                  */
8437
8438                 entry = find_entry(&entries, dback->disk_bytenr,
8439                                    dback->bytes);
8440                 if (!entry) {
8441                         entry = malloc(sizeof(struct extent_entry));
8442                         if (!entry) {
8443                                 ret = -ENOMEM;
8444                                 goto out;
8445                         }
8446                         memset(entry, 0, sizeof(*entry));
8447                         entry->bytenr = dback->disk_bytenr;
8448                         entry->bytes = dback->bytes;
8449                         list_add_tail(&entry->list, &entries);
8450                         nr_entries++;
8451                 }
8452
8453                 /*
8454                  * If we only have on entry we may think the entries agree when
8455                  * in reality they don't so we have to do some extra checking.
8456                  */
8457                 if (dback->disk_bytenr != rec->start ||
8458                     dback->bytes != rec->nr || back->broken)
8459                         mismatch = 1;
8460
8461                 if (back->broken) {
8462                         entry->broken++;
8463                         broken_entries++;
8464                 }
8465
8466                 entry->count++;
8467         }
8468
8469         /* Yay all the backrefs agree, carry on good sir */
8470         if (nr_entries <= 1 && !mismatch)
8471                 goto out;
8472
8473         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8474                 "%Lu\n", rec->start);
8475
8476         /*
8477          * First we want to see if the backrefs can agree amongst themselves who
8478          * is right, so figure out which one of the entries has the highest
8479          * count.
8480          */
8481         best = find_most_right_entry(&entries);
8482
8483         /*
8484          * Ok so we may have an even split between what the backrefs think, so
8485          * this is where we use the extent ref to see what it thinks.
8486          */
8487         if (!best) {
8488                 entry = find_entry(&entries, rec->start, rec->nr);
8489                 if (!entry && (!broken_entries || !rec->found_rec)) {
8490                         fprintf(stderr, "Backrefs don't agree with each other "
8491                                 "and extent record doesn't agree with anybody,"
8492                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8493                                 rec->start, rec->nr);
8494                         ret = -EINVAL;
8495                         goto out;
8496                 } else if (!entry) {
8497                         /*
8498                          * Ok our backrefs were broken, we'll assume this is the
8499                          * correct value and add an entry for this range.
8500                          */
8501                         entry = malloc(sizeof(struct extent_entry));
8502                         if (!entry) {
8503                                 ret = -ENOMEM;
8504                                 goto out;
8505                         }
8506                         memset(entry, 0, sizeof(*entry));
8507                         entry->bytenr = rec->start;
8508                         entry->bytes = rec->nr;
8509                         list_add_tail(&entry->list, &entries);
8510                         nr_entries++;
8511                 }
8512                 entry->count++;
8513                 best = find_most_right_entry(&entries);
8514                 if (!best) {
8515                         fprintf(stderr, "Backrefs and extent record evenly "
8516                                 "split on who is right, this is going to "
8517                                 "require user input to fix bytenr %Lu bytes "
8518                                 "%Lu\n", rec->start, rec->nr);
8519                         ret = -EINVAL;
8520                         goto out;
8521                 }
8522         }
8523
8524         /*
8525          * I don't think this can happen currently as we'll abort() if we catch
8526          * this case higher up, but in case somebody removes that we still can't
8527          * deal with it properly here yet, so just bail out of that's the case.
8528          */
8529         if (best->bytenr != rec->start) {
8530                 fprintf(stderr, "Extent start and backref starts don't match, "
8531                         "please use btrfs-image on this file system and send "
8532                         "it to a btrfs developer so they can make fsck fix "
8533                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8534                         rec->start, rec->nr);
8535                 ret = -EINVAL;
8536                 goto out;
8537         }
8538
8539         /*
8540          * Ok great we all agreed on an extent record, let's go find the real
8541          * references and fix up the ones that don't match.
8542          */
8543         list_for_each_entry(back, &rec->backrefs, list) {
8544                 if (back->full_backref || !back->is_data)
8545                         continue;
8546
8547                 dback = to_data_backref(back);
8548
8549                 /*
8550                  * Still ignoring backrefs that don't have a real ref attached
8551                  * to them.
8552                  */
8553                 if (dback->found_ref == 0)
8554                         continue;
8555
8556                 if (dback->bytes == best->bytes &&
8557                     dback->disk_bytenr == best->bytenr)
8558                         continue;
8559
8560                 ret = repair_ref(info, path, dback, best);
8561                 if (ret)
8562                         goto out;
8563         }
8564
8565         /*
8566          * Ok we messed with the actual refs, which means we need to drop our
8567          * entire cache and go back and rescan.  I know this is a huge pain and
8568          * adds a lot of extra work, but it's the only way to be safe.  Once all
8569          * the backrefs agree we may not need to do anything to the extent
8570          * record itself.
8571          */
8572         ret = -EAGAIN;
8573 out:
8574         while (!list_empty(&entries)) {
8575                 entry = list_entry(entries.next, struct extent_entry, list);
8576                 list_del_init(&entry->list);
8577                 free(entry);
8578         }
8579         return ret;
8580 }
8581
8582 static int process_duplicates(struct btrfs_root *root,
8583                               struct cache_tree *extent_cache,
8584                               struct extent_record *rec)
8585 {
8586         struct extent_record *good, *tmp;
8587         struct cache_extent *cache;
8588         int ret;
8589
8590         /*
8591          * If we found a extent record for this extent then return, or if we
8592          * have more than one duplicate we are likely going to need to delete
8593          * something.
8594          */
8595         if (rec->found_rec || rec->num_duplicates > 1)
8596                 return 0;
8597
8598         /* Shouldn't happen but just in case */
8599         BUG_ON(!rec->num_duplicates);
8600
8601         /*
8602          * So this happens if we end up with a backref that doesn't match the
8603          * actual extent entry.  So either the backref is bad or the extent
8604          * entry is bad.  Either way we want to have the extent_record actually
8605          * reflect what we found in the extent_tree, so we need to take the
8606          * duplicate out and use that as the extent_record since the only way we
8607          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8608          */
8609         remove_cache_extent(extent_cache, &rec->cache);
8610
8611         good = to_extent_record(rec->dups.next);
8612         list_del_init(&good->list);
8613         INIT_LIST_HEAD(&good->backrefs);
8614         INIT_LIST_HEAD(&good->dups);
8615         good->cache.start = good->start;
8616         good->cache.size = good->nr;
8617         good->content_checked = 0;
8618         good->owner_ref_checked = 0;
8619         good->num_duplicates = 0;
8620         good->refs = rec->refs;
8621         list_splice_init(&rec->backrefs, &good->backrefs);
8622         while (1) {
8623                 cache = lookup_cache_extent(extent_cache, good->start,
8624                                             good->nr);
8625                 if (!cache)
8626                         break;
8627                 tmp = container_of(cache, struct extent_record, cache);
8628
8629                 /*
8630                  * If we find another overlapping extent and it's found_rec is
8631                  * set then it's a duplicate and we need to try and delete
8632                  * something.
8633                  */
8634                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8635                         if (list_empty(&good->list))
8636                                 list_add_tail(&good->list,
8637                                               &duplicate_extents);
8638                         good->num_duplicates += tmp->num_duplicates + 1;
8639                         list_splice_init(&tmp->dups, &good->dups);
8640                         list_del_init(&tmp->list);
8641                         list_add_tail(&tmp->list, &good->dups);
8642                         remove_cache_extent(extent_cache, &tmp->cache);
8643                         continue;
8644                 }
8645
8646                 /*
8647                  * Ok we have another non extent item backed extent rec, so lets
8648                  * just add it to this extent and carry on like we did above.
8649                  */
8650                 good->refs += tmp->refs;
8651                 list_splice_init(&tmp->backrefs, &good->backrefs);
8652                 remove_cache_extent(extent_cache, &tmp->cache);
8653                 free(tmp);
8654         }
8655         ret = insert_cache_extent(extent_cache, &good->cache);
8656         BUG_ON(ret);
8657         free(rec);
8658         return good->num_duplicates ? 0 : 1;
8659 }
8660
8661 static int delete_duplicate_records(struct btrfs_root *root,
8662                                     struct extent_record *rec)
8663 {
8664         struct btrfs_trans_handle *trans;
8665         LIST_HEAD(delete_list);
8666         struct btrfs_path path;
8667         struct extent_record *tmp, *good, *n;
8668         int nr_del = 0;
8669         int ret = 0, err;
8670         struct btrfs_key key;
8671
8672         btrfs_init_path(&path);
8673
8674         good = rec;
8675         /* Find the record that covers all of the duplicates. */
8676         list_for_each_entry(tmp, &rec->dups, list) {
8677                 if (good->start < tmp->start)
8678                         continue;
8679                 if (good->nr > tmp->nr)
8680                         continue;
8681
8682                 if (tmp->start + tmp->nr < good->start + good->nr) {
8683                         fprintf(stderr, "Ok we have overlapping extents that "
8684                                 "aren't completely covered by each other, this "
8685                                 "is going to require more careful thought.  "
8686                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8687                                 tmp->start, tmp->nr, good->start, good->nr);
8688                         abort();
8689                 }
8690                 good = tmp;
8691         }
8692
8693         if (good != rec)
8694                 list_add_tail(&rec->list, &delete_list);
8695
8696         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8697                 if (tmp == good)
8698                         continue;
8699                 list_move_tail(&tmp->list, &delete_list);
8700         }
8701
8702         root = root->fs_info->extent_root;
8703         trans = btrfs_start_transaction(root, 1);
8704         if (IS_ERR(trans)) {
8705                 ret = PTR_ERR(trans);
8706                 goto out;
8707         }
8708
8709         list_for_each_entry(tmp, &delete_list, list) {
8710                 if (tmp->found_rec == 0)
8711                         continue;
8712                 key.objectid = tmp->start;
8713                 key.type = BTRFS_EXTENT_ITEM_KEY;
8714                 key.offset = tmp->nr;
8715
8716                 /* Shouldn't happen but just in case */
8717                 if (tmp->metadata) {
8718                         fprintf(stderr, "Well this shouldn't happen, extent "
8719                                 "record overlaps but is metadata? "
8720                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8721                         abort();
8722                 }
8723
8724                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8725                 if (ret) {
8726                         if (ret > 0)
8727                                 ret = -EINVAL;
8728                         break;
8729                 }
8730                 ret = btrfs_del_item(trans, root, &path);
8731                 if (ret)
8732                         break;
8733                 btrfs_release_path(&path);
8734                 nr_del++;
8735         }
8736         err = btrfs_commit_transaction(trans, root);
8737         if (err && !ret)
8738                 ret = err;
8739 out:
8740         while (!list_empty(&delete_list)) {
8741                 tmp = to_extent_record(delete_list.next);
8742                 list_del_init(&tmp->list);
8743                 if (tmp == rec)
8744                         continue;
8745                 free(tmp);
8746         }
8747
8748         while (!list_empty(&rec->dups)) {
8749                 tmp = to_extent_record(rec->dups.next);
8750                 list_del_init(&tmp->list);
8751                 free(tmp);
8752         }
8753
8754         btrfs_release_path(&path);
8755
8756         if (!ret && !nr_del)
8757                 rec->num_duplicates = 0;
8758
8759         return ret ? ret : nr_del;
8760 }
8761
8762 static int find_possible_backrefs(struct btrfs_fs_info *info,
8763                                   struct btrfs_path *path,
8764                                   struct cache_tree *extent_cache,
8765                                   struct extent_record *rec)
8766 {
8767         struct btrfs_root *root;
8768         struct extent_backref *back;
8769         struct data_backref *dback;
8770         struct cache_extent *cache;
8771         struct btrfs_file_extent_item *fi;
8772         struct btrfs_key key;
8773         u64 bytenr, bytes;
8774         int ret;
8775
8776         list_for_each_entry(back, &rec->backrefs, list) {
8777                 /* Don't care about full backrefs (poor unloved backrefs) */
8778                 if (back->full_backref || !back->is_data)
8779                         continue;
8780
8781                 dback = to_data_backref(back);
8782
8783                 /* We found this one, we don't need to do a lookup */
8784                 if (dback->found_ref)
8785                         continue;
8786
8787                 key.objectid = dback->root;
8788                 key.type = BTRFS_ROOT_ITEM_KEY;
8789                 key.offset = (u64)-1;
8790
8791                 root = btrfs_read_fs_root(info, &key);
8792
8793                 /* No root, definitely a bad ref, skip */
8794                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8795                         continue;
8796                 /* Other err, exit */
8797                 if (IS_ERR(root))
8798                         return PTR_ERR(root);
8799
8800                 key.objectid = dback->owner;
8801                 key.type = BTRFS_EXTENT_DATA_KEY;
8802                 key.offset = dback->offset;
8803                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8804                 if (ret) {
8805                         btrfs_release_path(path);
8806                         if (ret < 0)
8807                                 return ret;
8808                         /* Didn't find it, we can carry on */
8809                         ret = 0;
8810                         continue;
8811                 }
8812
8813                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8814                                     struct btrfs_file_extent_item);
8815                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8816                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8817                 btrfs_release_path(path);
8818                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8819                 if (cache) {
8820                         struct extent_record *tmp;
8821                         tmp = container_of(cache, struct extent_record, cache);
8822
8823                         /*
8824                          * If we found an extent record for the bytenr for this
8825                          * particular backref then we can't add it to our
8826                          * current extent record.  We only want to add backrefs
8827                          * that don't have a corresponding extent item in the
8828                          * extent tree since they likely belong to this record
8829                          * and we need to fix it if it doesn't match bytenrs.
8830                          */
8831                         if  (tmp->found_rec)
8832                                 continue;
8833                 }
8834
8835                 dback->found_ref += 1;
8836                 dback->disk_bytenr = bytenr;
8837                 dback->bytes = bytes;
8838
8839                 /*
8840                  * Set this so the verify backref code knows not to trust the
8841                  * values in this backref.
8842                  */
8843                 back->broken = 1;
8844         }
8845
8846         return 0;
8847 }
8848
8849 /*
8850  * Record orphan data ref into corresponding root.
8851  *
8852  * Return 0 if the extent item contains data ref and recorded.
8853  * Return 1 if the extent item contains no useful data ref
8854  *   On that case, it may contains only shared_dataref or metadata backref
8855  *   or the file extent exists(this should be handled by the extent bytenr
8856  *   recovery routine)
8857  * Return <0 if something goes wrong.
8858  */
8859 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8860                                       struct extent_record *rec)
8861 {
8862         struct btrfs_key key;
8863         struct btrfs_root *dest_root;
8864         struct extent_backref *back;
8865         struct data_backref *dback;
8866         struct orphan_data_extent *orphan;
8867         struct btrfs_path path;
8868         int recorded_data_ref = 0;
8869         int ret = 0;
8870
8871         if (rec->metadata)
8872                 return 1;
8873         btrfs_init_path(&path);
8874         list_for_each_entry(back, &rec->backrefs, list) {
8875                 if (back->full_backref || !back->is_data ||
8876                     !back->found_extent_tree)
8877                         continue;
8878                 dback = to_data_backref(back);
8879                 if (dback->found_ref)
8880                         continue;
8881                 key.objectid = dback->root;
8882                 key.type = BTRFS_ROOT_ITEM_KEY;
8883                 key.offset = (u64)-1;
8884
8885                 dest_root = btrfs_read_fs_root(fs_info, &key);
8886
8887                 /* For non-exist root we just skip it */
8888                 if (IS_ERR(dest_root) || !dest_root)
8889                         continue;
8890
8891                 key.objectid = dback->owner;
8892                 key.type = BTRFS_EXTENT_DATA_KEY;
8893                 key.offset = dback->offset;
8894
8895                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8896                 btrfs_release_path(&path);
8897                 /*
8898                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8899                  * we need to record it for inode/file extent rebuild.
8900                  * For ret > 0, we record it only for file extent rebuild.
8901                  * For ret == 0, the file extent exists but only bytenr
8902                  * mismatch, let the original bytenr fix routine to handle,
8903                  * don't record it.
8904                  */
8905                 if (ret == 0)
8906                         continue;
8907                 ret = 0;
8908                 orphan = malloc(sizeof(*orphan));
8909                 if (!orphan) {
8910                         ret = -ENOMEM;
8911                         goto out;
8912                 }
8913                 INIT_LIST_HEAD(&orphan->list);
8914                 orphan->root = dback->root;
8915                 orphan->objectid = dback->owner;
8916                 orphan->offset = dback->offset;
8917                 orphan->disk_bytenr = rec->cache.start;
8918                 orphan->disk_len = rec->cache.size;
8919                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8920                 recorded_data_ref = 1;
8921         }
8922 out:
8923         btrfs_release_path(&path);
8924         if (!ret)
8925                 return !recorded_data_ref;
8926         else
8927                 return ret;
8928 }
8929
8930 /*
8931  * when an incorrect extent item is found, this will delete
8932  * all of the existing entries for it and recreate them
8933  * based on what the tree scan found.
8934  */
8935 static int fixup_extent_refs(struct btrfs_fs_info *info,
8936                              struct cache_tree *extent_cache,
8937                              struct extent_record *rec)
8938 {
8939         struct btrfs_trans_handle *trans = NULL;
8940         int ret;
8941         struct btrfs_path path;
8942         struct list_head *cur = rec->backrefs.next;
8943         struct cache_extent *cache;
8944         struct extent_backref *back;
8945         int allocated = 0;
8946         u64 flags = 0;
8947
8948         if (rec->flag_block_full_backref)
8949                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8950
8951         btrfs_init_path(&path);
8952         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8953                 /*
8954                  * Sometimes the backrefs themselves are so broken they don't
8955                  * get attached to any meaningful rec, so first go back and
8956                  * check any of our backrefs that we couldn't find and throw
8957                  * them into the list if we find the backref so that
8958                  * verify_backrefs can figure out what to do.
8959                  */
8960                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8961                 if (ret < 0)
8962                         goto out;
8963         }
8964
8965         /* step one, make sure all of the backrefs agree */
8966         ret = verify_backrefs(info, &path, rec);
8967         if (ret < 0)
8968                 goto out;
8969
8970         trans = btrfs_start_transaction(info->extent_root, 1);
8971         if (IS_ERR(trans)) {
8972                 ret = PTR_ERR(trans);
8973                 goto out;
8974         }
8975
8976         /* step two, delete all the existing records */
8977         ret = delete_extent_records(trans, info->extent_root, &path,
8978                                     rec->start, rec->max_size);
8979
8980         if (ret < 0)
8981                 goto out;
8982
8983         /* was this block corrupt?  If so, don't add references to it */
8984         cache = lookup_cache_extent(info->corrupt_blocks,
8985                                     rec->start, rec->max_size);
8986         if (cache) {
8987                 ret = 0;
8988                 goto out;
8989         }
8990
8991         /* step three, recreate all the refs we did find */
8992         while(cur != &rec->backrefs) {
8993                 back = to_extent_backref(cur);
8994                 cur = cur->next;
8995
8996                 /*
8997                  * if we didn't find any references, don't create a
8998                  * new extent record
8999                  */
9000                 if (!back->found_ref)
9001                         continue;
9002
9003                 rec->bad_full_backref = 0;
9004                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9005                 allocated = 1;
9006
9007                 if (ret)
9008                         goto out;
9009         }
9010 out:
9011         if (trans) {
9012                 int err = btrfs_commit_transaction(trans, info->extent_root);
9013                 if (!ret)
9014                         ret = err;
9015         }
9016
9017         if (!ret)
9018                 fprintf(stderr, "Repaired extent references for %llu\n",
9019                                 (unsigned long long)rec->start);
9020
9021         btrfs_release_path(&path);
9022         return ret;
9023 }
9024
9025 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9026                               struct extent_record *rec)
9027 {
9028         struct btrfs_trans_handle *trans;
9029         struct btrfs_root *root = fs_info->extent_root;
9030         struct btrfs_path path;
9031         struct btrfs_extent_item *ei;
9032         struct btrfs_key key;
9033         u64 flags;
9034         int ret = 0;
9035
9036         key.objectid = rec->start;
9037         if (rec->metadata) {
9038                 key.type = BTRFS_METADATA_ITEM_KEY;
9039                 key.offset = rec->info_level;
9040         } else {
9041                 key.type = BTRFS_EXTENT_ITEM_KEY;
9042                 key.offset = rec->max_size;
9043         }
9044
9045         trans = btrfs_start_transaction(root, 0);
9046         if (IS_ERR(trans))
9047                 return PTR_ERR(trans);
9048
9049         btrfs_init_path(&path);
9050         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9051         if (ret < 0) {
9052                 btrfs_release_path(&path);
9053                 btrfs_commit_transaction(trans, root);
9054                 return ret;
9055         } else if (ret) {
9056                 fprintf(stderr, "Didn't find extent for %llu\n",
9057                         (unsigned long long)rec->start);
9058                 btrfs_release_path(&path);
9059                 btrfs_commit_transaction(trans, root);
9060                 return -ENOENT;
9061         }
9062
9063         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9064                             struct btrfs_extent_item);
9065         flags = btrfs_extent_flags(path.nodes[0], ei);
9066         if (rec->flag_block_full_backref) {
9067                 fprintf(stderr, "setting full backref on %llu\n",
9068                         (unsigned long long)key.objectid);
9069                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9070         } else {
9071                 fprintf(stderr, "clearing full backref on %llu\n",
9072                         (unsigned long long)key.objectid);
9073                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9074         }
9075         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9076         btrfs_mark_buffer_dirty(path.nodes[0]);
9077         btrfs_release_path(&path);
9078         ret = btrfs_commit_transaction(trans, root);
9079         if (!ret)
9080                 fprintf(stderr, "Repaired extent flags for %llu\n",
9081                                 (unsigned long long)rec->start);
9082
9083         return ret;
9084 }
9085
9086 /* right now we only prune from the extent allocation tree */
9087 static int prune_one_block(struct btrfs_trans_handle *trans,
9088                            struct btrfs_fs_info *info,
9089                            struct btrfs_corrupt_block *corrupt)
9090 {
9091         int ret;
9092         struct btrfs_path path;
9093         struct extent_buffer *eb;
9094         u64 found;
9095         int slot;
9096         int nritems;
9097         int level = corrupt->level + 1;
9098
9099         btrfs_init_path(&path);
9100 again:
9101         /* we want to stop at the parent to our busted block */
9102         path.lowest_level = level;
9103
9104         ret = btrfs_search_slot(trans, info->extent_root,
9105                                 &corrupt->key, &path, -1, 1);
9106
9107         if (ret < 0)
9108                 goto out;
9109
9110         eb = path.nodes[level];
9111         if (!eb) {
9112                 ret = -ENOENT;
9113                 goto out;
9114         }
9115
9116         /*
9117          * hopefully the search gave us the block we want to prune,
9118          * lets try that first
9119          */
9120         slot = path.slots[level];
9121         found =  btrfs_node_blockptr(eb, slot);
9122         if (found == corrupt->cache.start)
9123                 goto del_ptr;
9124
9125         nritems = btrfs_header_nritems(eb);
9126
9127         /* the search failed, lets scan this node and hope we find it */
9128         for (slot = 0; slot < nritems; slot++) {
9129                 found =  btrfs_node_blockptr(eb, slot);
9130                 if (found == corrupt->cache.start)
9131                         goto del_ptr;
9132         }
9133         /*
9134          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9135          * to this block
9136          */
9137         if (eb == info->extent_root->node) {
9138                 ret = -ENOENT;
9139                 goto out;
9140         } else {
9141                 level++;
9142                 btrfs_release_path(&path);
9143                 goto again;
9144         }
9145
9146 del_ptr:
9147         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9148         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
9149
9150 out:
9151         btrfs_release_path(&path);
9152         return ret;
9153 }
9154
9155 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9156 {
9157         struct btrfs_trans_handle *trans = NULL;
9158         struct cache_extent *cache;
9159         struct btrfs_corrupt_block *corrupt;
9160
9161         while (1) {
9162                 cache = search_cache_extent(info->corrupt_blocks, 0);
9163                 if (!cache)
9164                         break;
9165                 if (!trans) {
9166                         trans = btrfs_start_transaction(info->extent_root, 1);
9167                         if (IS_ERR(trans))
9168                                 return PTR_ERR(trans);
9169                 }
9170                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9171                 prune_one_block(trans, info, corrupt);
9172                 remove_cache_extent(info->corrupt_blocks, cache);
9173         }
9174         if (trans)
9175                 return btrfs_commit_transaction(trans, info->extent_root);
9176         return 0;
9177 }
9178
9179 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9180 {
9181         struct btrfs_block_group_cache *cache;
9182         u64 start, end;
9183         int ret;
9184
9185         while (1) {
9186                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9187                                             &start, &end, EXTENT_DIRTY);
9188                 if (ret)
9189                         break;
9190                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
9191                                    GFP_NOFS);
9192         }
9193
9194         start = 0;
9195         while (1) {
9196                 cache = btrfs_lookup_first_block_group(fs_info, start);
9197                 if (!cache)
9198                         break;
9199                 if (cache->cached)
9200                         cache->cached = 0;
9201                 start = cache->key.objectid + cache->key.offset;
9202         }
9203 }
9204
9205 static int check_extent_refs(struct btrfs_root *root,
9206                              struct cache_tree *extent_cache)
9207 {
9208         struct extent_record *rec;
9209         struct cache_extent *cache;
9210         int ret = 0;
9211         int had_dups = 0;
9212
9213         if (repair) {
9214                 /*
9215                  * if we're doing a repair, we have to make sure
9216                  * we don't allocate from the problem extents.
9217                  * In the worst case, this will be all the
9218                  * extents in the FS
9219                  */
9220                 cache = search_cache_extent(extent_cache, 0);
9221                 while(cache) {
9222                         rec = container_of(cache, struct extent_record, cache);
9223                         set_extent_dirty(root->fs_info->excluded_extents,
9224                                          rec->start,
9225                                          rec->start + rec->max_size - 1,
9226                                          GFP_NOFS);
9227                         cache = next_cache_extent(cache);
9228                 }
9229
9230                 /* pin down all the corrupted blocks too */
9231                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9232                 while(cache) {
9233                         set_extent_dirty(root->fs_info->excluded_extents,
9234                                          cache->start,
9235                                          cache->start + cache->size - 1,
9236                                          GFP_NOFS);
9237                         cache = next_cache_extent(cache);
9238                 }
9239                 prune_corrupt_blocks(root->fs_info);
9240                 reset_cached_block_groups(root->fs_info);
9241         }
9242
9243         reset_cached_block_groups(root->fs_info);
9244
9245         /*
9246          * We need to delete any duplicate entries we find first otherwise we
9247          * could mess up the extent tree when we have backrefs that actually
9248          * belong to a different extent item and not the weird duplicate one.
9249          */
9250         while (repair && !list_empty(&duplicate_extents)) {
9251                 rec = to_extent_record(duplicate_extents.next);
9252                 list_del_init(&rec->list);
9253
9254                 /* Sometimes we can find a backref before we find an actual
9255                  * extent, so we need to process it a little bit to see if there
9256                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9257                  * if this is a backref screwup.  If we need to delete stuff
9258                  * process_duplicates() will return 0, otherwise it will return
9259                  * 1 and we
9260                  */
9261                 if (process_duplicates(root, extent_cache, rec))
9262                         continue;
9263                 ret = delete_duplicate_records(root, rec);
9264                 if (ret < 0)
9265                         return ret;
9266                 /*
9267                  * delete_duplicate_records will return the number of entries
9268                  * deleted, so if it's greater than 0 then we know we actually
9269                  * did something and we need to remove.
9270                  */
9271                 if (ret)
9272                         had_dups = 1;
9273         }
9274
9275         if (had_dups)
9276                 return -EAGAIN;
9277
9278         while(1) {
9279                 int cur_err = 0;
9280                 int fix = 0;
9281
9282                 cache = search_cache_extent(extent_cache, 0);
9283                 if (!cache)
9284                         break;
9285                 rec = container_of(cache, struct extent_record, cache);
9286                 if (rec->num_duplicates) {
9287                         fprintf(stderr, "extent item %llu has multiple extent "
9288                                 "items\n", (unsigned long long)rec->start);
9289                         cur_err = 1;
9290                 }
9291
9292                 if (rec->refs != rec->extent_item_refs) {
9293                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9294                                 (unsigned long long)rec->start,
9295                                 (unsigned long long)rec->nr);
9296                         fprintf(stderr, "extent item %llu, found %llu\n",
9297                                 (unsigned long long)rec->extent_item_refs,
9298                                 (unsigned long long)rec->refs);
9299                         ret = record_orphan_data_extents(root->fs_info, rec);
9300                         if (ret < 0)
9301                                 goto repair_abort;
9302                         fix = ret;
9303                         cur_err = 1;
9304                 }
9305                 if (all_backpointers_checked(rec, 1)) {
9306                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9307                                 (unsigned long long)rec->start,
9308                                 (unsigned long long)rec->nr);
9309                         fix = 1;
9310                         cur_err = 1;
9311                 }
9312                 if (!rec->owner_ref_checked) {
9313                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9314                                 (unsigned long long)rec->start,
9315                                 (unsigned long long)rec->nr);
9316                         fix = 1;
9317                         cur_err = 1;
9318                 }
9319
9320                 if (repair && fix) {
9321                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9322                         if (ret)
9323                                 goto repair_abort;
9324                 }
9325
9326
9327                 if (rec->bad_full_backref) {
9328                         fprintf(stderr, "bad full backref, on [%llu]\n",
9329                                 (unsigned long long)rec->start);
9330                         if (repair) {
9331                                 ret = fixup_extent_flags(root->fs_info, rec);
9332                                 if (ret)
9333                                         goto repair_abort;
9334                                 fix = 1;
9335                         }
9336                         cur_err = 1;
9337                 }
9338                 /*
9339                  * Although it's not a extent ref's problem, we reuse this
9340                  * routine for error reporting.
9341                  * No repair function yet.
9342                  */
9343                 if (rec->crossing_stripes) {
9344                         fprintf(stderr,
9345                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9346                                 rec->start, rec->start + rec->max_size);
9347                         cur_err = 1;
9348                 }
9349
9350                 if (rec->wrong_chunk_type) {
9351                         fprintf(stderr,
9352                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9353                                 rec->start, rec->start + rec->max_size);
9354                         cur_err = 1;
9355                 }
9356
9357                 remove_cache_extent(extent_cache, cache);
9358                 free_all_extent_backrefs(rec);
9359                 if (!init_extent_tree && repair && (!cur_err || fix))
9360                         clear_extent_dirty(root->fs_info->excluded_extents,
9361                                            rec->start,
9362                                            rec->start + rec->max_size - 1,
9363                                            GFP_NOFS);
9364                 free(rec);
9365         }
9366 repair_abort:
9367         if (repair) {
9368                 if (ret && ret != -EAGAIN) {
9369                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9370                         exit(1);
9371                 } else if (!ret) {
9372                         struct btrfs_trans_handle *trans;
9373
9374                         root = root->fs_info->extent_root;
9375                         trans = btrfs_start_transaction(root, 1);
9376                         if (IS_ERR(trans)) {
9377                                 ret = PTR_ERR(trans);
9378                                 goto repair_abort;
9379                         }
9380
9381                         btrfs_fix_block_accounting(trans, root);
9382                         ret = btrfs_commit_transaction(trans, root);
9383                         if (ret)
9384                                 goto repair_abort;
9385                 }
9386                 return ret;
9387         }
9388         return 0;
9389 }
9390
9391 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9392 {
9393         u64 stripe_size;
9394
9395         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9396                 stripe_size = length;
9397                 stripe_size /= num_stripes;
9398         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9399                 stripe_size = length * 2;
9400                 stripe_size /= num_stripes;
9401         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9402                 stripe_size = length;
9403                 stripe_size /= (num_stripes - 1);
9404         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9405                 stripe_size = length;
9406                 stripe_size /= (num_stripes - 2);
9407         } else {
9408                 stripe_size = length;
9409         }
9410         return stripe_size;
9411 }
9412
9413 /*
9414  * Check the chunk with its block group/dev list ref:
9415  * Return 0 if all refs seems valid.
9416  * Return 1 if part of refs seems valid, need later check for rebuild ref
9417  * like missing block group and needs to search extent tree to rebuild them.
9418  * Return -1 if essential refs are missing and unable to rebuild.
9419  */
9420 static int check_chunk_refs(struct chunk_record *chunk_rec,
9421                             struct block_group_tree *block_group_cache,
9422                             struct device_extent_tree *dev_extent_cache,
9423                             int silent)
9424 {
9425         struct cache_extent *block_group_item;
9426         struct block_group_record *block_group_rec;
9427         struct cache_extent *dev_extent_item;
9428         struct device_extent_record *dev_extent_rec;
9429         u64 devid;
9430         u64 offset;
9431         u64 length;
9432         int metadump_v2 = 0;
9433         int i;
9434         int ret = 0;
9435
9436         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9437                                                chunk_rec->offset,
9438                                                chunk_rec->length);
9439         if (block_group_item) {
9440                 block_group_rec = container_of(block_group_item,
9441                                                struct block_group_record,
9442                                                cache);
9443                 if (chunk_rec->length != block_group_rec->offset ||
9444                     chunk_rec->offset != block_group_rec->objectid ||
9445                     (!metadump_v2 &&
9446                      chunk_rec->type_flags != block_group_rec->flags)) {
9447                         if (!silent)
9448                                 fprintf(stderr,
9449                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9450                                         chunk_rec->objectid,
9451                                         chunk_rec->type,
9452                                         chunk_rec->offset,
9453                                         chunk_rec->length,
9454                                         chunk_rec->offset,
9455                                         chunk_rec->type_flags,
9456                                         block_group_rec->objectid,
9457                                         block_group_rec->type,
9458                                         block_group_rec->offset,
9459                                         block_group_rec->offset,
9460                                         block_group_rec->objectid,
9461                                         block_group_rec->flags);
9462                         ret = -1;
9463                 } else {
9464                         list_del_init(&block_group_rec->list);
9465                         chunk_rec->bg_rec = block_group_rec;
9466                 }
9467         } else {
9468                 if (!silent)
9469                         fprintf(stderr,
9470                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9471                                 chunk_rec->objectid,
9472                                 chunk_rec->type,
9473                                 chunk_rec->offset,
9474                                 chunk_rec->length,
9475                                 chunk_rec->offset,
9476                                 chunk_rec->type_flags);
9477                 ret = 1;
9478         }
9479
9480         if (metadump_v2)
9481                 return ret;
9482
9483         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9484                                     chunk_rec->num_stripes);
9485         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9486                 devid = chunk_rec->stripes[i].devid;
9487                 offset = chunk_rec->stripes[i].offset;
9488                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9489                                                        devid, offset, length);
9490                 if (dev_extent_item) {
9491                         dev_extent_rec = container_of(dev_extent_item,
9492                                                 struct device_extent_record,
9493                                                 cache);
9494                         if (dev_extent_rec->objectid != devid ||
9495                             dev_extent_rec->offset != offset ||
9496                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9497                             dev_extent_rec->length != length) {
9498                                 if (!silent)
9499                                         fprintf(stderr,
9500                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9501                                                 chunk_rec->objectid,
9502                                                 chunk_rec->type,
9503                                                 chunk_rec->offset,
9504                                                 chunk_rec->stripes[i].devid,
9505                                                 chunk_rec->stripes[i].offset,
9506                                                 dev_extent_rec->objectid,
9507                                                 dev_extent_rec->offset,
9508                                                 dev_extent_rec->length);
9509                                 ret = -1;
9510                         } else {
9511                                 list_move(&dev_extent_rec->chunk_list,
9512                                           &chunk_rec->dextents);
9513                         }
9514                 } else {
9515                         if (!silent)
9516                                 fprintf(stderr,
9517                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9518                                         chunk_rec->objectid,
9519                                         chunk_rec->type,
9520                                         chunk_rec->offset,
9521                                         chunk_rec->stripes[i].devid,
9522                                         chunk_rec->stripes[i].offset);
9523                         ret = -1;
9524                 }
9525         }
9526         return ret;
9527 }
9528
9529 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9530 int check_chunks(struct cache_tree *chunk_cache,
9531                  struct block_group_tree *block_group_cache,
9532                  struct device_extent_tree *dev_extent_cache,
9533                  struct list_head *good, struct list_head *bad,
9534                  struct list_head *rebuild, int silent)
9535 {
9536         struct cache_extent *chunk_item;
9537         struct chunk_record *chunk_rec;
9538         struct block_group_record *bg_rec;
9539         struct device_extent_record *dext_rec;
9540         int err;
9541         int ret = 0;
9542
9543         chunk_item = first_cache_extent(chunk_cache);
9544         while (chunk_item) {
9545                 chunk_rec = container_of(chunk_item, struct chunk_record,
9546                                          cache);
9547                 err = check_chunk_refs(chunk_rec, block_group_cache,
9548                                        dev_extent_cache, silent);
9549                 if (err < 0)
9550                         ret = err;
9551                 if (err == 0 && good)
9552                         list_add_tail(&chunk_rec->list, good);
9553                 if (err > 0 && rebuild)
9554                         list_add_tail(&chunk_rec->list, rebuild);
9555                 if (err < 0 && bad)
9556                         list_add_tail(&chunk_rec->list, bad);
9557                 chunk_item = next_cache_extent(chunk_item);
9558         }
9559
9560         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9561                 if (!silent)
9562                         fprintf(stderr,
9563                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9564                                 bg_rec->objectid,
9565                                 bg_rec->offset,
9566                                 bg_rec->flags);
9567                 if (!ret)
9568                         ret = 1;
9569         }
9570
9571         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9572                             chunk_list) {
9573                 if (!silent)
9574                         fprintf(stderr,
9575                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9576                                 dext_rec->objectid,
9577                                 dext_rec->offset,
9578                                 dext_rec->length);
9579                 if (!ret)
9580                         ret = 1;
9581         }
9582         return ret;
9583 }
9584
9585
9586 static int check_device_used(struct device_record *dev_rec,
9587                              struct device_extent_tree *dext_cache)
9588 {
9589         struct cache_extent *cache;
9590         struct device_extent_record *dev_extent_rec;
9591         u64 total_byte = 0;
9592
9593         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9594         while (cache) {
9595                 dev_extent_rec = container_of(cache,
9596                                               struct device_extent_record,
9597                                               cache);
9598                 if (dev_extent_rec->objectid != dev_rec->devid)
9599                         break;
9600
9601                 list_del_init(&dev_extent_rec->device_list);
9602                 total_byte += dev_extent_rec->length;
9603                 cache = next_cache_extent(cache);
9604         }
9605
9606         if (total_byte != dev_rec->byte_used) {
9607                 fprintf(stderr,
9608                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9609                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9610                         dev_rec->type, dev_rec->offset);
9611                 return -1;
9612         } else {
9613                 return 0;
9614         }
9615 }
9616
9617 /* check btrfs_dev_item -> btrfs_dev_extent */
9618 static int check_devices(struct rb_root *dev_cache,
9619                          struct device_extent_tree *dev_extent_cache)
9620 {
9621         struct rb_node *dev_node;
9622         struct device_record *dev_rec;
9623         struct device_extent_record *dext_rec;
9624         int err;
9625         int ret = 0;
9626
9627         dev_node = rb_first(dev_cache);
9628         while (dev_node) {
9629                 dev_rec = container_of(dev_node, struct device_record, node);
9630                 err = check_device_used(dev_rec, dev_extent_cache);
9631                 if (err)
9632                         ret = err;
9633
9634                 dev_node = rb_next(dev_node);
9635         }
9636         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9637                             device_list) {
9638                 fprintf(stderr,
9639                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9640                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9641                 if (!ret)
9642                         ret = 1;
9643         }
9644         return ret;
9645 }
9646
9647 static int add_root_item_to_list(struct list_head *head,
9648                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9649                                   u8 level, u8 drop_level,
9650                                   int level_size, struct btrfs_key *drop_key)
9651 {
9652
9653         struct root_item_record *ri_rec;
9654         ri_rec = malloc(sizeof(*ri_rec));
9655         if (!ri_rec)
9656                 return -ENOMEM;
9657         ri_rec->bytenr = bytenr;
9658         ri_rec->objectid = objectid;
9659         ri_rec->level = level;
9660         ri_rec->level_size = level_size;
9661         ri_rec->drop_level = drop_level;
9662         ri_rec->last_snapshot = last_snapshot;
9663         if (drop_key)
9664                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9665         list_add_tail(&ri_rec->list, head);
9666
9667         return 0;
9668 }
9669
9670 static void free_root_item_list(struct list_head *list)
9671 {
9672         struct root_item_record *ri_rec;
9673
9674         while (!list_empty(list)) {
9675                 ri_rec = list_first_entry(list, struct root_item_record,
9676                                           list);
9677                 list_del_init(&ri_rec->list);
9678                 free(ri_rec);
9679         }
9680 }
9681
9682 static int deal_root_from_list(struct list_head *list,
9683                                struct btrfs_root *root,
9684                                struct block_info *bits,
9685                                int bits_nr,
9686                                struct cache_tree *pending,
9687                                struct cache_tree *seen,
9688                                struct cache_tree *reada,
9689                                struct cache_tree *nodes,
9690                                struct cache_tree *extent_cache,
9691                                struct cache_tree *chunk_cache,
9692                                struct rb_root *dev_cache,
9693                                struct block_group_tree *block_group_cache,
9694                                struct device_extent_tree *dev_extent_cache)
9695 {
9696         int ret = 0;
9697         u64 last;
9698
9699         while (!list_empty(list)) {
9700                 struct root_item_record *rec;
9701                 struct extent_buffer *buf;
9702                 rec = list_entry(list->next,
9703                                  struct root_item_record, list);
9704                 last = 0;
9705                 buf = read_tree_block(root->fs_info->tree_root,
9706                                       rec->bytenr, rec->level_size, 0);
9707                 if (!extent_buffer_uptodate(buf)) {
9708                         free_extent_buffer(buf);
9709                         ret = -EIO;
9710                         break;
9711                 }
9712                 ret = add_root_to_pending(buf, extent_cache, pending,
9713                                     seen, nodes, rec->objectid);
9714                 if (ret < 0)
9715                         break;
9716                 /*
9717                  * To rebuild extent tree, we need deal with snapshot
9718                  * one by one, otherwise we deal with node firstly which
9719                  * can maximize readahead.
9720                  */
9721                 while (1) {
9722                         ret = run_next_block(root, bits, bits_nr, &last,
9723                                              pending, seen, reada, nodes,
9724                                              extent_cache, chunk_cache,
9725                                              dev_cache, block_group_cache,
9726                                              dev_extent_cache, rec);
9727                         if (ret != 0)
9728                                 break;
9729                 }
9730                 free_extent_buffer(buf);
9731                 list_del(&rec->list);
9732                 free(rec);
9733                 if (ret < 0)
9734                         break;
9735         }
9736         while (ret >= 0) {
9737                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9738                                      reada, nodes, extent_cache, chunk_cache,
9739                                      dev_cache, block_group_cache,
9740                                      dev_extent_cache, NULL);
9741                 if (ret != 0) {
9742                         if (ret > 0)
9743                                 ret = 0;
9744                         break;
9745                 }
9746         }
9747         return ret;
9748 }
9749
9750 static int check_chunks_and_extents(struct btrfs_root *root)
9751 {
9752         struct rb_root dev_cache;
9753         struct cache_tree chunk_cache;
9754         struct block_group_tree block_group_cache;
9755         struct device_extent_tree dev_extent_cache;
9756         struct cache_tree extent_cache;
9757         struct cache_tree seen;
9758         struct cache_tree pending;
9759         struct cache_tree reada;
9760         struct cache_tree nodes;
9761         struct extent_io_tree excluded_extents;
9762         struct cache_tree corrupt_blocks;
9763         struct btrfs_path path;
9764         struct btrfs_key key;
9765         struct btrfs_key found_key;
9766         int ret, err = 0;
9767         struct block_info *bits;
9768         int bits_nr;
9769         struct extent_buffer *leaf;
9770         int slot;
9771         struct btrfs_root_item ri;
9772         struct list_head dropping_trees;
9773         struct list_head normal_trees;
9774         struct btrfs_root *root1;
9775         u64 objectid;
9776         u32 level_size;
9777         u8 level;
9778
9779         dev_cache = RB_ROOT;
9780         cache_tree_init(&chunk_cache);
9781         block_group_tree_init(&block_group_cache);
9782         device_extent_tree_init(&dev_extent_cache);
9783
9784         cache_tree_init(&extent_cache);
9785         cache_tree_init(&seen);
9786         cache_tree_init(&pending);
9787         cache_tree_init(&nodes);
9788         cache_tree_init(&reada);
9789         cache_tree_init(&corrupt_blocks);
9790         extent_io_tree_init(&excluded_extents);
9791         INIT_LIST_HEAD(&dropping_trees);
9792         INIT_LIST_HEAD(&normal_trees);
9793
9794         if (repair) {
9795                 root->fs_info->excluded_extents = &excluded_extents;
9796                 root->fs_info->fsck_extent_cache = &extent_cache;
9797                 root->fs_info->free_extent_hook = free_extent_hook;
9798                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9799         }
9800
9801         bits_nr = 1024;
9802         bits = malloc(bits_nr * sizeof(struct block_info));
9803         if (!bits) {
9804                 perror("malloc");
9805                 exit(1);
9806         }
9807
9808         if (ctx.progress_enabled) {
9809                 ctx.tp = TASK_EXTENTS;
9810                 task_start(ctx.info);
9811         }
9812
9813 again:
9814         root1 = root->fs_info->tree_root;
9815         level = btrfs_header_level(root1->node);
9816         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9817                                     root1->node->start, 0, level, 0,
9818                                     root1->nodesize, NULL);
9819         if (ret < 0)
9820                 goto out;
9821         root1 = root->fs_info->chunk_root;
9822         level = btrfs_header_level(root1->node);
9823         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9824                                     root1->node->start, 0, level, 0,
9825                                     root1->nodesize, NULL);
9826         if (ret < 0)
9827                 goto out;
9828         btrfs_init_path(&path);
9829         key.offset = 0;
9830         key.objectid = 0;
9831         key.type = BTRFS_ROOT_ITEM_KEY;
9832         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9833                                         &key, &path, 0, 0);
9834         if (ret < 0)
9835                 goto out;
9836         while(1) {
9837                 leaf = path.nodes[0];
9838                 slot = path.slots[0];
9839                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9840                         ret = btrfs_next_leaf(root, &path);
9841                         if (ret != 0)
9842                                 break;
9843                         leaf = path.nodes[0];
9844                         slot = path.slots[0];
9845                 }
9846                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9847                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9848                         unsigned long offset;
9849                         u64 last_snapshot;
9850
9851                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9852                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9853                         last_snapshot = btrfs_root_last_snapshot(&ri);
9854                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9855                                 level = btrfs_root_level(&ri);
9856                                 level_size = root->nodesize;
9857                                 ret = add_root_item_to_list(&normal_trees,
9858                                                 found_key.objectid,
9859                                                 btrfs_root_bytenr(&ri),
9860                                                 last_snapshot, level,
9861                                                 0, level_size, NULL);
9862                                 if (ret < 0)
9863                                         goto out;
9864                         } else {
9865                                 level = btrfs_root_level(&ri);
9866                                 level_size = root->nodesize;
9867                                 objectid = found_key.objectid;
9868                                 btrfs_disk_key_to_cpu(&found_key,
9869                                                       &ri.drop_progress);
9870                                 ret = add_root_item_to_list(&dropping_trees,
9871                                                 objectid,
9872                                                 btrfs_root_bytenr(&ri),
9873                                                 last_snapshot, level,
9874                                                 ri.drop_level,
9875                                                 level_size, &found_key);
9876                                 if (ret < 0)
9877                                         goto out;
9878                         }
9879                 }
9880                 path.slots[0]++;
9881         }
9882         btrfs_release_path(&path);
9883
9884         /*
9885          * check_block can return -EAGAIN if it fixes something, please keep
9886          * this in mind when dealing with return values from these functions, if
9887          * we get -EAGAIN we want to fall through and restart the loop.
9888          */
9889         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9890                                   &seen, &reada, &nodes, &extent_cache,
9891                                   &chunk_cache, &dev_cache, &block_group_cache,
9892                                   &dev_extent_cache);
9893         if (ret < 0) {
9894                 if (ret == -EAGAIN)
9895                         goto loop;
9896                 goto out;
9897         }
9898         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9899                                   &pending, &seen, &reada, &nodes,
9900                                   &extent_cache, &chunk_cache, &dev_cache,
9901                                   &block_group_cache, &dev_extent_cache);
9902         if (ret < 0) {
9903                 if (ret == -EAGAIN)
9904                         goto loop;
9905                 goto out;
9906         }
9907
9908         ret = check_chunks(&chunk_cache, &block_group_cache,
9909                            &dev_extent_cache, NULL, NULL, NULL, 0);
9910         if (ret) {
9911                 if (ret == -EAGAIN)
9912                         goto loop;
9913                 err = ret;
9914         }
9915
9916         ret = check_extent_refs(root, &extent_cache);
9917         if (ret < 0) {
9918                 if (ret == -EAGAIN)
9919                         goto loop;
9920                 goto out;
9921         }
9922
9923         ret = check_devices(&dev_cache, &dev_extent_cache);
9924         if (ret && err)
9925                 ret = err;
9926
9927 out:
9928         task_stop(ctx.info);
9929         if (repair) {
9930                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9931                 extent_io_tree_cleanup(&excluded_extents);
9932                 root->fs_info->fsck_extent_cache = NULL;
9933                 root->fs_info->free_extent_hook = NULL;
9934                 root->fs_info->corrupt_blocks = NULL;
9935                 root->fs_info->excluded_extents = NULL;
9936         }
9937         free(bits);
9938         free_chunk_cache_tree(&chunk_cache);
9939         free_device_cache_tree(&dev_cache);
9940         free_block_group_tree(&block_group_cache);
9941         free_device_extent_tree(&dev_extent_cache);
9942         free_extent_cache_tree(&seen);
9943         free_extent_cache_tree(&pending);
9944         free_extent_cache_tree(&reada);
9945         free_extent_cache_tree(&nodes);
9946         return ret;
9947 loop:
9948         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9949         free_extent_cache_tree(&seen);
9950         free_extent_cache_tree(&pending);
9951         free_extent_cache_tree(&reada);
9952         free_extent_cache_tree(&nodes);
9953         free_chunk_cache_tree(&chunk_cache);
9954         free_block_group_tree(&block_group_cache);
9955         free_device_cache_tree(&dev_cache);
9956         free_device_extent_tree(&dev_extent_cache);
9957         free_extent_record_cache(root->fs_info, &extent_cache);
9958         free_root_item_list(&normal_trees);
9959         free_root_item_list(&dropping_trees);
9960         extent_io_tree_cleanup(&excluded_extents);
9961         goto again;
9962 }
9963
9964 /*
9965  * Check backrefs of a tree block given by @bytenr or @eb.
9966  *
9967  * @root:       the root containing the @bytenr or @eb
9968  * @eb:         tree block extent buffer, can be NULL
9969  * @bytenr:     bytenr of the tree block to search
9970  * @level:      tree level of the tree block
9971  * @owner:      owner of the tree block
9972  *
9973  * Return >0 for any error found and output error message
9974  * Return 0 for no error found
9975  */
9976 static int check_tree_block_ref(struct btrfs_root *root,
9977                                 struct extent_buffer *eb, u64 bytenr,
9978                                 int level, u64 owner)
9979 {
9980         struct btrfs_key key;
9981         struct btrfs_root *extent_root = root->fs_info->extent_root;
9982         struct btrfs_path path;
9983         struct btrfs_extent_item *ei;
9984         struct btrfs_extent_inline_ref *iref;
9985         struct extent_buffer *leaf;
9986         unsigned long end;
9987         unsigned long ptr;
9988         int slot;
9989         int skinny_level;
9990         int type;
9991         u32 nodesize = root->nodesize;
9992         u32 item_size;
9993         u64 offset;
9994         int tree_reloc_root = 0;
9995         int found_ref = 0;
9996         int err = 0;
9997         int ret;
9998
9999         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10000             btrfs_header_bytenr(root->node) == bytenr)
10001                 tree_reloc_root = 1;
10002
10003         btrfs_init_path(&path);
10004         key.objectid = bytenr;
10005         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10006                 key.type = BTRFS_METADATA_ITEM_KEY;
10007         else
10008                 key.type = BTRFS_EXTENT_ITEM_KEY;
10009         key.offset = (u64)-1;
10010
10011         /* Search for the backref in extent tree */
10012         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10013         if (ret < 0) {
10014                 err |= BACKREF_MISSING;
10015                 goto out;
10016         }
10017         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10018         if (ret) {
10019                 err |= BACKREF_MISSING;
10020                 goto out;
10021         }
10022
10023         leaf = path.nodes[0];
10024         slot = path.slots[0];
10025         btrfs_item_key_to_cpu(leaf, &key, slot);
10026
10027         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10028
10029         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10030                 skinny_level = (int)key.offset;
10031                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10032         } else {
10033                 struct btrfs_tree_block_info *info;
10034
10035                 info = (struct btrfs_tree_block_info *)(ei + 1);
10036                 skinny_level = btrfs_tree_block_level(leaf, info);
10037                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10038         }
10039
10040         if (eb) {
10041                 u64 header_gen;
10042                 u64 extent_gen;
10043
10044                 if (!(btrfs_extent_flags(leaf, ei) &
10045                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10046                         error(
10047                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10048                                 key.objectid, nodesize,
10049                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10050                         err = BACKREF_MISMATCH;
10051                 }
10052                 header_gen = btrfs_header_generation(eb);
10053                 extent_gen = btrfs_extent_generation(leaf, ei);
10054                 if (header_gen != extent_gen) {
10055                         error(
10056         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10057                                 key.objectid, nodesize, header_gen,
10058                                 extent_gen);
10059                         err = BACKREF_MISMATCH;
10060                 }
10061                 if (level != skinny_level) {
10062                         error(
10063                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10064                                 key.objectid, nodesize, level, skinny_level);
10065                         err = BACKREF_MISMATCH;
10066                 }
10067                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10068                         error(
10069                         "extent[%llu %u] is referred by other roots than %llu",
10070                                 key.objectid, nodesize, root->objectid);
10071                         err = BACKREF_MISMATCH;
10072                 }
10073         }
10074
10075         /*
10076          * Iterate the extent/metadata item to find the exact backref
10077          */
10078         item_size = btrfs_item_size_nr(leaf, slot);
10079         ptr = (unsigned long)iref;
10080         end = (unsigned long)ei + item_size;
10081         while (ptr < end) {
10082                 iref = (struct btrfs_extent_inline_ref *)ptr;
10083                 type = btrfs_extent_inline_ref_type(leaf, iref);
10084                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10085
10086                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10087                         (offset == root->objectid || offset == owner)) {
10088                         found_ref = 1;
10089                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10090                         /*
10091                          * Backref of tree reloc root points to itself, no need
10092                          * to check backref any more.
10093                          */
10094                         if (tree_reloc_root)
10095                                 found_ref = 1;
10096                         else
10097                         /* Check if the backref points to valid referencer */
10098                                 found_ref = !check_tree_block_ref(root, NULL,
10099                                                 offset, level + 1, owner);
10100                 }
10101
10102                 if (found_ref)
10103                         break;
10104                 ptr += btrfs_extent_inline_ref_size(type);
10105         }
10106
10107         /*
10108          * Inlined extent item doesn't have what we need, check
10109          * TREE_BLOCK_REF_KEY
10110          */
10111         if (!found_ref) {
10112                 btrfs_release_path(&path);
10113                 key.objectid = bytenr;
10114                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10115                 key.offset = root->objectid;
10116
10117                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10118                 if (!ret)
10119                         found_ref = 1;
10120         }
10121         if (!found_ref)
10122                 err |= BACKREF_MISSING;
10123 out:
10124         btrfs_release_path(&path);
10125         if (eb && (err & BACKREF_MISSING))
10126                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10127                         bytenr, nodesize, owner, level);
10128         return err;
10129 }
10130
10131 /*
10132  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10133  *
10134  * Return >0 any error found and output error message
10135  * Return 0 for no error found
10136  */
10137 static int check_extent_data_item(struct btrfs_root *root,
10138                                   struct extent_buffer *eb, int slot)
10139 {
10140         struct btrfs_file_extent_item *fi;
10141         struct btrfs_path path;
10142         struct btrfs_root *extent_root = root->fs_info->extent_root;
10143         struct btrfs_key fi_key;
10144         struct btrfs_key dbref_key;
10145         struct extent_buffer *leaf;
10146         struct btrfs_extent_item *ei;
10147         struct btrfs_extent_inline_ref *iref;
10148         struct btrfs_extent_data_ref *dref;
10149         u64 owner;
10150         u64 disk_bytenr;
10151         u64 disk_num_bytes;
10152         u64 extent_num_bytes;
10153         u64 extent_flags;
10154         u32 item_size;
10155         unsigned long end;
10156         unsigned long ptr;
10157         int type;
10158         u64 ref_root;
10159         int found_dbackref = 0;
10160         int err = 0;
10161         int ret;
10162
10163         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10164         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10165
10166         /* Nothing to check for hole and inline data extents */
10167         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10168             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10169                 return 0;
10170
10171         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10172         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10173         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10174
10175         /* Check unaligned disk_num_bytes and num_bytes */
10176         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10177                 error(
10178 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10179                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10180                         root->sectorsize);
10181                 err |= BYTES_UNALIGNED;
10182         } else {
10183                 data_bytes_allocated += disk_num_bytes;
10184         }
10185         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10186                 error(
10187 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10188                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10189                         root->sectorsize);
10190                 err |= BYTES_UNALIGNED;
10191         } else {
10192                 data_bytes_referenced += extent_num_bytes;
10193         }
10194         owner = btrfs_header_owner(eb);
10195
10196         /* Check the extent item of the file extent in extent tree */
10197         btrfs_init_path(&path);
10198         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10199         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10200         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10201
10202         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10203         if (ret) {
10204                 err |= BACKREF_MISSING;
10205                 goto error;
10206         }
10207
10208         leaf = path.nodes[0];
10209         slot = path.slots[0];
10210         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10211
10212         extent_flags = btrfs_extent_flags(leaf, ei);
10213
10214         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10215                 error(
10216                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10217                     disk_bytenr, disk_num_bytes,
10218                     BTRFS_EXTENT_FLAG_DATA);
10219                 err |= BACKREF_MISMATCH;
10220         }
10221
10222         /* Check data backref inside that extent item */
10223         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10224         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10225         ptr = (unsigned long)iref;
10226         end = (unsigned long)ei + item_size;
10227         while (ptr < end) {
10228                 iref = (struct btrfs_extent_inline_ref *)ptr;
10229                 type = btrfs_extent_inline_ref_type(leaf, iref);
10230                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10231
10232                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10233                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10234                         if (ref_root == owner || ref_root == root->objectid)
10235                                 found_dbackref = 1;
10236                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10237                         found_dbackref = !check_tree_block_ref(root, NULL,
10238                                 btrfs_extent_inline_ref_offset(leaf, iref),
10239                                 0, owner);
10240                 }
10241
10242                 if (found_dbackref)
10243                         break;
10244                 ptr += btrfs_extent_inline_ref_size(type);
10245         }
10246
10247         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
10248         if (!found_dbackref) {
10249                 btrfs_release_path(&path);
10250
10251                 btrfs_init_path(&path);
10252                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10253                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10254                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10255                                 fi_key.objectid, fi_key.offset);
10256
10257                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10258                                         &dbref_key, &path, 0, 0);
10259                 if (!ret)
10260                         found_dbackref = 1;
10261         }
10262
10263         if (!found_dbackref)
10264                 err |= BACKREF_MISSING;
10265 error:
10266         btrfs_release_path(&path);
10267         if (err & BACKREF_MISSING) {
10268                 error("data extent[%llu %llu] backref lost",
10269                       disk_bytenr, disk_num_bytes);
10270         }
10271         return err;
10272 }
10273
10274 /*
10275  * Get real tree block level for the case like shared block
10276  * Return >= 0 as tree level
10277  * Return <0 for error
10278  */
10279 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10280 {
10281         struct extent_buffer *eb;
10282         struct btrfs_path path;
10283         struct btrfs_key key;
10284         struct btrfs_extent_item *ei;
10285         u64 flags;
10286         u64 transid;
10287         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10288         u8 backref_level;
10289         u8 header_level;
10290         int ret;
10291
10292         /* Search extent tree for extent generation and level */
10293         key.objectid = bytenr;
10294         key.type = BTRFS_METADATA_ITEM_KEY;
10295         key.offset = (u64)-1;
10296
10297         btrfs_init_path(&path);
10298         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10299         if (ret < 0)
10300                 goto release_out;
10301         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10302         if (ret < 0)
10303                 goto release_out;
10304         if (ret > 0) {
10305                 ret = -ENOENT;
10306                 goto release_out;
10307         }
10308
10309         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10310         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10311                             struct btrfs_extent_item);
10312         flags = btrfs_extent_flags(path.nodes[0], ei);
10313         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10314                 ret = -ENOENT;
10315                 goto release_out;
10316         }
10317
10318         /* Get transid for later read_tree_block() check */
10319         transid = btrfs_extent_generation(path.nodes[0], ei);
10320
10321         /* Get backref level as one source */
10322         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10323                 backref_level = key.offset;
10324         } else {
10325                 struct btrfs_tree_block_info *info;
10326
10327                 info = (struct btrfs_tree_block_info *)(ei + 1);
10328                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10329         }
10330         btrfs_release_path(&path);
10331
10332         /* Get level from tree block as an alternative source */
10333         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10334         if (!extent_buffer_uptodate(eb)) {
10335                 free_extent_buffer(eb);
10336                 return -EIO;
10337         }
10338         header_level = btrfs_header_level(eb);
10339         free_extent_buffer(eb);
10340
10341         if (header_level != backref_level)
10342                 return -EIO;
10343         return header_level;
10344
10345 release_out:
10346         btrfs_release_path(&path);
10347         return ret;
10348 }
10349
10350 /*
10351  * Check if a tree block backref is valid (points to a valid tree block)
10352  * if level == -1, level will be resolved
10353  * Return >0 for any error found and print error message
10354  */
10355 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10356                                     u64 bytenr, int level)
10357 {
10358         struct btrfs_root *root;
10359         struct btrfs_key key;
10360         struct btrfs_path path;
10361         struct extent_buffer *eb;
10362         struct extent_buffer *node;
10363         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10364         int err = 0;
10365         int ret;
10366
10367         /* Query level for level == -1 special case */
10368         if (level == -1)
10369                 level = query_tree_block_level(fs_info, bytenr);
10370         if (level < 0) {
10371                 err |= REFERENCER_MISSING;
10372                 goto out;
10373         }
10374
10375         key.objectid = root_id;
10376         key.type = BTRFS_ROOT_ITEM_KEY;
10377         key.offset = (u64)-1;
10378
10379         root = btrfs_read_fs_root(fs_info, &key);
10380         if (IS_ERR(root)) {
10381                 err |= REFERENCER_MISSING;
10382                 goto out;
10383         }
10384
10385         /* Read out the tree block to get item/node key */
10386         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10387         if (!extent_buffer_uptodate(eb)) {
10388                 err |= REFERENCER_MISSING;
10389                 free_extent_buffer(eb);
10390                 goto out;
10391         }
10392
10393         /* Empty tree, no need to check key */
10394         if (!btrfs_header_nritems(eb) && !level) {
10395                 free_extent_buffer(eb);
10396                 goto out;
10397         }
10398
10399         if (level)
10400                 btrfs_node_key_to_cpu(eb, &key, 0);
10401         else
10402                 btrfs_item_key_to_cpu(eb, &key, 0);
10403
10404         free_extent_buffer(eb);
10405
10406         btrfs_init_path(&path);
10407         path.lowest_level = level;
10408         /* Search with the first key, to ensure we can reach it */
10409         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10410         if (ret < 0) {
10411                 err |= REFERENCER_MISSING;
10412                 goto release_out;
10413         }
10414
10415         node = path.nodes[level];
10416         if (btrfs_header_bytenr(node) != bytenr) {
10417                 error(
10418         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10419                         bytenr, nodesize, bytenr,
10420                         btrfs_header_bytenr(node));
10421                 err |= REFERENCER_MISMATCH;
10422         }
10423         if (btrfs_header_level(node) != level) {
10424                 error(
10425         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10426                         bytenr, nodesize, level,
10427                         btrfs_header_level(node));
10428                 err |= REFERENCER_MISMATCH;
10429         }
10430
10431 release_out:
10432         btrfs_release_path(&path);
10433 out:
10434         if (err & REFERENCER_MISSING) {
10435                 if (level < 0)
10436                         error("extent [%llu %d] lost referencer (owner: %llu)",
10437                                 bytenr, nodesize, root_id);
10438                 else
10439                         error(
10440                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10441                                 bytenr, nodesize, root_id, level);
10442         }
10443
10444         return err;
10445 }
10446
10447 /*
10448  * Check if tree block @eb is tree reloc root.
10449  * Return 0 if it's not or any problem happens
10450  * Return 1 if it's a tree reloc root
10451  */
10452 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10453                                  struct extent_buffer *eb)
10454 {
10455         struct btrfs_root *tree_reloc_root;
10456         struct btrfs_key key;
10457         u64 bytenr = btrfs_header_bytenr(eb);
10458         u64 owner = btrfs_header_owner(eb);
10459         int ret = 0;
10460
10461         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10462         key.offset = owner;
10463         key.type = BTRFS_ROOT_ITEM_KEY;
10464
10465         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10466         if (IS_ERR(tree_reloc_root))
10467                 return 0;
10468
10469         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10470                 ret = 1;
10471         btrfs_free_fs_root(tree_reloc_root);
10472         return ret;
10473 }
10474
10475 /*
10476  * Check referencer for shared block backref
10477  * If level == -1, this function will resolve the level.
10478  */
10479 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10480                                      u64 parent, u64 bytenr, int level)
10481 {
10482         struct extent_buffer *eb;
10483         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10484         u32 nr;
10485         int found_parent = 0;
10486         int i;
10487
10488         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10489         if (!extent_buffer_uptodate(eb))
10490                 goto out;
10491
10492         if (level == -1)
10493                 level = query_tree_block_level(fs_info, bytenr);
10494         if (level < 0)
10495                 goto out;
10496
10497         /* It's possible it's a tree reloc root */
10498         if (parent == bytenr) {
10499                 if (is_tree_reloc_root(fs_info, eb))
10500                         found_parent = 1;
10501                 goto out;
10502         }
10503
10504         if (level + 1 != btrfs_header_level(eb))
10505                 goto out;
10506
10507         nr = btrfs_header_nritems(eb);
10508         for (i = 0; i < nr; i++) {
10509                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10510                         found_parent = 1;
10511                         break;
10512                 }
10513         }
10514 out:
10515         free_extent_buffer(eb);
10516         if (!found_parent) {
10517                 error(
10518         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10519                         bytenr, nodesize, parent, level);
10520                 return REFERENCER_MISSING;
10521         }
10522         return 0;
10523 }
10524
10525 /*
10526  * Check referencer for normal (inlined) data ref
10527  * If len == 0, it will be resolved by searching in extent tree
10528  */
10529 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10530                                      u64 root_id, u64 objectid, u64 offset,
10531                                      u64 bytenr, u64 len, u32 count)
10532 {
10533         struct btrfs_root *root;
10534         struct btrfs_root *extent_root = fs_info->extent_root;
10535         struct btrfs_key key;
10536         struct btrfs_path path;
10537         struct extent_buffer *leaf;
10538         struct btrfs_file_extent_item *fi;
10539         u32 found_count = 0;
10540         int slot;
10541         int ret = 0;
10542
10543         if (!len) {
10544                 key.objectid = bytenr;
10545                 key.type = BTRFS_EXTENT_ITEM_KEY;
10546                 key.offset = (u64)-1;
10547
10548                 btrfs_init_path(&path);
10549                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10550                 if (ret < 0)
10551                         goto out;
10552                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10553                 if (ret)
10554                         goto out;
10555                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10556                 if (key.objectid != bytenr ||
10557                     key.type != BTRFS_EXTENT_ITEM_KEY)
10558                         goto out;
10559                 len = key.offset;
10560                 btrfs_release_path(&path);
10561         }
10562         key.objectid = root_id;
10563         key.type = BTRFS_ROOT_ITEM_KEY;
10564         key.offset = (u64)-1;
10565         btrfs_init_path(&path);
10566
10567         root = btrfs_read_fs_root(fs_info, &key);
10568         if (IS_ERR(root))
10569                 goto out;
10570
10571         key.objectid = objectid;
10572         key.type = BTRFS_EXTENT_DATA_KEY;
10573         /*
10574          * It can be nasty as data backref offset is
10575          * file offset - file extent offset, which is smaller or
10576          * equal to original backref offset.  The only special case is
10577          * overflow.  So we need to special check and do further search.
10578          */
10579         key.offset = offset & (1ULL << 63) ? 0 : offset;
10580
10581         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10582         if (ret < 0)
10583                 goto out;
10584
10585         /*
10586          * Search afterwards to get correct one
10587          * NOTE: As we must do a comprehensive check on the data backref to
10588          * make sure the dref count also matches, we must iterate all file
10589          * extents for that inode.
10590          */
10591         while (1) {
10592                 leaf = path.nodes[0];
10593                 slot = path.slots[0];
10594
10595                 btrfs_item_key_to_cpu(leaf, &key, slot);
10596                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10597                         break;
10598                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10599                 /*
10600                  * Except normal disk bytenr and disk num bytes, we still
10601                  * need to do extra check on dbackref offset as
10602                  * dbackref offset = file_offset - file_extent_offset
10603                  */
10604                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10605                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10606                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10607                     offset)
10608                         found_count++;
10609
10610                 ret = btrfs_next_item(root, &path);
10611                 if (ret)
10612                         break;
10613         }
10614 out:
10615         btrfs_release_path(&path);
10616         if (found_count != count) {
10617                 error(
10618 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10619                         bytenr, len, root_id, objectid, offset, count, found_count);
10620                 return REFERENCER_MISSING;
10621         }
10622         return 0;
10623 }
10624
10625 /*
10626  * Check if the referencer of a shared data backref exists
10627  */
10628 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10629                                      u64 parent, u64 bytenr)
10630 {
10631         struct extent_buffer *eb;
10632         struct btrfs_key key;
10633         struct btrfs_file_extent_item *fi;
10634         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10635         u32 nr;
10636         int found_parent = 0;
10637         int i;
10638
10639         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10640         if (!extent_buffer_uptodate(eb))
10641                 goto out;
10642
10643         nr = btrfs_header_nritems(eb);
10644         for (i = 0; i < nr; i++) {
10645                 btrfs_item_key_to_cpu(eb, &key, i);
10646                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10647                         continue;
10648
10649                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10650                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10651                         continue;
10652
10653                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10654                         found_parent = 1;
10655                         break;
10656                 }
10657         }
10658
10659 out:
10660         free_extent_buffer(eb);
10661         if (!found_parent) {
10662                 error("shared extent %llu referencer lost (parent: %llu)",
10663                         bytenr, parent);
10664                 return REFERENCER_MISSING;
10665         }
10666         return 0;
10667 }
10668
10669 /*
10670  * This function will check a given extent item, including its backref and
10671  * itself (like crossing stripe boundary and type)
10672  *
10673  * Since we don't use extent_record anymore, introduce new error bit
10674  */
10675 static int check_extent_item(struct btrfs_fs_info *fs_info,
10676                              struct extent_buffer *eb, int slot)
10677 {
10678         struct btrfs_extent_item *ei;
10679         struct btrfs_extent_inline_ref *iref;
10680         struct btrfs_extent_data_ref *dref;
10681         unsigned long end;
10682         unsigned long ptr;
10683         int type;
10684         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10685         u32 item_size = btrfs_item_size_nr(eb, slot);
10686         u64 flags;
10687         u64 offset;
10688         int metadata = 0;
10689         int level;
10690         struct btrfs_key key;
10691         int ret;
10692         int err = 0;
10693
10694         btrfs_item_key_to_cpu(eb, &key, slot);
10695         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10696                 bytes_used += key.offset;
10697         else
10698                 bytes_used += nodesize;
10699
10700         if (item_size < sizeof(*ei)) {
10701                 /*
10702                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10703                  * old thing when on disk format is still un-determined.
10704                  * No need to care about it anymore
10705                  */
10706                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10707                 return -ENOTTY;
10708         }
10709
10710         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10711         flags = btrfs_extent_flags(eb, ei);
10712
10713         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10714                 metadata = 1;
10715         if (metadata && check_crossing_stripes(global_info, key.objectid,
10716                                                eb->len)) {
10717                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10718                       key.objectid, key.objectid + nodesize);
10719                 err |= CROSSING_STRIPE_BOUNDARY;
10720         }
10721
10722         ptr = (unsigned long)(ei + 1);
10723
10724         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10725                 /* Old EXTENT_ITEM metadata */
10726                 struct btrfs_tree_block_info *info;
10727
10728                 info = (struct btrfs_tree_block_info *)ptr;
10729                 level = btrfs_tree_block_level(eb, info);
10730                 ptr += sizeof(struct btrfs_tree_block_info);
10731         } else {
10732                 /* New METADATA_ITEM */
10733                 level = key.offset;
10734         }
10735         end = (unsigned long)ei + item_size;
10736
10737         if (ptr >= end) {
10738                 err |= ITEM_SIZE_MISMATCH;
10739                 goto out;
10740         }
10741
10742         /* Now check every backref in this extent item */
10743 next:
10744         iref = (struct btrfs_extent_inline_ref *)ptr;
10745         type = btrfs_extent_inline_ref_type(eb, iref);
10746         offset = btrfs_extent_inline_ref_offset(eb, iref);
10747         switch (type) {
10748         case BTRFS_TREE_BLOCK_REF_KEY:
10749                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10750                                                level);
10751                 err |= ret;
10752                 break;
10753         case BTRFS_SHARED_BLOCK_REF_KEY:
10754                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10755                                                  level);
10756                 err |= ret;
10757                 break;
10758         case BTRFS_EXTENT_DATA_REF_KEY:
10759                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10760                 ret = check_extent_data_backref(fs_info,
10761                                 btrfs_extent_data_ref_root(eb, dref),
10762                                 btrfs_extent_data_ref_objectid(eb, dref),
10763                                 btrfs_extent_data_ref_offset(eb, dref),
10764                                 key.objectid, key.offset,
10765                                 btrfs_extent_data_ref_count(eb, dref));
10766                 err |= ret;
10767                 break;
10768         case BTRFS_SHARED_DATA_REF_KEY:
10769                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10770                 err |= ret;
10771                 break;
10772         default:
10773                 error("extent[%llu %d %llu] has unknown ref type: %d",
10774                         key.objectid, key.type, key.offset, type);
10775                 err |= UNKNOWN_TYPE;
10776                 goto out;
10777         }
10778
10779         ptr += btrfs_extent_inline_ref_size(type);
10780         if (ptr < end)
10781                 goto next;
10782
10783 out:
10784         return err;
10785 }
10786
10787 /*
10788  * Check if a dev extent item is referred correctly by its chunk
10789  */
10790 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10791                                  struct extent_buffer *eb, int slot)
10792 {
10793         struct btrfs_root *chunk_root = fs_info->chunk_root;
10794         struct btrfs_dev_extent *ptr;
10795         struct btrfs_path path;
10796         struct btrfs_key chunk_key;
10797         struct btrfs_key devext_key;
10798         struct btrfs_chunk *chunk;
10799         struct extent_buffer *l;
10800         int num_stripes;
10801         u64 length;
10802         int i;
10803         int found_chunk = 0;
10804         int ret;
10805
10806         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10807         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10808         length = btrfs_dev_extent_length(eb, ptr);
10809
10810         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10811         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10812         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10813
10814         btrfs_init_path(&path);
10815         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10816         if (ret)
10817                 goto out;
10818
10819         l = path.nodes[0];
10820         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10821         if (btrfs_chunk_length(l, chunk) != length)
10822                 goto out;
10823
10824         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10825         for (i = 0; i < num_stripes; i++) {
10826                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10827                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10828
10829                 if (devid == devext_key.objectid &&
10830                     offset == devext_key.offset) {
10831                         found_chunk = 1;
10832                         break;
10833                 }
10834         }
10835 out:
10836         btrfs_release_path(&path);
10837         if (!found_chunk) {
10838                 error(
10839                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10840                         devext_key.objectid, devext_key.offset, length);
10841                 return REFERENCER_MISSING;
10842         }
10843         return 0;
10844 }
10845
10846 /*
10847  * Check if the used space is correct with the dev item
10848  */
10849 static int check_dev_item(struct btrfs_fs_info *fs_info,
10850                           struct extent_buffer *eb, int slot)
10851 {
10852         struct btrfs_root *dev_root = fs_info->dev_root;
10853         struct btrfs_dev_item *dev_item;
10854         struct btrfs_path path;
10855         struct btrfs_key key;
10856         struct btrfs_dev_extent *ptr;
10857         u64 dev_id;
10858         u64 used;
10859         u64 total = 0;
10860         int ret;
10861
10862         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10863         dev_id = btrfs_device_id(eb, dev_item);
10864         used = btrfs_device_bytes_used(eb, dev_item);
10865
10866         key.objectid = dev_id;
10867         key.type = BTRFS_DEV_EXTENT_KEY;
10868         key.offset = 0;
10869
10870         btrfs_init_path(&path);
10871         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10872         if (ret < 0) {
10873                 btrfs_item_key_to_cpu(eb, &key, slot);
10874                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10875                         key.objectid, key.type, key.offset);
10876                 btrfs_release_path(&path);
10877                 return REFERENCER_MISSING;
10878         }
10879
10880         /* Iterate dev_extents to calculate the used space of a device */
10881         while (1) {
10882                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10883
10884                 if (key.objectid > dev_id)
10885                         break;
10886                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10887                         goto next;
10888
10889                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10890                                      struct btrfs_dev_extent);
10891                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10892 next:
10893                 ret = btrfs_next_item(dev_root, &path);
10894                 if (ret)
10895                         break;
10896         }
10897         btrfs_release_path(&path);
10898
10899         if (used != total) {
10900                 btrfs_item_key_to_cpu(eb, &key, slot);
10901                 error(
10902 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10903                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10904                         BTRFS_DEV_EXTENT_KEY, dev_id);
10905                 return ACCOUNTING_MISMATCH;
10906         }
10907         return 0;
10908 }
10909
10910 /*
10911  * Check a block group item with its referener (chunk) and its used space
10912  * with extent/metadata item
10913  */
10914 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10915                                   struct extent_buffer *eb, int slot)
10916 {
10917         struct btrfs_root *extent_root = fs_info->extent_root;
10918         struct btrfs_root *chunk_root = fs_info->chunk_root;
10919         struct btrfs_block_group_item *bi;
10920         struct btrfs_block_group_item bg_item;
10921         struct btrfs_path path;
10922         struct btrfs_key bg_key;
10923         struct btrfs_key chunk_key;
10924         struct btrfs_key extent_key;
10925         struct btrfs_chunk *chunk;
10926         struct extent_buffer *leaf;
10927         struct btrfs_extent_item *ei;
10928         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10929         u64 flags;
10930         u64 bg_flags;
10931         u64 used;
10932         u64 total = 0;
10933         int ret;
10934         int err = 0;
10935
10936         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10937         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10938         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10939         used = btrfs_block_group_used(&bg_item);
10940         bg_flags = btrfs_block_group_flags(&bg_item);
10941
10942         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10943         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10944         chunk_key.offset = bg_key.objectid;
10945
10946         btrfs_init_path(&path);
10947         /* Search for the referencer chunk */
10948         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10949         if (ret) {
10950                 error(
10951                 "block group[%llu %llu] did not find the related chunk item",
10952                         bg_key.objectid, bg_key.offset);
10953                 err |= REFERENCER_MISSING;
10954         } else {
10955                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10956                                         struct btrfs_chunk);
10957                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10958                                                 bg_key.offset) {
10959                         error(
10960         "block group[%llu %llu] related chunk item length does not match",
10961                                 bg_key.objectid, bg_key.offset);
10962                         err |= REFERENCER_MISMATCH;
10963                 }
10964         }
10965         btrfs_release_path(&path);
10966
10967         /* Search from the block group bytenr */
10968         extent_key.objectid = bg_key.objectid;
10969         extent_key.type = 0;
10970         extent_key.offset = 0;
10971
10972         btrfs_init_path(&path);
10973         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10974         if (ret < 0)
10975                 goto out;
10976
10977         /* Iterate extent tree to account used space */
10978         while (1) {
10979                 leaf = path.nodes[0];
10980                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10981                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10982                         break;
10983
10984                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10985                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10986                         goto next;
10987                 if (extent_key.objectid < bg_key.objectid)
10988                         goto next;
10989
10990                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10991                         total += nodesize;
10992                 else
10993                         total += extent_key.offset;
10994
10995                 ei = btrfs_item_ptr(leaf, path.slots[0],
10996                                     struct btrfs_extent_item);
10997                 flags = btrfs_extent_flags(leaf, ei);
10998                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10999                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11000                                 error(
11001                         "bad extent[%llu, %llu) type mismatch with chunk",
11002                                         extent_key.objectid,
11003                                         extent_key.objectid + extent_key.offset);
11004                                 err |= CHUNK_TYPE_MISMATCH;
11005                         }
11006                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11007                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11008                                     BTRFS_BLOCK_GROUP_METADATA))) {
11009                                 error(
11010                         "bad extent[%llu, %llu) type mismatch with chunk",
11011                                         extent_key.objectid,
11012                                         extent_key.objectid + nodesize);
11013                                 err |= CHUNK_TYPE_MISMATCH;
11014                         }
11015                 }
11016 next:
11017                 ret = btrfs_next_item(extent_root, &path);
11018                 if (ret)
11019                         break;
11020         }
11021
11022 out:
11023         btrfs_release_path(&path);
11024
11025         if (total != used) {
11026                 error(
11027                 "block group[%llu %llu] used %llu but extent items used %llu",
11028                         bg_key.objectid, bg_key.offset, used, total);
11029                 err |= ACCOUNTING_MISMATCH;
11030         }
11031         return err;
11032 }
11033
11034 /*
11035  * Check a chunk item.
11036  * Including checking all referred dev_extents and block group
11037  */
11038 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11039                             struct extent_buffer *eb, int slot)
11040 {
11041         struct btrfs_root *extent_root = fs_info->extent_root;
11042         struct btrfs_root *dev_root = fs_info->dev_root;
11043         struct btrfs_path path;
11044         struct btrfs_key chunk_key;
11045         struct btrfs_key bg_key;
11046         struct btrfs_key devext_key;
11047         struct btrfs_chunk *chunk;
11048         struct extent_buffer *leaf;
11049         struct btrfs_block_group_item *bi;
11050         struct btrfs_block_group_item bg_item;
11051         struct btrfs_dev_extent *ptr;
11052         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11053         u64 length;
11054         u64 chunk_end;
11055         u64 type;
11056         u64 profile;
11057         int num_stripes;
11058         u64 offset;
11059         u64 objectid;
11060         int i;
11061         int ret;
11062         int err = 0;
11063
11064         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11065         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11066         length = btrfs_chunk_length(eb, chunk);
11067         chunk_end = chunk_key.offset + length;
11068         if (!IS_ALIGNED(length, sectorsize)) {
11069                 error("chunk[%llu %llu) not aligned to %u",
11070                         chunk_key.offset, chunk_end, sectorsize);
11071                 err |= BYTES_UNALIGNED;
11072                 goto out;
11073         }
11074
11075         type = btrfs_chunk_type(eb, chunk);
11076         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11077         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11078                 error("chunk[%llu %llu) has no chunk type",
11079                         chunk_key.offset, chunk_end);
11080                 err |= UNKNOWN_TYPE;
11081         }
11082         if (profile && (profile & (profile - 1))) {
11083                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11084                         chunk_key.offset, chunk_end, profile);
11085                 err |= UNKNOWN_TYPE;
11086         }
11087
11088         bg_key.objectid = chunk_key.offset;
11089         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11090         bg_key.offset = length;
11091
11092         btrfs_init_path(&path);
11093         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11094         if (ret) {
11095                 error(
11096                 "chunk[%llu %llu) did not find the related block group item",
11097                         chunk_key.offset, chunk_end);
11098                 err |= REFERENCER_MISSING;
11099         } else{
11100                 leaf = path.nodes[0];
11101                 bi = btrfs_item_ptr(leaf, path.slots[0],
11102                                     struct btrfs_block_group_item);
11103                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11104                                    sizeof(bg_item));
11105                 if (btrfs_block_group_flags(&bg_item) != type) {
11106                         error(
11107 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11108                                 chunk_key.offset, chunk_end, type,
11109                                 btrfs_block_group_flags(&bg_item));
11110                         err |= REFERENCER_MISSING;
11111                 }
11112         }
11113
11114         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11115         for (i = 0; i < num_stripes; i++) {
11116                 btrfs_release_path(&path);
11117                 btrfs_init_path(&path);
11118                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11119                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11120                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11121
11122                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11123                                         0, 0);
11124                 if (ret)
11125                         goto not_match_dev;
11126
11127                 leaf = path.nodes[0];
11128                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11129                                      struct btrfs_dev_extent);
11130                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11131                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11132                 if (objectid != chunk_key.objectid ||
11133                     offset != chunk_key.offset ||
11134                     btrfs_dev_extent_length(leaf, ptr) != length)
11135                         goto not_match_dev;
11136                 continue;
11137 not_match_dev:
11138                 err |= BACKREF_MISSING;
11139                 error(
11140                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11141                         chunk_key.objectid, chunk_end, i);
11142                 continue;
11143         }
11144         btrfs_release_path(&path);
11145 out:
11146         return err;
11147 }
11148
11149 /*
11150  * Main entry function to check known items and update related accounting info
11151  */
11152 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11153 {
11154         struct btrfs_fs_info *fs_info = root->fs_info;
11155         struct btrfs_key key;
11156         int slot = 0;
11157         int type;
11158         struct btrfs_extent_data_ref *dref;
11159         int ret;
11160         int err = 0;
11161
11162 next:
11163         btrfs_item_key_to_cpu(eb, &key, slot);
11164         type = key.type;
11165
11166         switch (type) {
11167         case BTRFS_EXTENT_DATA_KEY:
11168                 ret = check_extent_data_item(root, eb, slot);
11169                 err |= ret;
11170                 break;
11171         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11172                 ret = check_block_group_item(fs_info, eb, slot);
11173                 err |= ret;
11174                 break;
11175         case BTRFS_DEV_ITEM_KEY:
11176                 ret = check_dev_item(fs_info, eb, slot);
11177                 err |= ret;
11178                 break;
11179         case BTRFS_CHUNK_ITEM_KEY:
11180                 ret = check_chunk_item(fs_info, eb, slot);
11181                 err |= ret;
11182                 break;
11183         case BTRFS_DEV_EXTENT_KEY:
11184                 ret = check_dev_extent_item(fs_info, eb, slot);
11185                 err |= ret;
11186                 break;
11187         case BTRFS_EXTENT_ITEM_KEY:
11188         case BTRFS_METADATA_ITEM_KEY:
11189                 ret = check_extent_item(fs_info, eb, slot);
11190                 err |= ret;
11191                 break;
11192         case BTRFS_EXTENT_CSUM_KEY:
11193                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11194                 break;
11195         case BTRFS_TREE_BLOCK_REF_KEY:
11196                 ret = check_tree_block_backref(fs_info, key.offset,
11197                                                key.objectid, -1);
11198                 err |= ret;
11199                 break;
11200         case BTRFS_EXTENT_DATA_REF_KEY:
11201                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11202                 ret = check_extent_data_backref(fs_info,
11203                                 btrfs_extent_data_ref_root(eb, dref),
11204                                 btrfs_extent_data_ref_objectid(eb, dref),
11205                                 btrfs_extent_data_ref_offset(eb, dref),
11206                                 key.objectid, 0,
11207                                 btrfs_extent_data_ref_count(eb, dref));
11208                 err |= ret;
11209                 break;
11210         case BTRFS_SHARED_BLOCK_REF_KEY:
11211                 ret = check_shared_block_backref(fs_info, key.offset,
11212                                                  key.objectid, -1);
11213                 err |= ret;
11214                 break;
11215         case BTRFS_SHARED_DATA_REF_KEY:
11216                 ret = check_shared_data_backref(fs_info, key.offset,
11217                                                 key.objectid);
11218                 err |= ret;
11219                 break;
11220         default:
11221                 break;
11222         }
11223
11224         if (++slot < btrfs_header_nritems(eb))
11225                 goto next;
11226
11227         return err;
11228 }
11229
11230 /*
11231  * Helper function for later fs/subvol tree check.  To determine if a tree
11232  * block should be checked.
11233  * This function will ensure only the direct referencer with lowest rootid to
11234  * check a fs/subvolume tree block.
11235  *
11236  * Backref check at extent tree would detect errors like missing subvolume
11237  * tree, so we can do aggressive check to reduce duplicated checks.
11238  */
11239 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11240 {
11241         struct btrfs_root *extent_root = root->fs_info->extent_root;
11242         struct btrfs_key key;
11243         struct btrfs_path path;
11244         struct extent_buffer *leaf;
11245         int slot;
11246         struct btrfs_extent_item *ei;
11247         unsigned long ptr;
11248         unsigned long end;
11249         int type;
11250         u32 item_size;
11251         u64 offset;
11252         struct btrfs_extent_inline_ref *iref;
11253         int ret;
11254
11255         btrfs_init_path(&path);
11256         key.objectid = btrfs_header_bytenr(eb);
11257         key.type = BTRFS_METADATA_ITEM_KEY;
11258         key.offset = (u64)-1;
11259
11260         /*
11261          * Any failure in backref resolving means we can't determine
11262          * whom the tree block belongs to.
11263          * So in that case, we need to check that tree block
11264          */
11265         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11266         if (ret < 0)
11267                 goto need_check;
11268
11269         ret = btrfs_previous_extent_item(extent_root, &path,
11270                                          btrfs_header_bytenr(eb));
11271         if (ret)
11272                 goto need_check;
11273
11274         leaf = path.nodes[0];
11275         slot = path.slots[0];
11276         btrfs_item_key_to_cpu(leaf, &key, slot);
11277         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11278
11279         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11280                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11281         } else {
11282                 struct btrfs_tree_block_info *info;
11283
11284                 info = (struct btrfs_tree_block_info *)(ei + 1);
11285                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11286         }
11287
11288         item_size = btrfs_item_size_nr(leaf, slot);
11289         ptr = (unsigned long)iref;
11290         end = (unsigned long)ei + item_size;
11291         while (ptr < end) {
11292                 iref = (struct btrfs_extent_inline_ref *)ptr;
11293                 type = btrfs_extent_inline_ref_type(leaf, iref);
11294                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11295
11296                 /*
11297                  * We only check the tree block if current root is
11298                  * the lowest referencer of it.
11299                  */
11300                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11301                     offset < root->objectid) {
11302                         btrfs_release_path(&path);
11303                         return 0;
11304                 }
11305
11306                 ptr += btrfs_extent_inline_ref_size(type);
11307         }
11308         /*
11309          * Normally we should also check keyed tree block ref, but that may be
11310          * very time consuming.  Inlined ref should already make us skip a lot
11311          * of refs now.  So skip search keyed tree block ref.
11312          */
11313
11314 need_check:
11315         btrfs_release_path(&path);
11316         return 1;
11317 }
11318
11319 /*
11320  * Traversal function for tree block. We will do:
11321  * 1) Skip shared fs/subvolume tree blocks
11322  * 2) Update related bytes accounting
11323  * 3) Pre-order traversal
11324  */
11325 static int traverse_tree_block(struct btrfs_root *root,
11326                                 struct extent_buffer *node)
11327 {
11328         struct extent_buffer *eb;
11329         struct btrfs_key key;
11330         struct btrfs_key drop_key;
11331         int level;
11332         u64 nr;
11333         int i;
11334         int err = 0;
11335         int ret;
11336
11337         /*
11338          * Skip shared fs/subvolume tree block, in that case they will
11339          * be checked by referencer with lowest rootid
11340          */
11341         if (is_fstree(root->objectid) && !should_check(root, node))
11342                 return 0;
11343
11344         /* Update bytes accounting */
11345         total_btree_bytes += node->len;
11346         if (fs_root_objectid(btrfs_header_owner(node)))
11347                 total_fs_tree_bytes += node->len;
11348         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11349                 total_extent_tree_bytes += node->len;
11350         if (!found_old_backref &&
11351             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11352             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11353             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11354                 found_old_backref = 1;
11355
11356         /* pre-order tranversal, check itself first */
11357         level = btrfs_header_level(node);
11358         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11359                                    btrfs_header_level(node),
11360                                    btrfs_header_owner(node));
11361         err |= ret;
11362         if (err)
11363                 error(
11364         "check %s failed root %llu bytenr %llu level %d, force continue check",
11365                         level ? "node":"leaf", root->objectid,
11366                         btrfs_header_bytenr(node), btrfs_header_level(node));
11367
11368         if (!level) {
11369                 btree_space_waste += btrfs_leaf_free_space(root, node);
11370                 ret = check_leaf_items(root, node);
11371                 err |= ret;
11372                 return err;
11373         }
11374
11375         nr = btrfs_header_nritems(node);
11376         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11377         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11378                 sizeof(struct btrfs_key_ptr);
11379
11380         /* Then check all its children */
11381         for (i = 0; i < nr; i++) {
11382                 u64 blocknr = btrfs_node_blockptr(node, i);
11383
11384                 btrfs_node_key_to_cpu(node, &key, i);
11385                 if (level == root->root_item.drop_level &&
11386                     is_dropped_key(&key, &drop_key))
11387                         continue;
11388
11389                 /*
11390                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11391                  * to call the function itself.
11392                  */
11393                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11394                 if (extent_buffer_uptodate(eb)) {
11395                         ret = traverse_tree_block(root, eb);
11396                         err |= ret;
11397                 }
11398                 free_extent_buffer(eb);
11399         }
11400
11401         return err;
11402 }
11403
11404 /*
11405  * Low memory usage version check_chunks_and_extents.
11406  */
11407 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11408 {
11409         struct btrfs_path path;
11410         struct btrfs_key key;
11411         struct btrfs_root *root1;
11412         struct btrfs_root *cur_root;
11413         int err = 0;
11414         int ret;
11415
11416         root1 = root->fs_info->chunk_root;
11417         ret = traverse_tree_block(root1, root1->node);
11418         err |= ret;
11419
11420         root1 = root->fs_info->tree_root;
11421         ret = traverse_tree_block(root1, root1->node);
11422         err |= ret;
11423
11424         btrfs_init_path(&path);
11425         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11426         key.offset = 0;
11427         key.type = BTRFS_ROOT_ITEM_KEY;
11428
11429         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11430         if (ret) {
11431                 error("cannot find extent treet in tree_root");
11432                 goto out;
11433         }
11434
11435         while (1) {
11436                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11437                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11438                         goto next;
11439                 key.offset = (u64)-1;
11440
11441                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11442                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11443                                         &key);
11444                 else
11445                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11446                 if (IS_ERR(cur_root) || !cur_root) {
11447                         error("failed to read tree: %lld", key.objectid);
11448                         goto next;
11449                 }
11450
11451                 ret = traverse_tree_block(cur_root, cur_root->node);
11452                 err |= ret;
11453
11454                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11455                         btrfs_free_fs_root(cur_root);
11456 next:
11457                 ret = btrfs_next_item(root1, &path);
11458                 if (ret)
11459                         goto out;
11460         }
11461
11462 out:
11463         btrfs_release_path(&path);
11464         return err;
11465 }
11466
11467 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11468                            struct btrfs_root *root, int overwrite)
11469 {
11470         struct extent_buffer *c;
11471         struct extent_buffer *old = root->node;
11472         int level;
11473         int ret;
11474         struct btrfs_disk_key disk_key = {0,0,0};
11475
11476         level = 0;
11477
11478         if (overwrite) {
11479                 c = old;
11480                 extent_buffer_get(c);
11481                 goto init;
11482         }
11483         c = btrfs_alloc_free_block(trans, root,
11484                                    root->nodesize,
11485                                    root->root_key.objectid,
11486                                    &disk_key, level, 0, 0);
11487         if (IS_ERR(c)) {
11488                 c = old;
11489                 extent_buffer_get(c);
11490                 overwrite = 1;
11491         }
11492 init:
11493         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11494         btrfs_set_header_level(c, level);
11495         btrfs_set_header_bytenr(c, c->start);
11496         btrfs_set_header_generation(c, trans->transid);
11497         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11498         btrfs_set_header_owner(c, root->root_key.objectid);
11499
11500         write_extent_buffer(c, root->fs_info->fsid,
11501                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11502
11503         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11504                             btrfs_header_chunk_tree_uuid(c),
11505                             BTRFS_UUID_SIZE);
11506
11507         btrfs_mark_buffer_dirty(c);
11508         /*
11509          * this case can happen in the following case:
11510          *
11511          * 1.overwrite previous root.
11512          *
11513          * 2.reinit reloc data root, this is because we skip pin
11514          * down reloc data tree before which means we can allocate
11515          * same block bytenr here.
11516          */
11517         if (old->start == c->start) {
11518                 btrfs_set_root_generation(&root->root_item,
11519                                           trans->transid);
11520                 root->root_item.level = btrfs_header_level(root->node);
11521                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11522                                         &root->root_key, &root->root_item);
11523                 if (ret) {
11524                         free_extent_buffer(c);
11525                         return ret;
11526                 }
11527         }
11528         free_extent_buffer(old);
11529         root->node = c;
11530         add_root_to_dirty_list(root);
11531         return 0;
11532 }
11533
11534 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11535                                 struct extent_buffer *eb, int tree_root)
11536 {
11537         struct extent_buffer *tmp;
11538         struct btrfs_root_item *ri;
11539         struct btrfs_key key;
11540         u64 bytenr;
11541         u32 nodesize;
11542         int level = btrfs_header_level(eb);
11543         int nritems;
11544         int ret;
11545         int i;
11546
11547         /*
11548          * If we have pinned this block before, don't pin it again.
11549          * This can not only avoid forever loop with broken filesystem
11550          * but also give us some speedups.
11551          */
11552         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11553                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11554                 return 0;
11555
11556         btrfs_pin_extent(fs_info, eb->start, eb->len);
11557
11558         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11559         nritems = btrfs_header_nritems(eb);
11560         for (i = 0; i < nritems; i++) {
11561                 if (level == 0) {
11562                         btrfs_item_key_to_cpu(eb, &key, i);
11563                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11564                                 continue;
11565                         /* Skip the extent root and reloc roots */
11566                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11567                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11568                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11569                                 continue;
11570                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11571                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11572
11573                         /*
11574                          * If at any point we start needing the real root we
11575                          * will have to build a stump root for the root we are
11576                          * in, but for now this doesn't actually use the root so
11577                          * just pass in extent_root.
11578                          */
11579                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11580                                               nodesize, 0);
11581                         if (!extent_buffer_uptodate(tmp)) {
11582                                 fprintf(stderr, "Error reading root block\n");
11583                                 return -EIO;
11584                         }
11585                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11586                         free_extent_buffer(tmp);
11587                         if (ret)
11588                                 return ret;
11589                 } else {
11590                         bytenr = btrfs_node_blockptr(eb, i);
11591
11592                         /* If we aren't the tree root don't read the block */
11593                         if (level == 1 && !tree_root) {
11594                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11595                                 continue;
11596                         }
11597
11598                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11599                                               nodesize, 0);
11600                         if (!extent_buffer_uptodate(tmp)) {
11601                                 fprintf(stderr, "Error reading tree block\n");
11602                                 return -EIO;
11603                         }
11604                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11605                         free_extent_buffer(tmp);
11606                         if (ret)
11607                                 return ret;
11608                 }
11609         }
11610
11611         return 0;
11612 }
11613
11614 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11615 {
11616         int ret;
11617
11618         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11619         if (ret)
11620                 return ret;
11621
11622         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11623 }
11624
11625 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11626 {
11627         struct btrfs_block_group_cache *cache;
11628         struct btrfs_path path;
11629         struct extent_buffer *leaf;
11630         struct btrfs_chunk *chunk;
11631         struct btrfs_key key;
11632         int ret;
11633         u64 start;
11634
11635         btrfs_init_path(&path);
11636         key.objectid = 0;
11637         key.type = BTRFS_CHUNK_ITEM_KEY;
11638         key.offset = 0;
11639         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11640         if (ret < 0) {
11641                 btrfs_release_path(&path);
11642                 return ret;
11643         }
11644
11645         /*
11646          * We do this in case the block groups were screwed up and had alloc
11647          * bits that aren't actually set on the chunks.  This happens with
11648          * restored images every time and could happen in real life I guess.
11649          */
11650         fs_info->avail_data_alloc_bits = 0;
11651         fs_info->avail_metadata_alloc_bits = 0;
11652         fs_info->avail_system_alloc_bits = 0;
11653
11654         /* First we need to create the in-memory block groups */
11655         while (1) {
11656                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11657                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11658                         if (ret < 0) {
11659                                 btrfs_release_path(&path);
11660                                 return ret;
11661                         }
11662                         if (ret) {
11663                                 ret = 0;
11664                                 break;
11665                         }
11666                 }
11667                 leaf = path.nodes[0];
11668                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11669                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11670                         path.slots[0]++;
11671                         continue;
11672                 }
11673
11674                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11675                 btrfs_add_block_group(fs_info, 0,
11676                                       btrfs_chunk_type(leaf, chunk),
11677                                       key.objectid, key.offset,
11678                                       btrfs_chunk_length(leaf, chunk));
11679                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11680                                  key.offset + btrfs_chunk_length(leaf, chunk),
11681                                  GFP_NOFS);
11682                 path.slots[0]++;
11683         }
11684         start = 0;
11685         while (1) {
11686                 cache = btrfs_lookup_first_block_group(fs_info, start);
11687                 if (!cache)
11688                         break;
11689                 cache->cached = 1;
11690                 start = cache->key.objectid + cache->key.offset;
11691         }
11692
11693         btrfs_release_path(&path);
11694         return 0;
11695 }
11696
11697 static int reset_balance(struct btrfs_trans_handle *trans,
11698                          struct btrfs_fs_info *fs_info)
11699 {
11700         struct btrfs_root *root = fs_info->tree_root;
11701         struct btrfs_path path;
11702         struct extent_buffer *leaf;
11703         struct btrfs_key key;
11704         int del_slot, del_nr = 0;
11705         int ret;
11706         int found = 0;
11707
11708         btrfs_init_path(&path);
11709         key.objectid = BTRFS_BALANCE_OBJECTID;
11710         key.type = BTRFS_BALANCE_ITEM_KEY;
11711         key.offset = 0;
11712         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11713         if (ret) {
11714                 if (ret > 0)
11715                         ret = 0;
11716                 if (!ret)
11717                         goto reinit_data_reloc;
11718                 else
11719                         goto out;
11720         }
11721
11722         ret = btrfs_del_item(trans, root, &path);
11723         if (ret)
11724                 goto out;
11725         btrfs_release_path(&path);
11726
11727         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11728         key.type = BTRFS_ROOT_ITEM_KEY;
11729         key.offset = 0;
11730         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11731         if (ret < 0)
11732                 goto out;
11733         while (1) {
11734                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11735                         if (!found)
11736                                 break;
11737
11738                         if (del_nr) {
11739                                 ret = btrfs_del_items(trans, root, &path,
11740                                                       del_slot, del_nr);
11741                                 del_nr = 0;
11742                                 if (ret)
11743                                         goto out;
11744                         }
11745                         key.offset++;
11746                         btrfs_release_path(&path);
11747
11748                         found = 0;
11749                         ret = btrfs_search_slot(trans, root, &key, &path,
11750                                                 -1, 1);
11751                         if (ret < 0)
11752                                 goto out;
11753                         continue;
11754                 }
11755                 found = 1;
11756                 leaf = path.nodes[0];
11757                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11758                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11759                         break;
11760                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11761                         path.slots[0]++;
11762                         continue;
11763                 }
11764                 if (!del_nr) {
11765                         del_slot = path.slots[0];
11766                         del_nr = 1;
11767                 } else {
11768                         del_nr++;
11769                 }
11770                 path.slots[0]++;
11771         }
11772
11773         if (del_nr) {
11774                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11775                 if (ret)
11776                         goto out;
11777         }
11778         btrfs_release_path(&path);
11779
11780 reinit_data_reloc:
11781         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11782         key.type = BTRFS_ROOT_ITEM_KEY;
11783         key.offset = (u64)-1;
11784         root = btrfs_read_fs_root(fs_info, &key);
11785         if (IS_ERR(root)) {
11786                 fprintf(stderr, "Error reading data reloc tree\n");
11787                 ret = PTR_ERR(root);
11788                 goto out;
11789         }
11790         record_root_in_trans(trans, root);
11791         ret = btrfs_fsck_reinit_root(trans, root, 0);
11792         if (ret)
11793                 goto out;
11794         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11795 out:
11796         btrfs_release_path(&path);
11797         return ret;
11798 }
11799
11800 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11801                               struct btrfs_fs_info *fs_info)
11802 {
11803         u64 start = 0;
11804         int ret;
11805
11806         /*
11807          * The only reason we don't do this is because right now we're just
11808          * walking the trees we find and pinning down their bytes, we don't look
11809          * at any of the leaves.  In order to do mixed groups we'd have to check
11810          * the leaves of any fs roots and pin down the bytes for any file
11811          * extents we find.  Not hard but why do it if we don't have to?
11812          */
11813         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11814                 fprintf(stderr, "We don't support re-initing the extent tree "
11815                         "for mixed block groups yet, please notify a btrfs "
11816                         "developer you want to do this so they can add this "
11817                         "functionality.\n");
11818                 return -EINVAL;
11819         }
11820
11821         /*
11822          * first we need to walk all of the trees except the extent tree and pin
11823          * down the bytes that are in use so we don't overwrite any existing
11824          * metadata.
11825          */
11826         ret = pin_metadata_blocks(fs_info);
11827         if (ret) {
11828                 fprintf(stderr, "error pinning down used bytes\n");
11829                 return ret;
11830         }
11831
11832         /*
11833          * Need to drop all the block groups since we're going to recreate all
11834          * of them again.
11835          */
11836         btrfs_free_block_groups(fs_info);
11837         ret = reset_block_groups(fs_info);
11838         if (ret) {
11839                 fprintf(stderr, "error resetting the block groups\n");
11840                 return ret;
11841         }
11842
11843         /* Ok we can allocate now, reinit the extent root */
11844         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11845         if (ret) {
11846                 fprintf(stderr, "extent root initialization failed\n");
11847                 /*
11848                  * When the transaction code is updated we should end the
11849                  * transaction, but for now progs only knows about commit so
11850                  * just return an error.
11851                  */
11852                 return ret;
11853         }
11854
11855         /*
11856          * Now we have all the in-memory block groups setup so we can make
11857          * allocations properly, and the metadata we care about is safe since we
11858          * pinned all of it above.
11859          */
11860         while (1) {
11861                 struct btrfs_block_group_cache *cache;
11862
11863                 cache = btrfs_lookup_first_block_group(fs_info, start);
11864                 if (!cache)
11865                         break;
11866                 start = cache->key.objectid + cache->key.offset;
11867                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11868                                         &cache->key, &cache->item,
11869                                         sizeof(cache->item));
11870                 if (ret) {
11871                         fprintf(stderr, "Error adding block group\n");
11872                         return ret;
11873                 }
11874                 btrfs_extent_post_op(trans, fs_info->extent_root);
11875         }
11876
11877         ret = reset_balance(trans, fs_info);
11878         if (ret)
11879                 fprintf(stderr, "error resetting the pending balance\n");
11880
11881         return ret;
11882 }
11883
11884 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11885 {
11886         struct btrfs_path path;
11887         struct btrfs_trans_handle *trans;
11888         struct btrfs_key key;
11889         int ret;
11890
11891         printf("Recowing metadata block %llu\n", eb->start);
11892         key.objectid = btrfs_header_owner(eb);
11893         key.type = BTRFS_ROOT_ITEM_KEY;
11894         key.offset = (u64)-1;
11895
11896         root = btrfs_read_fs_root(root->fs_info, &key);
11897         if (IS_ERR(root)) {
11898                 fprintf(stderr, "Couldn't find owner root %llu\n",
11899                         key.objectid);
11900                 return PTR_ERR(root);
11901         }
11902
11903         trans = btrfs_start_transaction(root, 1);
11904         if (IS_ERR(trans))
11905                 return PTR_ERR(trans);
11906
11907         btrfs_init_path(&path);
11908         path.lowest_level = btrfs_header_level(eb);
11909         if (path.lowest_level)
11910                 btrfs_node_key_to_cpu(eb, &key, 0);
11911         else
11912                 btrfs_item_key_to_cpu(eb, &key, 0);
11913
11914         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11915         btrfs_commit_transaction(trans, root);
11916         btrfs_release_path(&path);
11917         return ret;
11918 }
11919
11920 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11921 {
11922         struct btrfs_path path;
11923         struct btrfs_trans_handle *trans;
11924         struct btrfs_key key;
11925         int ret;
11926
11927         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11928                bad->key.type, bad->key.offset);
11929         key.objectid = bad->root_id;
11930         key.type = BTRFS_ROOT_ITEM_KEY;
11931         key.offset = (u64)-1;
11932
11933         root = btrfs_read_fs_root(root->fs_info, &key);
11934         if (IS_ERR(root)) {
11935                 fprintf(stderr, "Couldn't find owner root %llu\n",
11936                         key.objectid);
11937                 return PTR_ERR(root);
11938         }
11939
11940         trans = btrfs_start_transaction(root, 1);
11941         if (IS_ERR(trans))
11942                 return PTR_ERR(trans);
11943
11944         btrfs_init_path(&path);
11945         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11946         if (ret) {
11947                 if (ret > 0)
11948                         ret = 0;
11949                 goto out;
11950         }
11951         ret = btrfs_del_item(trans, root, &path);
11952 out:
11953         btrfs_commit_transaction(trans, root);
11954         btrfs_release_path(&path);
11955         return ret;
11956 }
11957
11958 static int zero_log_tree(struct btrfs_root *root)
11959 {
11960         struct btrfs_trans_handle *trans;
11961         int ret;
11962
11963         trans = btrfs_start_transaction(root, 1);
11964         if (IS_ERR(trans)) {
11965                 ret = PTR_ERR(trans);
11966                 return ret;
11967         }
11968         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11969         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11970         ret = btrfs_commit_transaction(trans, root);
11971         return ret;
11972 }
11973
11974 static int populate_csum(struct btrfs_trans_handle *trans,
11975                          struct btrfs_root *csum_root, char *buf, u64 start,
11976                          u64 len)
11977 {
11978         u64 offset = 0;
11979         u64 sectorsize;
11980         int ret = 0;
11981
11982         while (offset < len) {
11983                 sectorsize = csum_root->sectorsize;
11984                 ret = read_extent_data(csum_root, buf, start + offset,
11985                                        &sectorsize, 0);
11986                 if (ret)
11987                         break;
11988                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11989                                             start + offset, buf, sectorsize);
11990                 if (ret)
11991                         break;
11992                 offset += sectorsize;
11993         }
11994         return ret;
11995 }
11996
11997 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11998                                       struct btrfs_root *csum_root,
11999                                       struct btrfs_root *cur_root)
12000 {
12001         struct btrfs_path path;
12002         struct btrfs_key key;
12003         struct extent_buffer *node;
12004         struct btrfs_file_extent_item *fi;
12005         char *buf = NULL;
12006         u64 start = 0;
12007         u64 len = 0;
12008         int slot = 0;
12009         int ret = 0;
12010
12011         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12012         if (!buf)
12013                 return -ENOMEM;
12014
12015         btrfs_init_path(&path);
12016         key.objectid = 0;
12017         key.offset = 0;
12018         key.type = 0;
12019         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12020         if (ret < 0)
12021                 goto out;
12022         /* Iterate all regular file extents and fill its csum */
12023         while (1) {
12024                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12025
12026                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12027                         goto next;
12028                 node = path.nodes[0];
12029                 slot = path.slots[0];
12030                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12031                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12032                         goto next;
12033                 start = btrfs_file_extent_disk_bytenr(node, fi);
12034                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12035
12036                 ret = populate_csum(trans, csum_root, buf, start, len);
12037                 if (ret == -EEXIST)
12038                         ret = 0;
12039                 if (ret < 0)
12040                         goto out;
12041 next:
12042                 /*
12043                  * TODO: if next leaf is corrupted, jump to nearest next valid
12044                  * leaf.
12045                  */
12046                 ret = btrfs_next_item(cur_root, &path);
12047                 if (ret < 0)
12048                         goto out;
12049                 if (ret > 0) {
12050                         ret = 0;
12051                         goto out;
12052                 }
12053         }
12054
12055 out:
12056         btrfs_release_path(&path);
12057         free(buf);
12058         return ret;
12059 }
12060
12061 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12062                                   struct btrfs_root *csum_root)
12063 {
12064         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12065         struct btrfs_path path;
12066         struct btrfs_root *tree_root = fs_info->tree_root;
12067         struct btrfs_root *cur_root;
12068         struct extent_buffer *node;
12069         struct btrfs_key key;
12070         int slot = 0;
12071         int ret = 0;
12072
12073         btrfs_init_path(&path);
12074         key.objectid = BTRFS_FS_TREE_OBJECTID;
12075         key.offset = 0;
12076         key.type = BTRFS_ROOT_ITEM_KEY;
12077         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12078         if (ret < 0)
12079                 goto out;
12080         if (ret > 0) {
12081                 ret = -ENOENT;
12082                 goto out;
12083         }
12084
12085         while (1) {
12086                 node = path.nodes[0];
12087                 slot = path.slots[0];
12088                 btrfs_item_key_to_cpu(node, &key, slot);
12089                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12090                         goto out;
12091                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12092                         goto next;
12093                 if (!is_fstree(key.objectid))
12094                         goto next;
12095                 key.offset = (u64)-1;
12096
12097                 cur_root = btrfs_read_fs_root(fs_info, &key);
12098                 if (IS_ERR(cur_root) || !cur_root) {
12099                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12100                                 key.objectid);
12101                         goto out;
12102                 }
12103                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12104                                 cur_root);
12105                 if (ret < 0)
12106                         goto out;
12107 next:
12108                 ret = btrfs_next_item(tree_root, &path);
12109                 if (ret > 0) {
12110                         ret = 0;
12111                         goto out;
12112                 }
12113                 if (ret < 0)
12114                         goto out;
12115         }
12116
12117 out:
12118         btrfs_release_path(&path);
12119         return ret;
12120 }
12121
12122 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12123                                       struct btrfs_root *csum_root)
12124 {
12125         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12126         struct btrfs_path path;
12127         struct btrfs_extent_item *ei;
12128         struct extent_buffer *leaf;
12129         char *buf;
12130         struct btrfs_key key;
12131         int ret;
12132
12133         btrfs_init_path(&path);
12134         key.objectid = 0;
12135         key.type = BTRFS_EXTENT_ITEM_KEY;
12136         key.offset = 0;
12137         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12138         if (ret < 0) {
12139                 btrfs_release_path(&path);
12140                 return ret;
12141         }
12142
12143         buf = malloc(csum_root->sectorsize);
12144         if (!buf) {
12145                 btrfs_release_path(&path);
12146                 return -ENOMEM;
12147         }
12148
12149         while (1) {
12150                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12151                         ret = btrfs_next_leaf(extent_root, &path);
12152                         if (ret < 0)
12153                                 break;
12154                         if (ret) {
12155                                 ret = 0;
12156                                 break;
12157                         }
12158                 }
12159                 leaf = path.nodes[0];
12160
12161                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12162                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12163                         path.slots[0]++;
12164                         continue;
12165                 }
12166
12167                 ei = btrfs_item_ptr(leaf, path.slots[0],
12168                                     struct btrfs_extent_item);
12169                 if (!(btrfs_extent_flags(leaf, ei) &
12170                       BTRFS_EXTENT_FLAG_DATA)) {
12171                         path.slots[0]++;
12172                         continue;
12173                 }
12174
12175                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12176                                     key.offset);
12177                 if (ret)
12178                         break;
12179                 path.slots[0]++;
12180         }
12181
12182         btrfs_release_path(&path);
12183         free(buf);
12184         return ret;
12185 }
12186
/*
 * Recalculate the data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes out all the extent info, so in that
 * case the extent tree cannot be used as the source of data extents. When
 * @search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12203
12204 static void free_roots_info_cache(void)
12205 {
12206         if (!roots_info_cache)
12207                 return;
12208
12209         while (!cache_tree_empty(roots_info_cache)) {
12210                 struct cache_extent *entry;
12211                 struct root_item_info *rii;
12212
12213                 entry = first_cache_extent(roots_info_cache);
12214                 if (!entry)
12215                         break;
12216                 remove_cache_extent(roots_info_cache, entry);
12217                 rii = container_of(entry, struct root_item_info, cache_extent);
12218                 free(rii);
12219         }
12220
12221         free(roots_info_cache);
12222         roots_info_cache = NULL;
12223 }
12224
12225 static int build_roots_info_cache(struct btrfs_fs_info *info)
12226 {
12227         int ret = 0;
12228         struct btrfs_key key;
12229         struct extent_buffer *leaf;
12230         struct btrfs_path path;
12231
12232         if (!roots_info_cache) {
12233                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12234                 if (!roots_info_cache)
12235                         return -ENOMEM;
12236                 cache_tree_init(roots_info_cache);
12237         }
12238
12239         btrfs_init_path(&path);
12240         key.objectid = 0;
12241         key.type = BTRFS_EXTENT_ITEM_KEY;
12242         key.offset = 0;
12243         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12244         if (ret < 0)
12245                 goto out;
12246         leaf = path.nodes[0];
12247
12248         while (1) {
12249                 struct btrfs_key found_key;
12250                 struct btrfs_extent_item *ei;
12251                 struct btrfs_extent_inline_ref *iref;
12252                 int slot = path.slots[0];
12253                 int type;
12254                 u64 flags;
12255                 u64 root_id;
12256                 u8 level;
12257                 struct cache_extent *entry;
12258                 struct root_item_info *rii;
12259
12260                 if (slot >= btrfs_header_nritems(leaf)) {
12261                         ret = btrfs_next_leaf(info->extent_root, &path);
12262                         if (ret < 0) {
12263                                 break;
12264                         } else if (ret) {
12265                                 ret = 0;
12266                                 break;
12267                         }
12268                         leaf = path.nodes[0];
12269                         slot = path.slots[0];
12270                 }
12271
12272                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12273
12274                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12275                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12276                         goto next;
12277
12278                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12279                 flags = btrfs_extent_flags(leaf, ei);
12280
12281                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12282                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12283                         goto next;
12284
12285                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12286                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12287                         level = found_key.offset;
12288                 } else {
12289                         struct btrfs_tree_block_info *binfo;
12290
12291                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12292                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12293                         level = btrfs_tree_block_level(leaf, binfo);
12294                 }
12295
12296                 /*
12297                  * For a root extent, it must be of the following type and the
12298                  * first (and only one) iref in the item.
12299                  */
12300                 type = btrfs_extent_inline_ref_type(leaf, iref);
12301                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12302                         goto next;
12303
12304                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12305                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12306                 if (!entry) {
12307                         rii = malloc(sizeof(struct root_item_info));
12308                         if (!rii) {
12309                                 ret = -ENOMEM;
12310                                 goto out;
12311                         }
12312                         rii->cache_extent.start = root_id;
12313                         rii->cache_extent.size = 1;
12314                         rii->level = (u8)-1;
12315                         entry = &rii->cache_extent;
12316                         ret = insert_cache_extent(roots_info_cache, entry);
12317                         ASSERT(ret == 0);
12318                 } else {
12319                         rii = container_of(entry, struct root_item_info,
12320                                            cache_extent);
12321                 }
12322
12323                 ASSERT(rii->cache_extent.start == root_id);
12324                 ASSERT(rii->cache_extent.size == 1);
12325
12326                 if (level > rii->level || rii->level == (u8)-1) {
12327                         rii->level = level;
12328                         rii->bytenr = found_key.objectid;
12329                         rii->gen = btrfs_extent_generation(leaf, ei);
12330                         rii->node_count = 1;
12331                 } else if (level == rii->level) {
12332                         rii->node_count++;
12333                 }
12334 next:
12335                 path.slots[0]++;
12336         }
12337
12338 out:
12339         btrfs_release_path(&path);
12340
12341         return ret;
12342 }
12343
12344 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12345                                   struct btrfs_path *path,
12346                                   const struct btrfs_key *root_key,
12347                                   const int read_only_mode)
12348 {
12349         const u64 root_id = root_key->objectid;
12350         struct cache_extent *entry;
12351         struct root_item_info *rii;
12352         struct btrfs_root_item ri;
12353         unsigned long offset;
12354
12355         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12356         if (!entry) {
12357                 fprintf(stderr,
12358                         "Error: could not find extent items for root %llu\n",
12359                         root_key->objectid);
12360                 return -ENOENT;
12361         }
12362
12363         rii = container_of(entry, struct root_item_info, cache_extent);
12364         ASSERT(rii->cache_extent.start == root_id);
12365         ASSERT(rii->cache_extent.size == 1);
12366
12367         if (rii->node_count != 1) {
12368                 fprintf(stderr,
12369                         "Error: could not find btree root extent for root %llu\n",
12370                         root_id);
12371                 return -ENOENT;
12372         }
12373
12374         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12375         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12376
12377         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12378             btrfs_root_level(&ri) != rii->level ||
12379             btrfs_root_generation(&ri) != rii->gen) {
12380
12381                 /*
12382                  * If we're in repair mode but our caller told us to not update
12383                  * the root item, i.e. just check if it needs to be updated, don't
12384                  * print this message, since the caller will call us again shortly
12385                  * for the same root item without read only mode (the caller will
12386                  * open a transaction first).
12387                  */
12388                 if (!(read_only_mode && repair))
12389                         fprintf(stderr,
12390                                 "%sroot item for root %llu,"
12391                                 " current bytenr %llu, current gen %llu, current level %u,"
12392                                 " new bytenr %llu, new gen %llu, new level %u\n",
12393                                 (read_only_mode ? "" : "fixing "),
12394                                 root_id,
12395                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12396                                 btrfs_root_level(&ri),
12397                                 rii->bytenr, rii->gen, rii->level);
12398
12399                 if (btrfs_root_generation(&ri) > rii->gen) {
12400                         fprintf(stderr,
12401                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12402                                 root_id, btrfs_root_generation(&ri), rii->gen);
12403                         return -EINVAL;
12404                 }
12405
12406                 if (!read_only_mode) {
12407                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12408                         btrfs_set_root_level(&ri, rii->level);
12409                         btrfs_set_root_generation(&ri, rii->gen);
12410                         write_extent_buffer(path->nodes[0], &ri,
12411                                             offset, sizeof(ri));
12412                 }
12413
12414                 return 1;
12415         }
12416
12417         return 0;
12418 }
12419
12420 /*
12421  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12422  * caused read-only snapshots to be corrupted if they were created at a moment
12423  * when the source subvolume/snapshot had orphan items. The issue was that the
12424  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12425  * node instead of the post orphan cleanup root node.
12426  * So this function, and its callees, just detects and fixes those cases. Even
12427  * though the regression was for read-only snapshots, this function applies to
12428  * any snapshot/subvolume root.
12429  * This must be run before any other repair code - not doing it so, makes other
12430  * repair code delete or modify backrefs in the extent tree for example, which
12431  * will result in an inconsistent fs after repairing the root items.
12432  */
12433 static int repair_root_items(struct btrfs_fs_info *info)
12434 {
12435         struct btrfs_path path;
12436         struct btrfs_key key;
12437         struct extent_buffer *leaf;
12438         struct btrfs_trans_handle *trans = NULL;
12439         int ret = 0;
12440         int bad_roots = 0;
12441         int need_trans = 0;
12442
12443         btrfs_init_path(&path);
12444
12445         ret = build_roots_info_cache(info);
12446         if (ret)
12447                 goto out;
12448
12449         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12450         key.type = BTRFS_ROOT_ITEM_KEY;
12451         key.offset = 0;
12452
12453 again:
12454         /*
12455          * Avoid opening and committing transactions if a leaf doesn't have
12456          * any root items that need to be fixed, so that we avoid rotating
12457          * backup roots unnecessarily.
12458          */
12459         if (need_trans) {
12460                 trans = btrfs_start_transaction(info->tree_root, 1);
12461                 if (IS_ERR(trans)) {
12462                         ret = PTR_ERR(trans);
12463                         goto out;
12464                 }
12465         }
12466
12467         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12468                                 0, trans ? 1 : 0);
12469         if (ret < 0)
12470                 goto out;
12471         leaf = path.nodes[0];
12472
12473         while (1) {
12474                 struct btrfs_key found_key;
12475
12476                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12477                         int no_more_keys = find_next_key(&path, &key);
12478
12479                         btrfs_release_path(&path);
12480                         if (trans) {
12481                                 ret = btrfs_commit_transaction(trans,
12482                                                                info->tree_root);
12483                                 trans = NULL;
12484                                 if (ret < 0)
12485                                         goto out;
12486                         }
12487                         need_trans = 0;
12488                         if (no_more_keys)
12489                                 break;
12490                         goto again;
12491                 }
12492
12493                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12494
12495                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12496                         goto next;
12497                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12498                         goto next;
12499
12500                 ret = maybe_repair_root_item(info, &path, &found_key,
12501                                              trans ? 0 : 1);
12502                 if (ret < 0)
12503                         goto out;
12504                 if (ret) {
12505                         if (!trans && repair) {
12506                                 need_trans = 1;
12507                                 key = found_key;
12508                                 btrfs_release_path(&path);
12509                                 goto again;
12510                         }
12511                         bad_roots++;
12512                 }
12513 next:
12514                 path.slots[0]++;
12515         }
12516         ret = 0;
12517 out:
12518         free_roots_info_cache();
12519         btrfs_release_path(&path);
12520         if (trans)
12521                 btrfs_commit_transaction(trans, info->tree_root);
12522         if (ret < 0)
12523                 return ret;
12524
12525         return bad_roots;
12526 }
12527
12528 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12529 {
12530         struct btrfs_trans_handle *trans;
12531         struct btrfs_block_group_cache *bg_cache;
12532         u64 current = 0;
12533         int ret = 0;
12534
12535         /* Clear all free space cache inodes and its extent data */
12536         while (1) {
12537                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12538                 if (!bg_cache)
12539                         break;
12540                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12541                 if (ret < 0)
12542                         return ret;
12543                 current = bg_cache->key.objectid + bg_cache->key.offset;
12544         }
12545
12546         /* Don't forget to set cache_generation to -1 */
12547         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12548         if (IS_ERR(trans)) {
12549                 error("failed to update super block cache generation");
12550                 return PTR_ERR(trans);
12551         }
12552         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12553         btrfs_commit_transaction(trans, fs_info->tree_root);
12554
12555         return ret;
12556 }
12557
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.
 * It is passed to usage() by cmd_check().
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and one-line summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description, followed by an empty separator line */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12588
12589 int cmd_check(int argc, char **argv)
12590 {
12591         struct cache_tree root_cache;
12592         struct btrfs_root *root;
12593         struct btrfs_fs_info *info;
12594         u64 bytenr = 0;
12595         u64 subvolid = 0;
12596         u64 tree_root_bytenr = 0;
12597         u64 chunk_root_bytenr = 0;
12598         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12599         int ret;
12600         int err = 0;
12601         u64 num;
12602         int init_csum_tree = 0;
12603         int readonly = 0;
12604         int clear_space_cache = 0;
12605         int qgroup_report = 0;
12606         int qgroups_repaired = 0;
12607         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12608
12609         while(1) {
12610                 int c;
12611                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12612                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12613                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12614                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12615                 static const struct option long_options[] = {
12616                         { "super", required_argument, NULL, 's' },
12617                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12618                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12619                         { "init-csum-tree", no_argument, NULL,
12620                                 GETOPT_VAL_INIT_CSUM },
12621                         { "init-extent-tree", no_argument, NULL,
12622                                 GETOPT_VAL_INIT_EXTENT },
12623                         { "check-data-csum", no_argument, NULL,
12624                                 GETOPT_VAL_CHECK_CSUM },
12625                         { "backup", no_argument, NULL, 'b' },
12626                         { "subvol-extents", required_argument, NULL, 'E' },
12627                         { "qgroup-report", no_argument, NULL, 'Q' },
12628                         { "tree-root", required_argument, NULL, 'r' },
12629                         { "chunk-root", required_argument, NULL,
12630                                 GETOPT_VAL_CHUNK_TREE },
12631                         { "progress", no_argument, NULL, 'p' },
12632                         { "mode", required_argument, NULL,
12633                                 GETOPT_VAL_MODE },
12634                         { "clear-space-cache", required_argument, NULL,
12635                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12636                         { NULL, 0, NULL, 0}
12637                 };
12638
12639                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12640                 if (c < 0)
12641                         break;
12642                 switch(c) {
12643                         case 'a': /* ignored */ break;
12644                         case 'b':
12645                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12646                                 break;
12647                         case 's':
12648                                 num = arg_strtou64(optarg);
12649                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12650                                         error(
12651                                         "super mirror should be less than %d",
12652                                                 BTRFS_SUPER_MIRROR_MAX);
12653                                         exit(1);
12654                                 }
12655                                 bytenr = btrfs_sb_offset(((int)num));
12656                                 printf("using SB copy %llu, bytenr %llu\n", num,
12657                                        (unsigned long long)bytenr);
12658                                 break;
12659                         case 'Q':
12660                                 qgroup_report = 1;
12661                                 break;
12662                         case 'E':
12663                                 subvolid = arg_strtou64(optarg);
12664                                 break;
12665                         case 'r':
12666                                 tree_root_bytenr = arg_strtou64(optarg);
12667                                 break;
12668                         case GETOPT_VAL_CHUNK_TREE:
12669                                 chunk_root_bytenr = arg_strtou64(optarg);
12670                                 break;
12671                         case 'p':
12672                                 ctx.progress_enabled = true;
12673                                 break;
12674                         case '?':
12675                         case 'h':
12676                                 usage(cmd_check_usage);
12677                         case GETOPT_VAL_REPAIR:
12678                                 printf("enabling repair mode\n");
12679                                 repair = 1;
12680                                 ctree_flags |= OPEN_CTREE_WRITES;
12681                                 break;
12682                         case GETOPT_VAL_READONLY:
12683                                 readonly = 1;
12684                                 break;
12685                         case GETOPT_VAL_INIT_CSUM:
12686                                 printf("Creating a new CRC tree\n");
12687                                 init_csum_tree = 1;
12688                                 repair = 1;
12689                                 ctree_flags |= OPEN_CTREE_WRITES;
12690                                 break;
12691                         case GETOPT_VAL_INIT_EXTENT:
12692                                 init_extent_tree = 1;
12693                                 ctree_flags |= (OPEN_CTREE_WRITES |
12694                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12695                                 repair = 1;
12696                                 break;
12697                         case GETOPT_VAL_CHECK_CSUM:
12698                                 check_data_csum = 1;
12699                                 break;
12700                         case GETOPT_VAL_MODE:
12701                                 check_mode = parse_check_mode(optarg);
12702                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12703                                         error("unknown mode: %s", optarg);
12704                                         exit(1);
12705                                 }
12706                                 break;
12707                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12708                                 if (strcmp(optarg, "v1") == 0) {
12709                                         clear_space_cache = 1;
12710                                 } else if (strcmp(optarg, "v2") == 0) {
12711                                         clear_space_cache = 2;
12712                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12713                                 } else {
12714                                         error(
12715                 "invalid argument to --clear-space-cache, must be v1 or v2");
12716                                         exit(1);
12717                                 }
12718                                 ctree_flags |= OPEN_CTREE_WRITES;
12719                                 break;
12720                 }
12721         }
12722
12723         if (check_argc_exact(argc - optind, 1))
12724                 usage(cmd_check_usage);
12725
12726         if (ctx.progress_enabled) {
12727                 ctx.tp = TASK_NOTHING;
12728                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12729         }
12730
12731         /* This check is the only reason for --readonly to exist */
12732         if (readonly && repair) {
12733                 error("repair options are not compatible with --readonly");
12734                 exit(1);
12735         }
12736
12737         /*
12738          * Not supported yet
12739          */
12740         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12741                 error("low memory mode doesn't support repair yet");
12742                 exit(1);
12743         }
12744
12745         radix_tree_init();
12746         cache_tree_init(&root_cache);
12747
12748         if((ret = check_mounted(argv[optind])) < 0) {
12749                 error("could not check mount status: %s", strerror(-ret));
12750                 err |= !!ret;
12751                 goto err_out;
12752         } else if(ret) {
12753                 error("%s is currently mounted, aborting", argv[optind]);
12754                 ret = -EBUSY;
12755                 err |= !!ret;
12756                 goto err_out;
12757         }
12758
12759         /* only allow partial opening under repair mode */
12760         if (repair)
12761                 ctree_flags |= OPEN_CTREE_PARTIAL;
12762
12763         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12764                                   chunk_root_bytenr, ctree_flags);
12765         if (!info) {
12766                 error("cannot open file system");
12767                 ret = -EIO;
12768                 err |= !!ret;
12769                 goto err_out;
12770         }
12771
12772         global_info = info;
12773         root = info->fs_root;
12774         if (clear_space_cache == 1) {
12775                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12776                         error(
12777                 "free space cache v2 detected, use --clear-space-cache v2");
12778                         ret = 1;
12779                         goto close_out;
12780                 }
12781                 printf("Clearing free space cache\n");
12782                 ret = clear_free_space_cache(info);
12783                 if (ret) {
12784                         error("failed to clear free space cache");
12785                         ret = 1;
12786                 } else {
12787                         printf("Free space cache cleared\n");
12788                 }
12789                 goto close_out;
12790         } else if (clear_space_cache == 2) {
12791                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12792                         printf("no free space cache v2 to clear\n");
12793                         ret = 0;
12794                         goto close_out;
12795                 }
12796                 printf("Clear free space cache v2\n");
12797                 ret = btrfs_clear_free_space_tree(info);
12798                 if (ret) {
12799                         error("failed to clear free space cache v2: %d", ret);
12800                         ret = 1;
12801                 } else {
12802                         printf("free space cache v2 cleared\n");
12803                 }
12804                 goto close_out;
12805         }
12806
12807         /*
12808          * repair mode will force us to commit transaction which
12809          * will make us fail to load log tree when mounting.
12810          */
12811         if (repair && btrfs_super_log_root(info->super_copy)) {
12812                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12813                 if (!ret) {
12814                         ret = 1;
12815                         err |= !!ret;
12816                         goto close_out;
12817                 }
12818                 ret = zero_log_tree(root);
12819                 err |= !!ret;
12820                 if (ret) {
12821                         error("failed to zero log tree: %d", ret);
12822                         goto close_out;
12823                 }
12824         }
12825
12826         uuid_unparse(info->super_copy->fsid, uuidbuf);
12827         if (qgroup_report) {
12828                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12829                        uuidbuf);
12830                 ret = qgroup_verify_all(info);
12831                 err |= !!ret;
12832                 if (ret == 0)
12833                         report_qgroups(1);
12834                 goto close_out;
12835         }
12836         if (subvolid) {
12837                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12838                        subvolid, argv[optind], uuidbuf);
12839                 ret = print_extent_state(info, subvolid);
12840                 err |= !!ret;
12841                 goto close_out;
12842         }
12843         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12844
12845         if (!extent_buffer_uptodate(info->tree_root->node) ||
12846             !extent_buffer_uptodate(info->dev_root->node) ||
12847             !extent_buffer_uptodate(info->chunk_root->node)) {
12848                 error("critical roots corrupted, unable to check the filesystem");
12849                 err |= !!ret;
12850                 ret = -EIO;
12851                 goto close_out;
12852         }
12853
12854         if (init_extent_tree || init_csum_tree) {
12855                 struct btrfs_trans_handle *trans;
12856
12857                 trans = btrfs_start_transaction(info->extent_root, 0);
12858                 if (IS_ERR(trans)) {
12859                         error("error starting transaction");
12860                         ret = PTR_ERR(trans);
12861                         err |= !!ret;
12862                         goto close_out;
12863                 }
12864
12865                 if (init_extent_tree) {
12866                         printf("Creating a new extent tree\n");
12867                         ret = reinit_extent_tree(trans, info);
12868                         err |= !!ret;
12869                         if (ret)
12870                                 goto close_out;
12871                 }
12872
12873                 if (init_csum_tree) {
12874                         printf("Reinitialize checksum tree\n");
12875                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12876                         if (ret) {
12877                                 error("checksum tree initialization failed: %d",
12878                                                 ret);
12879                                 ret = -EIO;
12880                                 err |= !!ret;
12881                                 goto close_out;
12882                         }
12883
12884                         ret = fill_csum_tree(trans, info->csum_root,
12885                                              init_extent_tree);
12886                         err |= !!ret;
12887                         if (ret) {
12888                                 error("checksum tree refilling failed: %d", ret);
12889                                 return -EIO;
12890                         }
12891                 }
12892                 /*
12893                  * Ok now we commit and run the normal fsck, which will add
12894                  * extent entries for all of the items it finds.
12895                  */
12896                 ret = btrfs_commit_transaction(trans, info->extent_root);
12897                 err |= !!ret;
12898                 if (ret)
12899                         goto close_out;
12900         }
12901         if (!extent_buffer_uptodate(info->extent_root->node)) {
12902                 error("critical: extent_root, unable to check the filesystem");
12903                 ret = -EIO;
12904                 err |= !!ret;
12905                 goto close_out;
12906         }
12907         if (!extent_buffer_uptodate(info->csum_root->node)) {
12908                 error("critical: csum_root, unable to check the filesystem");
12909                 ret = -EIO;
12910                 err |= !!ret;
12911                 goto close_out;
12912         }
12913
12914         if (!ctx.progress_enabled)
12915                 fprintf(stderr, "checking extents\n");
12916         if (check_mode == CHECK_MODE_LOWMEM)
12917                 ret = check_chunks_and_extents_v2(root);
12918         else
12919                 ret = check_chunks_and_extents(root);
12920         err |= !!ret;
12921         if (ret)
12922                 error(
12923                 "errors found in extent allocation tree or chunk allocation");
12924
12925         ret = repair_root_items(info);
12926         err |= !!ret;
12927         if (ret < 0)
12928                 goto close_out;
12929         if (repair) {
12930                 fprintf(stderr, "Fixed %d roots.\n", ret);
12931                 ret = 0;
12932         } else if (ret > 0) {
12933                 fprintf(stderr,
12934                        "Found %d roots with an outdated root item.\n",
12935                        ret);
12936                 fprintf(stderr,
12937                         "Please run a filesystem check with the option --repair to fix them.\n");
12938                 ret = 1;
12939                 err |= !!ret;
12940                 goto close_out;
12941         }
12942
12943         if (!ctx.progress_enabled) {
12944                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
12945                         fprintf(stderr, "checking free space tree\n");
12946                 else
12947                         fprintf(stderr, "checking free space cache\n");
12948         }
12949         ret = check_space_cache(root);
12950         err |= !!ret;
12951         if (ret)
12952                 goto out;
12953
12954         /*
12955          * We used to have to have these hole extents in between our real
12956          * extents so if we don't have this flag set we need to make sure there
12957          * are no gaps in the file extents for inodes, otherwise we can just
12958          * ignore it when this happens.
12959          */
12960         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
12961         if (!ctx.progress_enabled)
12962                 fprintf(stderr, "checking fs roots\n");
12963         if (check_mode == CHECK_MODE_LOWMEM)
12964                 ret = check_fs_roots_v2(root->fs_info);
12965         else
12966                 ret = check_fs_roots(root, &root_cache);
12967         err |= !!ret;
12968         if (ret)
12969                 goto out;
12970
12971         fprintf(stderr, "checking csums\n");
12972         ret = check_csums(root);
12973         err |= !!ret;
12974         if (ret)
12975                 goto out;
12976
12977         fprintf(stderr, "checking root refs\n");
12978         /* For low memory mode, check_fs_roots_v2 handles root refs */
12979         if (check_mode != CHECK_MODE_LOWMEM) {
12980                 ret = check_root_refs(root, &root_cache);
12981                 err |= !!ret;
12982                 if (ret)
12983                         goto out;
12984         }
12985
12986         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12987                 struct extent_buffer *eb;
12988
12989                 eb = list_first_entry(&root->fs_info->recow_ebs,
12990                                       struct extent_buffer, recow);
12991                 list_del_init(&eb->recow);
12992                 ret = recow_extent_buffer(root, eb);
12993                 err |= !!ret;
12994                 if (ret)
12995                         break;
12996         }
12997
12998         while (!list_empty(&delete_items)) {
12999                 struct bad_item *bad;
13000
13001                 bad = list_first_entry(&delete_items, struct bad_item, list);
13002                 list_del_init(&bad->list);
13003                 if (repair) {
13004                         ret = delete_bad_item(root, bad);
13005                         err |= !!ret;
13006                 }
13007                 free(bad);
13008         }
13009
13010         if (info->quota_enabled) {
13011                 fprintf(stderr, "checking quota groups\n");
13012                 ret = qgroup_verify_all(info);
13013                 err |= !!ret;
13014                 if (ret)
13015                         goto out;
13016                 report_qgroups(0);
13017                 ret = repair_qgroups(info, &qgroups_repaired);
13018                 err |= !!ret;
13019                 if (err)
13020                         goto out;
13021                 ret = 0;
13022         }
13023
13024         if (!list_empty(&root->fs_info->recow_ebs)) {
13025                 error("transid errors in file system");
13026                 ret = 1;
13027                 err |= !!ret;
13028         }
13029 out:
13030         if (found_old_backref) { /*
13031                  * there was a disk format change when mixed
13032                  * backref was in testing tree. The old format
13033                  * existed about one week.
13034                  */
13035                 printf("\n * Found old mixed backref format. "
13036                        "The old format is not supported! *"
13037                        "\n * Please mount the FS in readonly mode, "
13038                        "backup data and re-format the FS. *\n\n");
13039                 err |= 1;
13040         }
13041         printf("found %llu bytes used err is %d\n",
13042                (unsigned long long)bytes_used, ret);
13043         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13044         printf("total tree bytes: %llu\n",
13045                (unsigned long long)total_btree_bytes);
13046         printf("total fs tree bytes: %llu\n",
13047                (unsigned long long)total_fs_tree_bytes);
13048         printf("total extent tree bytes: %llu\n",
13049                (unsigned long long)total_extent_tree_bytes);
13050         printf("btree space waste bytes: %llu\n",
13051                (unsigned long long)btree_space_waste);
13052         printf("file data blocks allocated: %llu\n referenced %llu\n",
13053                 (unsigned long long)data_bytes_allocated,
13054                 (unsigned long long)data_bytes_referenced);
13055
13056         free_qgroup_counts();
13057         free_root_recs_tree(&root_cache);
13058 close_out:
13059         close_ctree(root);
13060 err_out:
13061         if (ctx.progress_enabled)
13062                 task_deinit(ctx.info);
13063
13064         return err;
13065 }